diff --git a/.config/nextest.toml b/.config/nextest.toml
new file mode 100644
index 0000000000..db7e0b6a84
--- /dev/null
+++ b/.config/nextest.toml
@@ -0,0 +1,13 @@
+# Nextest configuration for OpenVM project
+
+# Per-test overrides: make heavier tests occupy more thread slots
+[[profile.default.overrides]]
+# Match all tests with "persistent" in their name
+filter = 'test(~persistent)'
+# Each of these tests occupies 16 thread slots (instead of the default 1) because they use more memory
+threads-required = 16
+
+# custom profile for heavy tests
+[profile.heavy]
+# Run fewer tests in parallel for heavy workloads
+test-threads = 2
diff --git a/.github/workflows/benchmark-call.yml b/.github/workflows/benchmark-call.yml
index 737e1c81ed..71aa9bc54a 100644
--- a/.github/workflows/benchmark-call.yml
+++ b/.github/workflows/benchmark-call.yml
@@ -49,7 +49,7 @@ on:
       features:
         type: string
         required: false
-        description: Host features, comma separated (aggregation,profiling)
+        description: Host features, comma separated (aggregation,perf-metrics)
   workflow_call:
     inputs:
       benchmark_name:
@@ -102,12 +102,12 @@ on:
       features:
         type: string
         required: false
-        description: Host features, comma separated (aggregation,profiling)
+        description: Host features, comma separated (aggregation,perf-metrics)
 
 env:
   S3_METRICS_PATH: s3://openvm-public-data-sandbox-us-east-1/benchmark/github/metrics
   S3_FLAMEGRAPHS_PATH: s3://openvm-public-data-sandbox-us-east-1/benchmark/github/flamegraphs
-  FEATURE_FLAGS: "bench-metrics,parallel,nightly-features"
+  FEATURE_FLAGS: "metrics,parallel,nightly-features"
   INPUT_ARGS: ""
   CARGO_NET_GIT_FETCH_WITH_CLI: "true"
 
@@ -170,7 +170,7 @@ jobs:
             ROOT_ARG="--root_log_blowup ${{ inputs.root_log_blowup }}"
             INTERNAL_ARG="--internal_log_blowup ${{ inputs.internal_log_blowup }}"
             bash ./extensions/native/recursion/trusted_setup_s3.sh
-            PARAMS_DIR=$(pwd)/params
+            PARAMS_DIR=$HOME/.openvm/params
             PARAMS_ARG="--kzg-params-dir $PARAMS_DIR"
             echo "INPUT_ARGS=${ROOT_ARG} ${INTERNAL_ARG} ${PARAMS_ARG} ${INPUT_ARGS}" >> $GITHUB_ENV
           fi
@@ -230,11 +230,11 @@ jobs:
           s5cmd cp $METRIC_PATH ${{ env.S3_METRICS_PATH }}/${METRIC_NAME}-${current_sha}.json
 
       - name: Install inferno-flamegraph
-        if: ${{ contains(env.FEATURE_FLAGS, 'profiling') }}
+        if: ${{ contains(env.FEATURE_FLAGS, 'perf-metrics') }}
         run: cargo install inferno
 
       - name: Generate flamegraphs
-        if: ${{ contains(env.FEATURE_FLAGS, 'profiling') }}
+        if: ${{ contains(env.FEATURE_FLAGS, 'perf-metrics') }}
         run: |
           if [[ -f $METRIC_PATH ]]; then
             GUEST_SYMBOLS_PATH="${METRIC_PATH%.json}.syms"
@@ -250,9 +250,15 @@ jobs:
           fi
 
       ##########################################################################
-      # Update s3 for latest main metrics upon a push event                    #
+      # Update s3 for latest branch metrics upon a push event                  #
       ##########################################################################
-      - name: Update latest main result in s3
-        if: github.event_name == 'push' && github.ref == 'refs/heads/main'
+      - name: Update latest branch result in s3
+        if: github.event_name == 'push'
         run: |
-          s5cmd cp $METRIC_PATH "${{ env.S3_METRICS_PATH }}/main-${METRIC_NAME}.json"
+          if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then
+            # for backwards compatibility
+            REF_HASH="main"
+          else
+            REF_HASH=$(echo "${{ github.ref }}" | sha256sum | cut -d' ' -f1)
+          fi
+          s5cmd cp $METRIC_PATH "${{ env.S3_METRICS_PATH }}/${REF_HASH}-${METRIC_NAME}.json"
diff --git a/.github/workflows/benchmarks-execute.yml b/.github/workflows/benchmarks-execute.yml
index 741ccdb0f1..11f94fd411 100644
--- a/.github/workflows/benchmarks-execute.yml
+++ b/.github/workflows/benchmarks-execute.yml
@@ -1,8 +1,9 @@
-name: "benchmarks-execute"
+name: "Execution benchmarks"
 
 on:
   push:
-    branches: ["main"]
+    # TODO(ayush): remove after feat/new-execution is merged
+    branches: ["main", "feat/new-execution"]
   pull_request:
     types: [opened, synchronize, reopened, labeled]
     branches: ["**"]
@@ -18,95 +19,101 @@ on:
       - ".github/workflows/benchmarks-execute.yml"
   workflow_dispatch:
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}
+  cancel-in-progress: true
+
 env:
   CARGO_TERM_COLOR: always
+  S3_FIXTURES_PATH: s3://openvm-public-data-sandbox-us-east-1/benchmark/fixtures
+  JEMALLOC_SYS_WITH_MALLOC_CONF: "retain:true,background_thread:true,metadata_thp:always,thp:always,dirty_decay_ms:10000,muzzy_decay_ms:10000,abort_conf:true"
 
 jobs:
-  execute-benchmarks:
+  codspeed-walltime-benchmarks:
+    name: Run codspeed walltime benchmarks
     runs-on:
       - runs-on=${{ github.run_id }}
-      - runner=8cpu-linux-x64
+      - family=m5a.xlarge # 2.5 GHz clock speed
+      - image=ubuntu24-full-x64
+      - extras=s3-cache
+
+    env:
+      CODSPEED_RUNNER_MODE: walltime
+
     steps:
+      - uses: runs-on/action@v1
       - uses: actions/checkout@v4
-
-      - name: Set up Rust
-        uses: actions-rs/toolchain@v1
+      - uses: dtolnay/rust-toolchain@stable
+      - uses: Swatinem/rust-cache@v2
         with:
-          profile: minimal
-          toolchain: stable
-          override: true
+          cache-on-failure: true
 
-      - name: Run execution benchmarks
-        working-directory: benchmarks/execute
-        run: cargo run | tee benchmark_output.log
+      - name: Install architecture specific tools
+        run: |
+          source ci/scripts/utils.sh
+          install_s5cmd
 
-      - name: Parse benchmark results
+      - name: Pull fixtures from S3
+        run: |
+          mkdir -p benchmarks/fixtures
+          s5cmd cp "${{ env.S3_FIXTURES_PATH }}/*" benchmarks/fixtures/ || echo "No fixtures found in S3"
+
+      - name: Install cargo-binstall
+        uses: cargo-bins/cargo-binstall@main
+      - name: Install codspeed
+        run: cargo binstall --no-confirm --force cargo-codspeed
+
+      - name: Build benchmarks
         working-directory: benchmarks/execute
+        run: cargo codspeed build --profile maxperf
+      - name: Run benchmarks
+        uses: CodSpeedHQ/action@v3
+        with:
+          working-directory: benchmarks/execute
+          run: cargo codspeed run
+          token: ${{ secrets.CODSPEED_TOKEN }}
+
+  codspeed-instrumentation-benchmarks:
+    name: Run codspeed instrumentation benchmarks
+    runs-on:
+      - runs-on=${{ github.run_id }}
+      - family=m5a.xlarge
+      - image=ubuntu24-full-x64
+      - extras=s3-cache
+    if: github.event_name != 'pull_request'
+
+    env:
+      CODSPEED_RUNNER_MODE: instrumentation
+
+    steps:
+      - uses: runs-on/action@v1
+      - uses: actions/checkout@v4
+      - uses: dtolnay/rust-toolchain@stable
+      - uses: Swatinem/rust-cache@v2
+        with:
+          cache-on-failure: true
+
+      - name: Install architecture specific tools
         run: |
-          # Determine if running in GitHub Actions environment
-          if [ -n "$GITHUB_STEP_SUMMARY" ]; then
-            SUMMARY_FILE="$GITHUB_STEP_SUMMARY"
-            echo "### Benchmark Results Summary" >> "$SUMMARY_FILE"
-          else
-            SUMMARY_FILE="benchmark_summary.md"
-            echo "### Benchmark Results Summary" > "$SUMMARY_FILE"
-            echo "Saving summary to $SUMMARY_FILE"
-          fi
-
-          # Set up summary table header
-          echo "| Program | Total Time (ms) |" >> "$SUMMARY_FILE"
-          echo "| ------- | --------------- |" >> "$SUMMARY_FILE"
-
-          # Variables to track current program and total time
-          current_program=""
-          total_time=0
-
-          # Process the output file line by line
-          while IFS= read -r line; do
-            # Check if line contains "Running program" message
-            if [[ $line =~ ｉ\ \[info\]:\ Running\ program:\ ([a-zA-Z0-9_-]+) ]]; then
-              # If we were processing a program, output its results
-              if [[ -n "$current_program" ]]; then
-                echo "| $current_program | $total_time |" >> "$SUMMARY_FILE"
-              fi
-
-              # Start tracking new program
-              current_program="${BASH_REMATCH[1]}"
-              total_time=0
-            fi
-
-            # Check for program completion to catch programs that might have no execution segments
-            if [[ $line =~ ｉ\ \[info\]:\ Completed\ program:\ ([a-zA-Z0-9_-]+) ]]; then
-              completed_program="${BASH_REMATCH[1]}"
-              # If no segments were found for this program, ensure it's still in the output
-              if [[ "$current_program" == "$completed_program" && $total_time == 0 ]]; then
-                echo "| $current_program | 0 |" >> "$SUMMARY_FILE"
-                current_program=""
-              fi
-            fi
-
-            # Check if line contains execution time (looking for the format with ms or s)
-            if [[ $line =~ execute_segment\ \[\ ([0-9.]+)(ms|s)\ \|\ [0-9.]+%\ \]\ segment ]]; then
-              segment_time="${BASH_REMATCH[1]}"
-              unit="${BASH_REMATCH[2]}"
-
-              # Convert to milliseconds if in seconds
-              if [[ "$unit" == "s" ]]; then
-                segment_time=$(echo "scale=6; $segment_time * 1000" | bc)
-              fi
-
-              # Add segment time to total
-              total_time=$(echo "scale=6; $total_time + $segment_time" | bc)
-            fi
-          done < benchmark_output.log
-
-          # Output the last program result if there was one
-          if [[ -n "$current_program" ]]; then
-            echo "| $current_program | $total_time |" >> "$SUMMARY_FILE"
-          fi
-
-          # If not in GitHub Actions, print the summary to the terminal
-          if [ -z "$GITHUB_STEP_SUMMARY" ]; then
-            echo -e "\nBenchmark Summary:"
-            cat "$SUMMARY_FILE"
-          fi
+          source ci/scripts/utils.sh
+          install_s5cmd
+
+      - name: Pull fixtures from S3
+        run: |
+          mkdir -p benchmarks/fixtures
+          s5cmd cp "${{ env.S3_FIXTURES_PATH }}/*" benchmarks/fixtures/ || echo "No fixtures found in S3"
+
+      - name: Install cargo-binstall
+        uses: cargo-bins/cargo-binstall@main
+      - name: Install codspeed
+        run: cargo binstall --no-confirm --force cargo-codspeed
+
+      - name: Build benchmarks
+        working-directory: benchmarks/execute
+        run: cargo codspeed build
+      - name: Run benchmarks
+        uses: CodSpeedHQ/action@v3
+        with:
+          working-directory: benchmarks/execute
+          run: cargo codspeed run
+          token: ${{ secrets.CODSPEED_TOKEN }}
diff --git a/.github/workflows/benchmarks-upload-fixtures.yml b/.github/workflows/benchmarks-upload-fixtures.yml
new file mode 100644
index 0000000000..d18538e419
--- /dev/null
+++ b/.github/workflows/benchmarks-upload-fixtures.yml
@@ -0,0 +1,42 @@
+name: "Upload benchmark fixtures"
+
+on:
+  workflow_dispatch:
+
+env:
+  CARGO_TERM_COLOR: always
+  S3_FIXTURES_PATH: s3://openvm-public-data-sandbox-us-east-1/benchmark/fixtures
+
+jobs:
+  generate-fixtures:
+    name: Generate and upload benchmark fixtures
+    runs-on:
+      - runs-on=${{ github.run_id }}
+      - runner=64cpu-linux-arm64
+      - family=m7
+      - extras=s3-cache
+
+    steps:
+      - uses: runs-on/action@v1
+      - uses: actions/checkout@v4
+      - uses: dtolnay/rust-toolchain@stable
+      - uses: Swatinem/rust-cache@v2
+        with:
+          cache-on-failure: true
+
+      - name: Install architecture specific tools
+        run: |
+          source ci/scripts/utils.sh
+          install_s5cmd
+
+      - name: Generate fixtures
+        run: cargo r -r --bin generate-fixtures --features generate-fixtures
+
+      - name: Upload fixtures to S3
+        run: |
+          if [ -d "benchmarks/fixtures" ]; then
+            s5cmd cp benchmarks/fixtures/ ${{ env.S3_FIXTURES_PATH }}/
+          else
+            echo "No fixtures directory found"
+            exit 1
+          fi
diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
index 3c2b02c574..4b1fbcc502 100644
--- a/.github/workflows/benchmarks.yml
+++ b/.github/workflows/benchmarks.yml
@@ -2,7 +2,7 @@ name: "OpenVM Benchmarks: Coordinate Runner & Reporting"
 
 on:
   push:
-    branches: ["main"]
+    branches: ["main", "feat/new-execution"]
   pull_request:
     types: [opened, synchronize, reopened, labeled]
     branches: ["**"]
@@ -89,7 +89,7 @@ jobs:
             FEATURE_FLAGS="aggregation,${FEATURE_FLAGS}"
           fi
           if [[ "${{ github.event.inputs.flamegraphs }}" == "true" ]]; then
-            FEATURE_FLAGS="profiling,${FEATURE_FLAGS}"
+            FEATURE_FLAGS="perf-metrics,${FEATURE_FLAGS}"
           fi
 
           matrix=$(jq -c --argjson run_e2e $RUN_E2E --arg features "$FEATURE_FLAGS" '
@@ -211,9 +211,21 @@ jobs:
           json_file_list=$(echo -n "$json_files" | paste -sd "," -)
           echo $json_file_list
 
-          prev_json_files=$(echo $matrix | jq -r '
+          # For PRs, compare against the latest metrics stored for the target branch
+          if [[ "${{ github.event_name }}" == "pull_request" ]]; then
+            if [[ "${{ github.base_ref }}" == "main" ]]; then
+              REF_HASH="main"
+            else
+              REF_HASH=$(echo "refs/heads/${{ github.base_ref }}" | sha256sum | cut -d' ' -f1)
+            fi
+            echo "Target branch REF_HASH: $REF_HASH"
+          else
+            REF_HASH="main"
+          fi
+
+          prev_json_files=$(echo $matrix | jq -r --arg target "$REF_HASH" '
             .[] |
-            "main-\(.id).json"')
+            "\($target)-\(.id).json"')
           prev_json_file_list=$(echo -n "$prev_json_files" | paste -sd "," -)
           echo $prev_json_file_list
 
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 574a49be15..7f3df63f6f 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -16,6 +16,7 @@ jobs:
     runs-on:
       - runs-on=${{ github.run_id }}
       - runner=64cpu-linux-arm64
+      - image=ubuntu24-full-arm64
       - extras=s3-cache
     steps:
       - uses: runs-on/action@v1
diff --git a/.github/workflows/cli.yml b/.github/workflows/cli.yml
index 510a124092..d0816f6731 100644
--- a/.github/workflows/cli.yml
+++ b/.github/workflows/cli.yml
@@ -36,7 +36,8 @@ jobs:
     runs-on:
       - runs-on=${{ github.run_id }}
       - disk=large
-      - runner=32cpu-linux-arm64
+      - runner=64cpu-linux-arm64
+      - image=ubuntu24-full-arm64
       - extras=s3-cache
 
     steps:
@@ -47,7 +48,8 @@ jobs:
           cache-on-failure: true
       - uses: taiki-e/install-action@nextest
       - name: Install solc # svm should support arm64 linux
-        run: (hash svm 2>/dev/null || cargo install --version 0.2.23 svm-rs) && svm install 0.8.19 && solc --version
+        run: |
+          (hash svm 2>/dev/null || cargo install --version 0.2.23 svm-rs) && svm install 0.8.19 && solc --version
 
       - name: Install cargo-openvm
         working-directory: crates/cli
@@ -80,8 +82,7 @@ jobs:
         working-directory: crates/cli
         run: |
           export RUST_BACKTRACE=1
-          cargo build
-          cargo run --bin cargo-openvm -- openvm keygen --config ./example/app_config.toml --output-dir .
+          cargo openvm keygen --config ./example/app_config.toml --output-dir .
 
       - name: Set USE_LOCAL_OPENVM environment variable
         run: |
@@ -94,4 +95,5 @@ jobs:
       - name: Run CLI tests
         working-directory: crates/cli
         run: |
-          cargo nextest run --cargo-profile=fast
+          export SKIP_INSTALL=1
+          cargo nextest run --cargo-profile=fast --test-threads=1
diff --git a/.github/workflows/extension-tests.yml b/.github/workflows/extension-tests.yml
index ef13b840c6..2d07bdf1f6 100644
--- a/.github/workflows/extension-tests.yml
+++ b/.github/workflows/extension-tests.yml
@@ -40,6 +40,7 @@ jobs:
     runs-on:
       - runs-on=${{ github.run_id }}
       - runner=64cpu-linux-arm64
+      - image=ubuntu24-full-arm64
       - tag=extension-${{ matrix.extension.name }}
       - extras=s3-cache
 
@@ -69,7 +70,7 @@ jobs:
 
       - name: Run ${{ matrix.extension.name }} circuit crate tests
         working-directory: extensions/${{ matrix.extension.path }}/circuit
-        run: cargo nextest run --cargo-profile=fast
+        run: cargo nextest run --cargo-profile=fast --test-threads=32
 
       - name: Run ${{ matrix.extension.name }} guest crate tests
         if: hashFiles(format('extensions/{0}/guest', matrix.extension.path)) != ''
@@ -86,4 +87,4 @@ jobs:
         working-directory: extensions/${{ matrix.extension.path }}/tests
         run: |
           rustup component add rust-src --toolchain nightly-2025-02-14
-          cargo nextest run --cargo-profile=fast --no-tests=pass
+          cargo nextest run --cargo-profile=fast --profile=heavy --no-tests=pass
diff --git a/.github/workflows/guest-lib-tests.yml b/.github/workflows/guest-lib-tests.yml
index 1b87b600e2..98a3743a36 100644
--- a/.github/workflows/guest-lib-tests.yml
+++ b/.github/workflows/guest-lib-tests.yml
@@ -13,6 +13,7 @@ on:
       - "guest-libs/**"
       - "Cargo.toml"
       - ".github/workflows/guest-lib-tests.yml"
+      - "crates/sdk/guest/fib/**"
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}
@@ -41,6 +42,7 @@ jobs:
     runs-on:
       - runs-on=${{ github.run_id }}
       - runner=64cpu-linux-arm64
+      - image=ubuntu24-full-arm64
       - tag=crate-${{ matrix.crate.name }}
       - extras=s3-cache
 
diff --git a/.github/workflows/lints.yml b/.github/workflows/lints.yml
index e41580948e..14feaff101 100644
--- a/.github/workflows/lints.yml
+++ b/.github/workflows/lints.yml
@@ -23,7 +23,7 @@ jobs:
 
       - uses: codespell-project/actions-codespell@v2
         with:
-          skip: Cargo.lock,./book/pnpm-lock.yaml,*.txt,./crates/toolchain/openvm/src/memcpy.s,./crates/toolchain/openvm/src/memset.s,./audits/*.pdf,./guest-libs/ruint/*
+          skip: Cargo.lock,./docs/vocs/pnpm-lock.yaml,*.txt,./crates/toolchain/openvm/src/memcpy.s,./crates/toolchain/openvm/src/memset.s,./audits/*.pdf,./guest-libs/ruint/*
           ignore_words_file: .codespellignore
 
       - uses: dtolnay/rust-toolchain@stable
@@ -46,7 +46,7 @@ jobs:
           # list of all unique features across workspace generated using:
           # cargo metadata --format-version=1 --no-deps | jq -r '.packages[].features | to_entries[] | .key' | sort -u | tr '\n' ' ' && echo ""
           # (exclude mimalloc since it conflicts with jemalloc)
-          cargo clippy --all-targets --all --tests --features "aggregation bench-metrics bls12_381 bn254 build-binaries default entrypoint evm-prove evm-verify export-intrinsics export-libm function-span getrandom-unsupported halo2-compiler halo2curves heap-embedded-alloc jemalloc jemalloc-prof nightly-features panic-handler parallel profiling rust-runtime static-verifier std test-utils" -- -D warnings
+          cargo clippy --all-targets --all --tests --features "aggregation bls12_381 bn254 build-elfs default entrypoint evm-prove evm-verify export-intrinsics export-libm function-span getrandom-unsupported halo2-compiler halo2curves heap-embedded-alloc jemalloc jemalloc-prof metrics nightly-features panic-handler parallel perf-metrics rust-runtime static-verifier std test-utils" -- -D warnings
           cargo clippy --all-targets --all --tests --no-default-features --features "mimalloc" -- -D warnings
 
       - name: Run fmt, clippy for guest
diff --git a/.github/workflows/native-compiler.yml b/.github/workflows/native-compiler.yml
index af4f39ddff..b79a3cb1c9 100644
--- a/.github/workflows/native-compiler.yml
+++ b/.github/workflows/native-compiler.yml
@@ -25,6 +25,7 @@ jobs:
     runs-on:
       - runs-on=${{ github.run_id }}
       - runner=64cpu-linux-arm64
+      - image=ubuntu24-full-arm64
       - extras=s3-cache
 
     steps:
diff --git a/.github/workflows/primitives.yml b/.github/workflows/primitives.yml
index 714230b8cd..2d86155ab2 100644
--- a/.github/workflows/primitives.yml
+++ b/.github/workflows/primitives.yml
@@ -26,6 +26,7 @@ jobs:
     runs-on:
       - runs-on=${{ github.run_id }}
       - runner=32cpu-linux-arm64
+      - image=ubuntu24-full-arm64
       - extras=s3-cache
 
     steps:
diff --git a/.github/workflows/publish-mdbook.yml b/.github/workflows/publish-vocs.yml
similarity index 55%
rename from .github/workflows/publish-mdbook.yml
rename to .github/workflows/publish-vocs.yml
index 6c63293993..a6f65705ad 100644
--- a/.github/workflows/publish-mdbook.yml
+++ b/.github/workflows/publish-vocs.yml
@@ -1,6 +1,9 @@
-name: Publish mdBook to Vercel
+name: Publish Vocs Docs to Vercel
 
 on:
+  pull_request:
+    branches:
+      - feat/new-execution
   workflow_dispatch:
     inputs:
       branch:
@@ -18,29 +21,26 @@ jobs:
 
     steps:
       - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v5
         with:
           ref: ${{ github.event.inputs.branch }}
 
-      - name: Install Rust
-        uses: dtolnay/rust-toolchain@stable
-      - uses: Swatinem/rust-cache@v2
+      - name: Setup pnpm
+        uses: pnpm/action-setup@v4
         with:
-          cache-on-failure: true
+          version: 9.15.0
 
-      - name: Setup mdbook
-        uses: peaceiris/actions-mdbook@v1
+      - name: Setup Node.js
+        uses: actions/setup-node@v4
         with:
-          mdbook-version: "latest"
+          node-version: 20
+          cache: 'pnpm'
+          cache-dependency-path: 'docs/vocs/pnpm-lock.yaml'
 
-      - name: Install mdbook plugins
+      - name: Build docs
         run: |
-          cargo install mdbook-katex mdbook-linkcheck mdbook-mermaid just
-
-      - name: Build the book
-        run: |
-          cd book
-          mdbook build
+          cd docs/vocs/
+          pnpm install --frozen-lockfile && pnpm vercel-build
 
       - name: Install Vercel CLI
         run: npm install -g vercel
@@ -48,10 +48,10 @@ jobs:
       - name: Deploy to Vercel
         env:
           VERCEL_TOKEN: ${{ secrets.VERCEL_TOKEN }} # Token stored in GitHub Secrets
-          VERCEL_PROJECT_ID: ${{ secrets.VERCEL_PROJECT_ID }}
+          VERCEL_PROJECT_ID: ${{ secrets.VERCEL_VOCS_PROJECT_ID }}
           VERCEL_ORG_ID: ${{ secrets.VERCEL_ORG_ID }}
         run: |
-          cd book/book
+          cd docs/vocs/
           mkdir .vercel
           echo "{\"projectId\":\"$VERCEL_PROJECT_ID\",\"orgId\":\"$VERCEL_ORG_ID\"}" > .vercel/project.json
-          vercel --prod --token $VERCEL_TOKEN
+          vercel --prod --token $VERCEL_TOKEN --yes
diff --git a/.github/workflows/recursion.yml b/.github/workflows/recursion.yml
index 814c1fa44a..64538c18c1 100644
--- a/.github/workflows/recursion.yml
+++ b/.github/workflows/recursion.yml
@@ -26,6 +26,7 @@ jobs:
     runs-on:
       - runs-on=${{ github.run_id }}
       - runner=64cpu-linux-arm64
+      - image=ubuntu24-full-arm64
       - extras=s3-cache
 
     steps:
diff --git a/.github/workflows/sdk.yml b/.github/workflows/sdk.yml
index e24df21ffe..4d194a03df 100644
--- a/.github/workflows/sdk.yml
+++ b/.github/workflows/sdk.yml
@@ -26,6 +26,7 @@ jobs:
     runs-on:
       - runs-on=${{ github.run_id }}
       - family=m7a.24xlarge
+      - image=ubuntu24-full-x64
       - disk=large
       - extras=s3-cache
 
@@ -97,4 +98,10 @@ jobs:
         working-directory: crates/sdk
         run: |
           export RUST_BACKTRACE=1
-          cargo nextest run --cargo-profile=fast --test-threads=2 --features parallel,evm-verify
+          cargo nextest run --cargo-profile=fast --features parallel,evm-verify
+
+      - name: Run ignored tests
+        working-directory: crates/sdk
+        if: ${{ github.event_name == 'push' }}
+        run: |
+          cargo nextest run --cargo-profile=fast --features parallel,evm-verify --ignored test_static_verifier_custom_pv_handler
diff --git a/.github/workflows/versioning.yml b/.github/workflows/versioning.yml
index 19f87af031..10fb65e6b4 100644
--- a/.github/workflows/versioning.yml
+++ b/.github/workflows/versioning.yml
@@ -45,19 +45,19 @@ jobs:
           rm -rf ~/.openvm-base
           mv ~/.openvm ~/.openvm-base
 
-      # - name: Build and keygen examples from base branch
-      #   run: |
-      #     mkdir -p ./base-outputs/examples
-      #     for example in examples/*/; do
-      #       if [ -f "$example/Cargo.toml" ]; then
-      #         example_name=$(basename "$example")
-      #         echo "Building and generating keys for example: $example_name"
-      #         cd "$example"
-      #         cargo openvm build --no-transpile
-      #         cargo openvm keygen --output-dir "../../base-outputs/examples/$example_name"
-      #         cd ../..
-      #       fi
-      #     done
+      - name: Build and keygen examples from base branch
+        run: |
+          mkdir -p ./base-outputs/examples
+          for example in examples/*/; do
+            if [ -f "$example/Cargo.toml" ]; then
+              example_name=$(basename "$example")
+              echo "Building and generating keys for example: $example_name"
+              cd "$example"
+              cargo openvm build --no-transpile
+              cargo openvm keygen --output-dir "../../base-outputs/examples/$example_name"
+              cd ../..
+            fi
+          done
 
       - name: Build and keygen benchmarks from base branch
         run: |
@@ -87,20 +87,20 @@ jobs:
         run: |
           cargo openvm setup --evm
 
-      # - name: Build and keygen examples from tagged version
-      #   run: |
-      #     mkdir -p ./tagged-outputs/examples
-      #     for example in examples/*/; do
-      #       if [ -f "$example/Cargo.toml" ]; then
-      #         example_name=$(basename "$example")
-      #         echo "Building and generating keys for example: $example_name"
-      #         cd "$example"
-      #         cargo openvm build --no-transpile
-      #         mkdir -p "../../tagged-outputs/examples/$example_name"
-      #         cargo openvm keygen --output-dir "../../tagged-outputs/examples/$example_name/app.vk"
-      #         cd ../..
-      #       fi
-      #     done
+      - name: Build and keygen examples from tagged version
+        run: |
+          mkdir -p ./tagged-outputs/examples
+          for example in examples/*/; do
+            if [ -f "$example/Cargo.toml" ]; then
+              example_name=$(basename "$example")
+              echo "Building and generating keys for example: $example_name"
+              cd "$example"
+              cargo openvm build --no-transpile
+              mkdir -p "../../tagged-outputs/examples/$example_name"
+              cargo openvm keygen --output-dir "../../tagged-outputs/examples/$example_name"
+              cd ../..
+            fi
+          done
 
       - name: Build and keygen benchmarks from tagged version
         run: |
@@ -117,28 +117,28 @@ jobs:
             fi
           done
 
-      # - name: Compare example verification keys
-      #   run: |
-      #     echo "Comparing example verification keys between base branch and ${{ env.version }}..."
-      #     failed=0
-      #     for example in examples/*/; do
-      #       if [ -f "$example/Cargo.toml" ]; then
-      #         example_name=$(basename "$example")
-      #         echo "Checking example: $example_name"
-      #         if cmp "./base-outputs/examples/$example_name/app.vk" "./tagged-outputs/examples/$example_name/app.vk"; then
-      #           echo "✅ $example_name verification keys are identical"
-      #         else
-      #           echo "❌ $example_name verification keys differ"
-      #           failed=1
-      #         fi
-      #       fi
-      #     done
-      #     if [ $failed -eq 1 ]; then
-      #       echo "❌ Some example verification keys differ - versioning policy violated"
-      #       exit 1
-      #     else
-      #       echo "✅ All example verification keys are identical"
-      #     fi
+      - name: Compare example verification keys
+        run: |
+          echo "Comparing example verification keys between base branch and ${{ env.version }}..."
+          failed=0
+          for example in examples/*/; do
+            if [ -f "$example/Cargo.toml" ]; then
+              example_name=$(basename "$example")
+              echo "Checking example: $example_name"
+              if cmp "./base-outputs/examples/$example_name/app.vk" "./tagged-outputs/examples/$example_name/app.vk"; then
+                echo "✅ $example_name verification keys are identical"
+              else
+                echo "❌ $example_name verification keys differ"
+                failed=1
+              fi
+            fi
+          done
+          if [ $failed -eq 1 ]; then
+            echo "❌ Some example verification keys differ - versioning policy violated"
+            exit 1
+          else
+            echo "✅ All example verification keys are identical"
+          fi
 
       - name: Compare benchmark verification keys
         run: |
diff --git a/.github/workflows/vm.yml b/.github/workflows/vm.yml
index cb7f2284ca..c8c03dc931 100644
--- a/.github/workflows/vm.yml
+++ b/.github/workflows/vm.yml
@@ -25,6 +25,7 @@ jobs:
     runs-on:
       - runs-on=${{ github.run_id }}
       - runner=64cpu-linux-arm64
+      - image=ubuntu24-full-arm64
       - extras=s3-cache
 
     steps:
@@ -40,3 +41,8 @@ jobs:
         working-directory: crates/vm
         run: |
           cargo nextest run --cargo-profile=fast --features parallel
+
+      - name: Run vm crate tests with basic memory
+        working-directory: crates/vm
+        run: |
+          cargo nextest run --cargo-profile=fast --features parallel,basic-memory
diff --git a/.gitignore b/.gitignore
index d794a5dc57..aaf6aff435 100644
--- a/.gitignore
+++ b/.gitignore
@@ -37,3 +37,9 @@ guest.syms
 
 # openvm generated files
 crates/cli/openvm/
+
+# samply profile
+profile.json.gz
+
+# test fixtures
+benchmarks/fixtures
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 928c548adf..37792cd3ed 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,31 @@ All notable changes to OpenVM will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project follows a versioning principles documented in [VERSIONING.md](./VERSIONING.md).
 
+## v1.4.0-rc (Unreleased)
+
+### Added
+- (Verifier) An `AggVerifyingKey` struct is introduced so that verifying the final STARK proof does not require the proving key.
+- (Config) Added `addr_spaces` vector of `AddressSpaceHostConfig` to `MemoryConfig`.
+
+### Changed
+- (Verifier) The `MultiStarkVerifyingKey`s for all existing App and Agg configs remain unchanged. However, the serialized binary for the `AppVerifyingKey` has changed due to the removal of `as_offset` from `MemoryDimensions` (see below).
+- (Verifier) A fix was made to apply a missing permutation to the trace height constraints in the root verifier circuit's vkey. This changes the pre-vkey hash of the root verifier vkey for initializing Fiat-Shamir. This fix does not impact the security of existing root verifier vkeys generated through the SDK because root verifier proofs have fixed trace heights and these trace heights have been checked to satisfy all trace height constraints statically.
+- (Verifier/CLI) The `Halo2ProvingKey` and `Halo2Verifier.sol` verifier contract generated by the CLI now use `verifier_k = 23` instead of the previous value of `24`, resulting in a smaller circuit.
+- (Toolchain) Removed `step` from `Program` struct because `DEFAULT_PC_STEP = 4` is always used.
+- (Config) The `SystemConfig` default now has `continuation_enabled: true` instead of the previous default of `false`. This is a **breaking change**.
+- (Config) The `clk_max_bits` field in `MemoryConfig` has been renamed to `timestamp_max_bits`.
+- (Config) The `as_offset` field in `MemoryDimensions` has been removed and replaced by the constant `ADDR_SPACE_OFFSET = 1`.
+- (ISA) Field arithmetic instructions now restrict address spaces `e, f` to be either `0` or `4`, instead of allowing any address space.
+- (ISA) RV32IM load instructions are now restricted to address space `2` only, instead of allowing address spaces `0`, `1`, or `2`.
+- (ISA) The maximum valid pointer value in address space `1` (register address space) is now `127`, corresponding to 32 registers with 4 byte limbs each.
+- (ISA) Memory accesses now have configurable minimum block size requirements per address space. Address spaces `1`, `2`, and `3` require a minimum block size of 4. The native address space (`4`) allows a minimum block size of 1. Address spaces beyond `4` default to a minimum block size of 1 but are configurable.
+- (Prover) Guest memory is stored on the host with memory layouts specified per address space. In particular, address spaces `1` through `3` are now represented in bytes instead of field elements.
+- (Prover) The internal format of `VmCommittedExe` has changed. Moreover, the main proving flows have been updated so they rely primarily on `VmExe` and not `VmCommittedExe`.
+- (CLI) The `prove stark` output proof no longer contains the `app_commit`. The `verify stark` command now needs to specify the target name to read the `app_commit` stored from a previous call to the `commit` command.
+- (CLI) The `setup` command now stores an `agg.vk` verifying key and the `verify stark` command now reads `agg.vk` instead of `agg.pk`.
+- (SDK) The `Sdk` is now specific to a VM config, with default constructors `Sdk::standard()` and `Sdk::riscv32()`. The methods of the `Sdk` have been updated for a better developer experience.
+- (SDK) The `Halo2ProvingKey` struct now internally contains `Arc` so it can be cloned and shared.
+
 ## v1.3.0 (2025-07-15)
 
 No circuit constraints or verifying keys were changed in this release.
diff --git a/Cargo.lock b/Cargo.lock
index ce7abadf50..4894aa51eb 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1,6 +1,6 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
-version = 3
+version = 4
 
 [[package]]
 name = "Inflector"
@@ -34,9 +34,9 @@ dependencies = [
 
 [[package]]
 name = "adler2"
-version = "2.0.0"
+version = "2.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627"
+checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
 
 [[package]]
 name = "aes"
@@ -51,9 +51,9 @@ dependencies = [
 
 [[package]]
 name = "ahash"
-version = "0.8.11"
+version = "0.8.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011"
+checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75"
 dependencies = [
  "cfg-if",
  "once_cell",
@@ -76,28 +76,61 @@ version = "0.2.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
 
+[[package]]
+name = "alloy-eip2124"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "741bdd7499908b3aa0b159bba11e71c8cddd009a2c2eb7a06e825f1ec87900a5"
+dependencies = [
+ "alloy-primitives 1.2.1",
+ "alloy-rlp",
+ "crc",
+ "serde",
+ "thiserror 2.0.12",
+]
+
 [[package]]
 name = "alloy-eip2930"
-version = "0.1.0"
+version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0069cf0642457f87a01a014f6dc29d5d893cd4fd8fddf0c3cdfad1bb3ebafc41"
+checksum = "7b82752a889170df67bbb36d42ca63c531eb16274f0d7299ae2a680facba17bd"
 dependencies = [
- "alloy-primitives 0.8.25",
+ "alloy-primitives 1.2.1",
  "alloy-rlp",
  "serde",
 ]
 
 [[package]]
 name = "alloy-eip7702"
-version = "0.4.2"
+version = "0.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4c986539255fb839d1533c128e190e557e52ff652c9ef62939e233a81dd93f7e"
+checksum = "9d4769c6ffddca380b0070d71c8b7f30bed375543fe76bb2f74ec0acf4b7cd16"
 dependencies = [
- "alloy-primitives 0.8.25",
+ "alloy-primitives 1.2.1",
  "alloy-rlp",
- "derive_more 1.0.0",
  "k256 0.13.4 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde",
+ "thiserror 2.0.12",
+]
+
+[[package]]
+name = "alloy-eips"
+version = "1.0.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6f35887da30b5fc50267109a3c61cd63e6ca1f45967983641053a40ee83468c1"
+dependencies = [
+ "alloy-eip2124",
+ "alloy-eip2930",
+ "alloy-eip7702",
+ "alloy-primitives 1.2.1",
+ "alloy-rlp",
+ "alloy-serde",
+ "auto_impl",
+ "c-kzg",
+ "derive_more 2.0.1",
+ "either",
+ "serde",
+ "sha2 0.10.9",
 ]
 
 [[package]]
@@ -121,10 +154,10 @@ dependencies = [
  "bytes",
  "cfg-if",
  "const-hex",
- "derive_more 0.99.19",
+ "derive_more 0.99.20",
  "hex-literal 0.4.1",
  "itoa",
- "ruint 1.12.3",
+ "ruint 1.15.0",
  "tiny-keccak",
 ]
 
@@ -138,10 +171,10 @@ dependencies = [
  "bytes",
  "cfg-if",
  "const-hex",
- "derive_more 0.99.19",
+ "derive_more 0.99.20",
  "hex-literal 0.4.1",
  "itoa",
- "ruint 1.12.3",
+ "ruint 1.15.0",
  "tiny-keccak",
 ]
 
@@ -157,15 +190,42 @@ dependencies = [
  "const-hex",
  "derive_more 2.0.1",
  "foldhash",
- "hashbrown 0.15.2",
- "indexmap 2.7.1",
+ "hashbrown 0.15.4",
+ "indexmap 2.10.0",
  "itoa",
  "k256 0.13.4 (registry+https://github.com/rust-lang/crates.io-index)",
  "keccak-asm",
  "paste",
  "proptest",
  "rand 0.8.5",
- "ruint 1.12.3",
+ "ruint 1.15.0",
+ "rustc-hash 2.1.1",
+ "serde",
+ "sha3",
+ "tiny-keccak",
+]
+
+[[package]]
+name = "alloy-primitives"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6177ed26655d4e84e00b65cb494d4e0b8830e7cae7ef5d63087d445a2600fb55"
+dependencies = [
+ "alloy-rlp",
+ "bytes",
+ "cfg-if",
+ "const-hex",
+ "derive_more 2.0.1",
+ "foldhash",
+ "hashbrown 0.15.4",
+ "indexmap 2.10.0",
+ "itoa",
+ "k256 0.13.4 (registry+https://github.com/rust-lang/crates.io-index)",
+ "keccak-asm",
+ "paste",
+ "proptest",
+ "rand 0.9.1",
+ "ruint 1.15.0",
  "rustc-hash 2.1.1",
  "serde",
  "sha3",
@@ -174,9 +234,9 @@ dependencies = [
 
 [[package]]
 name = "alloy-rlp"
-version = "0.3.11"
+version = "0.3.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3d6c1d995bff8d011f7cd6c81820d51825e6e06d6db73914c1630ecf544d83d6"
+checksum = "5f70d83b765fdc080dbcd4f4db70d8d23fe4761f2f02ebfa9146b833900634b4"
 dependencies = [
  "alloy-rlp-derive",
  "arrayvec",
@@ -185,13 +245,24 @@ dependencies = [
 
 [[package]]
 name = "alloy-rlp-derive"
-version = "0.3.11"
+version = "0.3.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a40e1ef334153322fd878d07e86af7a529bcb86b2439525920a88eba87bcf943"
+checksum = "64b728d511962dda67c1bc7ea7c03736ec275ed2cf4c35d9585298ac9ccf3b73"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
+]
+
+[[package]]
+name = "alloy-serde"
+version = "1.0.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee8d2c52adebf3e6494976c8542fbdf12f10123b26e11ad56f77274c16a2a039"
+dependencies = [
+ "alloy-primitives 1.2.1",
+ "serde",
+ "serde_json",
 ]
 
 [[package]]
@@ -205,7 +276,7 @@ dependencies = [
  "proc-macro-error2",
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -218,11 +289,11 @@ dependencies = [
  "alloy-sol-macro-input",
  "const-hex",
  "heck",
- "indexmap 2.7.1",
+ "indexmap 2.10.0",
  "proc-macro-error2",
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
  "syn-solidity",
  "tiny-keccak",
 ]
@@ -241,7 +312,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "serde_json",
- "syn 2.0.98",
+ "syn 2.0.104",
  "syn-solidity",
 ]
 
@@ -252,7 +323,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6d162f8524adfdfb0e4bd0505c734c985f3e2474eb022af32eef0d52a4f3935c"
 dependencies = [
  "serde",
- "winnow 0.7.3",
+ "winnow 0.7.12",
 ]
 
 [[package]]
@@ -300,9 +371,9 @@ dependencies = [
 
 [[package]]
 name = "anstream"
-version = "0.6.18"
+version = "0.6.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b"
+checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933"
 dependencies = [
  "anstyle",
  "anstyle-parse",
@@ -315,44 +386,44 @@ dependencies = [
 
 [[package]]
 name = "anstyle"
-version = "1.0.10"
+version = "1.0.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9"
+checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd"
 
 [[package]]
 name = "anstyle-parse"
-version = "0.2.6"
+version = "0.2.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9"
+checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2"
 dependencies = [
  "utf8parse",
 ]
 
 [[package]]
 name = "anstyle-query"
-version = "1.1.2"
+version = "1.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c"
+checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9"
 dependencies = [
  "windows-sys 0.59.0",
 ]
 
 [[package]]
 name = "anstyle-wincon"
-version = "3.0.7"
+version = "3.0.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e"
+checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882"
 dependencies = [
  "anstyle",
- "once_cell",
+ "once_cell_polyfill",
  "windows-sys 0.59.0",
 ]
 
 [[package]]
 name = "anyhow"
-version = "1.0.96"
+version = "1.0.98"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6b964d184e89d9b6b67dd2715bc8e74cf3107fb2b529990c90cf517326150bf4"
+checksum = "e16d2d3311acee920a9eb8d33b8cbc1787ce4a264e85f964c2404b969bdcd487"
 
 [[package]]
 name = "approx"
@@ -379,6 +450,18 @@ dependencies = [
  "yansi 0.5.1",
 ]
 
+[[package]]
+name = "ark-bls12-381"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3df4dcc01ff89867cd86b0da835f23c3f02738353aaee7dde7495af71363b8d5"
+dependencies = [
+ "ark-ec 0.5.0",
+ "ark-ff 0.5.0",
+ "ark-serialize 0.5.0",
+ "ark-std 0.5.0",
+]
+
 [[package]]
 name = "ark-bn254"
 version = "0.3.0"
@@ -401,6 +484,18 @@ dependencies = [
  "ark-std 0.4.0",
 ]
 
+[[package]]
+name = "ark-bn254"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d69eab57e8d2663efa5c63135b2af4f396d66424f88954c21104125ab6b3e6bc"
+dependencies = [
+ "ark-ec 0.5.0",
+ "ark-ff 0.5.0",
+ "ark-r1cs-std",
+ "ark-std 0.5.0",
+]
+
 [[package]]
 name = "ark-ec"
 version = "0.3.0"
@@ -422,7 +517,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "defd9a439d56ac24968cca0571f598a61bc8c55f71d50a89cda591cb750670ba"
 dependencies = [
  "ark-ff 0.4.2",
- "ark-poly",
+ "ark-poly 0.4.2",
  "ark-serialize 0.4.2",
  "ark-std 0.4.0",
  "derivative",
@@ -432,6 +527,27 @@ dependencies = [
  "zeroize",
 ]
 
+[[package]]
+name = "ark-ec"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43d68f2d516162846c1238e755a7c4d131b892b70cc70c471a8e3ca3ed818fce"
+dependencies = [
+ "ahash",
+ "ark-ff 0.5.0",
+ "ark-poly 0.5.0",
+ "ark-serialize 0.5.0",
+ "ark-std 0.5.0",
+ "educe",
+ "fnv",
+ "hashbrown 0.15.4",
+ "itertools 0.13.0",
+ "num-bigint 0.4.6",
+ "num-integer",
+ "num-traits",
+ "zeroize",
+]
+
 [[package]]
 name = "ark-ff"
 version = "0.3.0"
@@ -470,6 +586,26 @@ dependencies = [
  "zeroize",
 ]
 
+[[package]]
+name = "ark-ff"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a177aba0ed1e0fbb62aa9f6d0502e9b46dad8c2eab04c14258a1212d2557ea70"
+dependencies = [
+ "ark-ff-asm 0.5.0",
+ "ark-ff-macros 0.5.0",
+ "ark-serialize 0.5.0",
+ "ark-std 0.5.0",
+ "arrayvec",
+ "digest 0.10.7",
+ "educe",
+ "itertools 0.13.0",
+ "num-bigint 0.4.6",
+ "num-traits",
+ "paste",
+ "zeroize",
+]
+
 [[package]]
 name = "ark-ff-asm"
 version = "0.3.0"
@@ -490,6 +626,16 @@ dependencies = [
  "syn 1.0.109",
 ]
 
+[[package]]
+name = "ark-ff-asm"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62945a2f7e6de02a31fe400aa489f0e0f5b2502e69f95f853adb82a96c7a6b60"
+dependencies = [
+ "quote",
+ "syn 2.0.104",
+]
+
 [[package]]
 name = "ark-ff-macros"
 version = "0.3.0"
@@ -515,6 +661,19 @@ dependencies = [
  "syn 1.0.109",
 ]
 
+[[package]]
+name = "ark-ff-macros"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09be120733ee33f7693ceaa202ca41accd5653b779563608f1234f78ae07c4b3"
+dependencies = [
+ "num-bigint 0.4.6",
+ "num-traits",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.104",
+]
+
 [[package]]
 name = "ark-poly"
 version = "0.4.2"
@@ -528,6 +687,50 @@ dependencies = [
  "hashbrown 0.13.2",
 ]
 
+[[package]]
+name = "ark-poly"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "579305839da207f02b89cd1679e50e67b4331e2f9294a57693e5051b7703fe27"
+dependencies = [
+ "ahash",
+ "ark-ff 0.5.0",
+ "ark-serialize 0.5.0",
+ "ark-std 0.5.0",
+ "educe",
+ "fnv",
+ "hashbrown 0.15.4",
+]
+
+[[package]]
+name = "ark-r1cs-std"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "941551ef1df4c7a401de7068758db6503598e6f01850bdb2cfdb614a1f9dbea1"
+dependencies = [
+ "ark-ec 0.5.0",
+ "ark-ff 0.5.0",
+ "ark-relations",
+ "ark-std 0.5.0",
+ "educe",
+ "num-bigint 0.4.6",
+ "num-integer",
+ "num-traits",
+ "tracing",
+]
+
+[[package]]
+name = "ark-relations"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec46ddc93e7af44bcab5230937635b06fb5744464dd6a7e7b083e80ebd274384"
+dependencies = [
+ "ark-ff 0.5.0",
+ "ark-std 0.5.0",
+ "tracing",
+ "tracing-subscriber 0.2.25",
+]
+
 [[package]]
 name = "ark-serialize"
 version = "0.3.0"
@@ -544,12 +747,25 @@ version = "0.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "adb7b85a02b83d2f22f89bd5cac66c9c89474240cb6207cb1efc16d098e822a5"
 dependencies = [
- "ark-serialize-derive",
+ "ark-serialize-derive 0.4.2",
  "ark-std 0.4.0",
  "digest 0.10.7",
  "num-bigint 0.4.6",
 ]
 
+[[package]]
+name = "ark-serialize"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f4d068aaf107ebcd7dfb52bc748f8030e0fc930ac8e360146ca54c1203088f7"
+dependencies = [
+ "ark-serialize-derive 0.5.0",
+ "ark-std 0.5.0",
+ "arrayvec",
+ "digest 0.10.7",
+ "num-bigint 0.4.6",
+]
+
 [[package]]
 name = "ark-serialize-derive"
 version = "0.4.2"
@@ -561,6 +777,17 @@ dependencies = [
  "syn 1.0.109",
 ]
 
+[[package]]
+name = "ark-serialize-derive"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "213888f660fddcca0d257e88e54ac05bca01885f258ccdf695bafd77031bb69d"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.104",
+]
+
 [[package]]
 name = "ark-std"
 version = "0.3.0"
@@ -581,6 +808,16 @@ dependencies = [
  "rand 0.8.5",
 ]
 
+[[package]]
+name = "ark-std"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "246a225cc6131e9ee4f24619af0f19d67761fff15d7ccc22e42b80846e69449a"
+dependencies = [
+ "num-traits",
+ "rand 0.8.5",
+]
+
 [[package]]
 name = "arrayref"
 version = "0.3.9"
@@ -604,24 +841,30 @@ dependencies = [
 
 [[package]]
 name = "async-trait"
-version = "0.1.86"
+version = "0.1.88"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "644dd749086bf3771a2fbc5f256fdb982d53f011c7d5d560304eafeecebce79d"
+checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
 name = "atomic"
-version = "0.6.0"
+version = "0.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8d818003e740b63afc82337e3160717f4f63078720a810b7b903e70a5d1d2994"
+checksum = "a89cbf775b137e9b968e67227ef7f775587cde3fd31b0d8599dbd0f598a48340"
 dependencies = [
  "bytemuck",
 ]
 
+[[package]]
+name = "atomic-waker"
+version = "1.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
+
 [[package]]
 name = "aurora-engine-modexp"
 version = "1.2.0"
@@ -634,26 +877,26 @@ dependencies = [
 
 [[package]]
 name = "auto_impl"
-version = "1.2.1"
+version = "1.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e12882f59de5360c748c4cbf569a042d5fb0eb515f7bea9c1f470b47f6ffbd73"
+checksum = "ffdcb70bdbc4d478427380519163274ac86e52916e10f0a8889adf0f96d3fee7"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
 name = "autocfg"
-version = "1.4.0"
+version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
+checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
 
 [[package]]
 name = "aws-config"
-version = "1.5.18"
+version = "1.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "90aff65e86db5fe300752551c1b015ef72b708ac54bded8ef43d0d53cb7cb0b1"
+checksum = "ebd9b83179adf8998576317ce47785948bcff399ec5b15f4dfbdedd44ddf5b92"
 dependencies = [
  "aws-credential-types",
  "aws-runtime",
@@ -661,7 +904,7 @@ dependencies = [
  "aws-sdk-ssooidc",
  "aws-sdk-sts",
  "aws-smithy-async",
- "aws-smithy-http 0.61.1",
+ "aws-smithy-http",
  "aws-smithy-json",
  "aws-smithy-runtime",
  "aws-smithy-runtime-api",
@@ -670,7 +913,7 @@ dependencies = [
  "bytes",
  "fastrand",
  "hex",
- "http 0.2.12",
+ "http 1.3.1",
  "ring",
  "time",
  "tokio",
@@ -681,9 +924,9 @@ dependencies = [
 
 [[package]]
 name = "aws-credential-types"
-version = "1.2.1"
+version = "1.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "60e8f6b615cb5fc60a98132268508ad104310f0cfb25a1c22eee76efdf9154da"
+checksum = "b68c2194a190e1efc999612792e25b1ab3abfefe4306494efaaabc25933c0cbe"
 dependencies = [
  "aws-smithy-async",
  "aws-smithy-runtime-api",
@@ -691,17 +934,40 @@ dependencies = [
  "zeroize",
 ]
 
+[[package]]
+name = "aws-lc-rs"
+version = "1.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08b5d4e069cbc868041a64bd68dc8cb39a0d79585cd6c5a24caa8c2d622121be"
+dependencies = [
+ "aws-lc-sys",
+ "zeroize",
+]
+
+[[package]]
+name = "aws-lc-sys"
+version = "0.30.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dbfd150b5dbdb988bcc8fb1fe787eb6b7ee6180ca24da683b61ea5405f3d43ff"
+dependencies = [
+ "bindgen",
+ "cc",
+ "cmake",
+ "dunce",
+ "fs_extra",
+]
+
 [[package]]
 name = "aws-runtime"
-version = "1.5.5"
+version = "1.5.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "76dd04d39cc12844c0994f2c9c5a6f5184c22e9188ec1ff723de41910a21dcad"
+checksum = "b2090e664216c78e766b6bac10fe74d2f451c02441d43484cd76ac9a295075f7"
 dependencies = [
  "aws-credential-types",
  "aws-sigv4",
  "aws-smithy-async",
  "aws-smithy-eventstream",
- "aws-smithy-http 0.60.12",
+ "aws-smithy-http",
  "aws-smithy-runtime",
  "aws-smithy-runtime-api",
  "aws-smithy-types",
@@ -710,7 +976,6 @@ dependencies = [
  "fastrand",
  "http 0.2.12",
  "http-body 0.4.6",
- "once_cell",
  "percent-encoding",
  "pin-project-lite",
  "tracing",
@@ -719,9 +984,9 @@ dependencies = [
 
 [[package]]
 name = "aws-sdk-s3"
-version = "1.78.0"
+version = "1.98.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3038614b6cf7dd68d9a7b5b39563d04337eb3678d1d4173e356e927b0356158a"
+checksum = "029e89cae7e628553643aecb3a3f054a0a0912ff0fd1f5d6a0b4fda421dce64b"
 dependencies = [
  "aws-credential-types",
  "aws-runtime",
@@ -729,7 +994,7 @@ dependencies = [
  "aws-smithy-async",
  "aws-smithy-checksums",
  "aws-smithy-eventstream",
- "aws-smithy-http 0.61.1",
+ "aws-smithy-http",
  "aws-smithy-json",
  "aws-smithy-runtime",
  "aws-smithy-runtime-api",
@@ -741,70 +1006,70 @@ dependencies = [
  "hex",
  "hmac",
  "http 0.2.12",
+ "http 1.3.1",
  "http-body 0.4.6",
  "lru",
- "once_cell",
  "percent-encoding",
  "regex-lite",
- "sha2",
+ "sha2 0.10.9",
  "tracing",
  "url",
 ]
 
 [[package]]
 name = "aws-sdk-sso"
-version = "1.61.0"
+version = "1.76.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e65ff295979977039a25f5a0bf067a64bc5e6aa38f3cef4037cf42516265553c"
+checksum = "64bf26698dd6d238ef1486bdda46f22a589dc813368ba868dc3d94c8d27b56ba"
 dependencies = [
  "aws-credential-types",
  "aws-runtime",
  "aws-smithy-async",
- "aws-smithy-http 0.61.1",
+ "aws-smithy-http",
  "aws-smithy-json",
  "aws-smithy-runtime",
  "aws-smithy-runtime-api",
  "aws-smithy-types",
  "aws-types",
  "bytes",
+ "fastrand",
  "http 0.2.12",
- "once_cell",
  "regex-lite",
  "tracing",
 ]
 
 [[package]]
 name = "aws-sdk-ssooidc"
-version = "1.62.0"
+version = "1.77.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "91430a60f754f235688387b75ee798ef00cfd09709a582be2b7525ebb5306d4f"
+checksum = "09cd07ed1edd939fae854a22054299ae3576500f4e0fadc560ca44f9c6ea1664"
 dependencies = [
  "aws-credential-types",
  "aws-runtime",
  "aws-smithy-async",
- "aws-smithy-http 0.61.1",
+ "aws-smithy-http",
  "aws-smithy-json",
  "aws-smithy-runtime",
  "aws-smithy-runtime-api",
  "aws-smithy-types",
  "aws-types",
  "bytes",
+ "fastrand",
  "http 0.2.12",
- "once_cell",
  "regex-lite",
  "tracing",
 ]
 
 [[package]]
 name = "aws-sdk-sts"
-version = "1.62.0"
+version = "1.78.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9276e139d39fff5a0b0c984fc2d30f970f9a202da67234f948fda02e5bea1dbe"
+checksum = "37f7766d2344f56d10d12f3c32993da36d78217f32594fe4fb8e57a538c1cdea"
 dependencies = [
  "aws-credential-types",
  "aws-runtime",
  "aws-smithy-async",
- "aws-smithy-http 0.61.1",
+ "aws-smithy-http",
  "aws-smithy-json",
  "aws-smithy-query",
  "aws-smithy-runtime",
@@ -812,21 +1077,21 @@ dependencies = [
  "aws-smithy-types",
  "aws-smithy-xml",
  "aws-types",
+ "fastrand",
  "http 0.2.12",
- "once_cell",
  "regex-lite",
  "tracing",
 ]
 
 [[package]]
 name = "aws-sigv4"
-version = "1.2.9"
+version = "1.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9bfe75fad52793ce6dec0dc3d4b1f388f038b5eb866c8d4d7f3a8e21b5ea5051"
+checksum = "ddfb9021f581b71870a17eac25b52335b82211cdc092e02b6876b2bcefa61666"
 dependencies = [
  "aws-credential-types",
  "aws-smithy-eventstream",
- "aws-smithy-http 0.60.12",
+ "aws-smithy-http",
  "aws-smithy-runtime-api",
  "aws-smithy-types",
  "bytes",
@@ -835,12 +1100,11 @@ dependencies = [
  "hex",
  "hmac",
  "http 0.2.12",
- "http 1.2.0",
- "once_cell",
+ "http 1.3.1",
  "p256 0.11.1",
  "percent-encoding",
  "ring",
- "sha2",
+ "sha2 0.10.9",
  "subtle",
  "time",
  "tracing",
@@ -849,9 +1113,9 @@ dependencies = [
 
 [[package]]
 name = "aws-smithy-async"
-version = "1.2.4"
+version = "1.2.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fa59d1327d8b5053c54bf2eaae63bf629ba9e904434d0835a28ed3c0ed0a614e"
+checksum = "1e190749ea56f8c42bf15dd76c65e14f8f765233e6df9b0506d9d934ebef867c"
 dependencies = [
  "futures-util",
  "pin-project-lite",
@@ -860,31 +1124,29 @@ dependencies = [
 
 [[package]]
 name = "aws-smithy-checksums"
-version = "0.63.0"
+version = "0.63.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "db2dc8d842d872529355c72632de49ef8c5a2949a4472f10e802f28cf925770c"
+checksum = "5ab9472f7a8ec259ddb5681d2ef1cb1cf16c0411890063e67cdc7b62562cc496"
 dependencies = [
- "aws-smithy-http 0.60.12",
+ "aws-smithy-http",
  "aws-smithy-types",
  "bytes",
- "crc32c",
- "crc32fast",
- "crc64fast-nvme",
+ "crc-fast",
  "hex",
  "http 0.2.12",
  "http-body 0.4.6",
  "md-5",
  "pin-project-lite",
  "sha1",
- "sha2",
+ "sha2 0.10.9",
  "tracing",
 ]
 
 [[package]]
 name = "aws-smithy-eventstream"
-version = "0.60.7"
+version = "0.60.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "461e5e02f9864cba17cff30f007c2e37ade94d01e87cdb5204e44a84e6d38c17"
+checksum = "338a3642c399c0a5d157648426110e199ca7fd1c689cc395676b81aa563700c4"
 dependencies = [
  "aws-smithy-types",
  "bytes",
@@ -893,18 +1155,19 @@ dependencies = [
 
 [[package]]
 name = "aws-smithy-http"
-version = "0.60.12"
+version = "0.62.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7809c27ad8da6a6a68c454e651d4962479e81472aa19ae99e59f9aba1f9713cc"
+checksum = "99335bec6cdc50a346fda1437f9fefe33abf8c99060739a546a16457f2862ca9"
 dependencies = [
+ "aws-smithy-eventstream",
  "aws-smithy-runtime-api",
  "aws-smithy-types",
  "bytes",
  "bytes-utils",
  "futures-core",
  "http 0.2.12",
+ "http 1.3.1",
  "http-body 0.4.6",
- "once_cell",
  "percent-encoding",
  "pin-project-lite",
  "pin-utils",
@@ -912,35 +1175,52 @@ dependencies = [
 ]
 
 [[package]]
-name = "aws-smithy-http"
-version = "0.61.1"
+name = "aws-smithy-http-client"
+version = "1.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e6f276f21c7921fe902826618d1423ae5bf74cf8c1b8472aee8434f3dfd31824"
+checksum = "f108f1ca850f3feef3009bdcc977be201bca9a91058864d9de0684e64514bee0"
 dependencies = [
- "aws-smithy-eventstream",
+ "aws-smithy-async",
  "aws-smithy-runtime-api",
  "aws-smithy-types",
- "bytes",
- "bytes-utils",
- "futures-core",
+ "h2 0.3.27",
+ "h2 0.4.11",
  "http 0.2.12",
+ "http 1.3.1",
  "http-body 0.4.6",
- "once_cell",
- "percent-encoding",
+ "hyper 0.14.32",
+ "hyper 1.6.0",
+ "hyper-rustls 0.24.2",
+ "hyper-rustls 0.27.7",
+ "hyper-util",
  "pin-project-lite",
- "pin-utils",
+ "rustls 0.21.12",
+ "rustls 0.23.29",
+ "rustls-native-certs 0.8.1",
+ "rustls-pki-types",
+ "tokio",
+ "tower",
  "tracing",
 ]
 
 [[package]]
 name = "aws-smithy-json"
-version = "0.61.2"
+version = "0.61.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "623a51127f24c30776c8b374295f2df78d92517386f77ba30773f15a30ce1422"
+checksum = "a16e040799d29c17412943bdbf488fd75db04112d0c0d4b9290bacf5ae0014b9"
 dependencies = [
  "aws-smithy-types",
 ]
 
+[[package]]
+name = "aws-smithy-observability"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9364d5989ac4dd918e5cc4c4bdcc61c9be17dcd2586ea7f69e348fc7c6cab393"
+dependencies = [
+ "aws-smithy-runtime-api",
+]
+
 [[package]]
 name = "aws-smithy-query"
 version = "0.60.7"
@@ -953,42 +1233,39 @@ dependencies = [
 
 [[package]]
 name = "aws-smithy-runtime"
-version = "1.7.8"
+version = "1.8.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d526a12d9ed61fadefda24abe2e682892ba288c2018bcb38b1b4c111d13f6d92"
+checksum = "c3aaec682eb189e43c8a19c3dab2fe54590ad5f2cc2d26ab27608a20f2acf81c"
 dependencies = [
  "aws-smithy-async",
- "aws-smithy-http 0.60.12",
+ "aws-smithy-http",
+ "aws-smithy-http-client",
+ "aws-smithy-observability",
  "aws-smithy-runtime-api",
  "aws-smithy-types",
  "bytes",
  "fastrand",
- "h2",
  "http 0.2.12",
+ "http 1.3.1",
  "http-body 0.4.6",
  "http-body 1.0.1",
- "httparse",
- "hyper",
- "hyper-rustls",
- "once_cell",
  "pin-project-lite",
  "pin-utils",
- "rustls",
  "tokio",
  "tracing",
 ]
 
 [[package]]
 name = "aws-smithy-runtime-api"
-version = "1.7.3"
+version = "1.8.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "92165296a47a812b267b4f41032ff8069ab7ff783696d217f0994a0d7ab585cd"
+checksum = "9852b9226cb60b78ce9369022c0df678af1cac231c882d5da97a0c4e03be6e67"
 dependencies = [
  "aws-smithy-async",
  "aws-smithy-types",
  "bytes",
  "http 0.2.12",
- "http 1.2.0",
+ "http 1.3.1",
  "pin-project-lite",
  "tokio",
  "tracing",
@@ -997,16 +1274,16 @@ dependencies = [
 
 [[package]]
 name = "aws-smithy-types"
-version = "1.2.13"
+version = "1.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c7b8a53819e42f10d0821f56da995e1470b199686a1809168db6ca485665f042"
+checksum = "d498595448e43de7f4296b7b7a18a8a02c61ec9349128c80a368f7c3b4ab11a8"
 dependencies = [
  "base64-simd",
  "bytes",
  "bytes-utils",
  "futures-core",
  "http 0.2.12",
- "http 1.2.0",
+ "http 1.3.1",
  "http-body 0.4.6",
  "http-body 1.0.1",
  "http-body-util",
@@ -1023,18 +1300,18 @@ dependencies = [
 
 [[package]]
 name = "aws-smithy-xml"
-version = "0.60.9"
+version = "0.60.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ab0b0166827aa700d3dc519f72f8b3a91c35d0b8d042dc5d643a91e6f80648fc"
+checksum = "3db87b96cb1b16c024980f133968d52882ca0daaee3a086c6decc500f6c99728"
 dependencies = [
  "xmlparser",
 ]
 
 [[package]]
 name = "aws-types"
-version = "1.3.5"
+version = "1.3.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dfbd0a668309ec1f66c0f6bda4840dd6d4796ae26d699ebc266d7cc95c6d040f"
+checksum = "8a322fec39e4df22777ed3ad8ea868ac2f94cd15e1a55f6ee8d8d6305057689a"
 dependencies = [
  "aws-credential-types",
  "aws-smithy-async",
@@ -1046,9 +1323,9 @@ dependencies = [
 
 [[package]]
 name = "backtrace"
-version = "0.3.74"
+version = "0.3.75"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8d82cb332cdfaed17ae235a638438ac4d4839913cc2af585c3c6746e8f8bee1a"
+checksum = "6806a6321ec58106fea15becdad98371e28d92ccbc7c8f1b3b6dd724fe8f1002"
 dependencies = [
  "addr2line",
  "cfg-if",
@@ -1102,9 +1379,9 @@ dependencies = [
 
 [[package]]
 name = "base64ct"
-version = "1.6.0"
+version = "1.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b"
+checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba"
 
 [[package]]
 name = "bincode"
@@ -1115,6 +1392,29 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "bindgen"
+version = "0.69.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "271383c67ccabffb7381723dea0672a673f292304fcb45c01cc648c7a8d58088"
+dependencies = [
+ "bitflags 2.9.1",
+ "cexpr",
+ "clang-sys",
+ "itertools 0.12.1",
+ "lazy_static",
+ "lazycell",
+ "log",
+ "prettyplease",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "rustc-hash 1.1.0",
+ "shlex",
+ "syn 2.0.104",
+ "which",
+]
+
 [[package]]
 name = "bit-set"
 version = "0.5.3"
@@ -1147,9 +1447,9 @@ checksum = "5e764a1d40d510daf35e07be9eb06e75770908c27d411ee6c92109c9840eaaf7"
 
 [[package]]
 name = "bitcode"
-version = "0.6.5"
+version = "0.6.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "18c1406a27371b2f76232a2259df6ab607b91b5a0a7476a7729ff590df5a969a"
+checksum = "cf300f4aa6e66f3bdff11f1236a88c622fe47ea814524792240b4d554d9858ee"
 dependencies = [
  "arrayvec",
  "bitcode_derive",
@@ -1166,7 +1466,23 @@ checksum = "42b6b4cb608b8282dc3b53d0f4c9ab404655d562674c682db7e6c0458cc83c23"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
+]
+
+[[package]]
+name = "bitcoin-io"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b47c4ab7a93edb0c7198c5535ed9b52b63095f4e9b45279c6736cec4b856baf"
+
+[[package]]
+name = "bitcoin_hashes"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bb18c03d0db0247e147a21a6faafd5a7eb851c743db062de72018b6b7e8e4d16"
+dependencies = [
+ "bitcoin-io",
+ "hex-conservative",
 ]
 
 [[package]]
@@ -1177,9 +1493,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
 
 [[package]]
 name = "bitflags"
-version = "2.8.0"
+version = "2.9.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8f68f53c83ab957f72c32642f3868eec03eb974d1fb82e453128456482613d36"
+checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967"
 
 [[package]]
 name = "bitvec"
@@ -1215,16 +1531,24 @@ dependencies = [
 
 [[package]]
 name = "blake3"
-version = "1.6.0"
+version = "1.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1230237285e3e10cde447185e8975408ae24deaa67205ce684805c25bc0c7937"
+checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0"
 dependencies = [
  "arrayref",
  "arrayvec",
  "cc",
  "cfg-if",
  "constant_time_eq 0.3.1",
- "memmap2",
+]
+
+[[package]]
+name = "block-buffer"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4"
+dependencies = [
+ "generic-array",
 ]
 
 [[package]]
@@ -1251,9 +1575,9 @@ dependencies = [
 
 [[package]]
 name = "blst"
-version = "0.3.14"
+version = "0.3.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "47c79a94619fade3c0b887670333513a67ac28a6a7e653eb260bf0d4103db38d"
+checksum = "4fd49896f12ac9b6dcd7a5998466b9b58263a695a3dd1ecc1aaca2e12a90b080"
 dependencies = [
  "cc",
  "glob",
@@ -1267,7 +1591,7 @@ version = "0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c34e20109dce74b02019885a01edc8ca485380a297ed8d6eb9e63e657774074b"
 dependencies = [
- "getrandom 0.2.15",
+ "getrandom 0.2.16",
  "js-sys",
  "primitive-types",
  "rustc-hex",
@@ -1278,9 +1602,9 @@ dependencies = [
 
 [[package]]
 name = "bon"
-version = "3.3.2"
+version = "3.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fe7acc34ff59877422326db7d6f2d845a582b16396b6b08194942bf34c6528ab"
+checksum = "f61138465baf186c63e8d9b6b613b508cd832cba4ce93cf37ce5f096f91ac1a6"
 dependencies = [
  "bon-macros",
  "rustversion",
@@ -1288,9 +1612,9 @@ dependencies = [
 
 [[package]]
 name = "bon-macros"
-version = "3.3.2"
+version = "3.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4159dd617a7fbc9be6a692fe69dc2954f8e6bb6bb5e4d7578467441390d77fd0"
+checksum = "40d1dad34aa19bf02295382f08d9bc40651585bd497266831d40ee6296fb49ca"
 dependencies = [
  "darling",
  "ident_case",
@@ -1298,7 +1622,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rustversion",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -1321,7 +1645,7 @@ dependencies = [
  "proc-macro-crate",
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -1342,21 +1666,21 @@ checksum = "b4ae4235e6dac0694637c763029ecea1a2ec9e4e06ec2729bd21ba4d9c863eb7"
 
 [[package]]
 name = "bumpalo"
-version = "3.17.0"
+version = "3.19.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf"
+checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43"
 
 [[package]]
 name = "byte-slice-cast"
-version = "1.2.2"
+version = "1.2.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c3ac9f8b63eca6fd385229b3675f6cc0dc5c8a5c8a54a59d4f52ffd670d87b0c"
+checksum = "7575182f7272186991736b70173b0ea045398f984bf5ebbb3804736ce1330c9d"
 
 [[package]]
 name = "bytemuck"
-version = "1.21.0"
+version = "1.23.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef657dfab802224e671f5818e9a4935f9b1957ed18e58292690cc39e7a4092a3"
+checksum = "5c76a5792e44e4abe34d3abf15636779261d45a7450612059293d1d2cfc63422"
 
 [[package]]
 name = "byteorder"
@@ -1366,9 +1690,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b"
 
 [[package]]
 name = "bytes"
-version = "1.10.0"
+version = "1.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f61dac84819c6588b558454b194026eb1f09c293b9036ae9b159e74e73ab6cf9"
+checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a"
 dependencies = [
  "serde",
 ]
@@ -1405,9 +1729,9 @@ dependencies = [
 
 [[package]]
 name = "c-kzg"
-version = "1.0.3"
+version = "2.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f0307f72feab3300336fb803a57134159f6e20139af1357f36c54cb90d8e8928"
+checksum = "7318cfa722931cb5fe0838b98d3ce5621e75f6a6408abc21721d80de9223f2e4"
 dependencies = [
  "blst",
  "cc",
@@ -1420,16 +1744,16 @@ dependencies = [
 
 [[package]]
 name = "camino"
-version = "1.1.9"
+version = "1.1.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b96ec4966b5813e2c0507c1f86115c8c5abaadc3980879c3424042a02fd1ad3"
+checksum = "0da45bc31171d8d6960122e222a67740df867c1dd53b4d51caa297084c185cab"
 dependencies = [
  "serde",
 ]
 
 [[package]]
 name = "cargo-openvm"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "aws-config",
  "aws-sdk-s3",
@@ -1440,7 +1764,6 @@ dependencies = [
  "itertools 0.14.0",
  "openvm-build",
  "openvm-circuit",
- "openvm-native-recursion",
  "openvm-sdk",
  "openvm-stark-backend",
  "openvm-stark-sdk",
@@ -1450,8 +1773,8 @@ dependencies = [
  "target-lexicon 0.12.16",
  "tempfile",
  "tokio",
- "toml 0.8.20",
- "toml_edit 0.22.24",
+ "toml 0.8.23",
+ "toml_edit 0.22.27",
  "tracing",
  "vergen",
 ]
@@ -1473,7 +1796,7 @@ checksum = "2d886547e41f740c616ae73108f6eb70afe6d940c7bc697cb30f13daec073037"
 dependencies = [
  "camino",
  "cargo-platform",
- "semver 1.0.25",
+ "semver 1.0.26",
  "serde",
  "serde_json",
  "thiserror 1.0.69",
@@ -1486,21 +1809,30 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5"
 
 [[package]]
-name = "cc"
-version = "1.2.14"
+name = "cc"
+version = "1.2.29"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c1599538de2394445747c8cf7935946e3cc27e9625f889d979bfb2aaf569362"
+dependencies = [
+ "jobserver",
+ "libc",
+ "shlex",
+]
+
+[[package]]
+name = "cexpr"
+version = "0.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0c3d1b2e905a3a7b00a6141adb0e4c0bb941d11caf55349d863942a1cc44e3c9"
+checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
 dependencies = [
- "jobserver",
- "libc",
- "shlex",
+ "nom",
 ]
 
 [[package]]
 name = "cfg-if"
-version = "1.0.0"
+version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268"
 
 [[package]]
 name = "cfg_aliases"
@@ -1510,15 +1842,15 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
 
 [[package]]
 name = "chrono"
-version = "0.4.39"
+version = "0.4.41"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7e36cc9d416881d2e24f9a963be5fb1cd90966419ac844274161d10488b3e825"
+checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d"
 dependencies = [
  "android-tzdata",
  "iana-time-zone",
  "num-traits",
  "serde",
- "windows-targets 0.52.6",
+ "windows-link",
 ]
 
 [[package]]
@@ -1558,11 +1890,22 @@ dependencies = [
  "inout",
 ]
 
+[[package]]
+name = "clang-sys"
+version = "1.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
+dependencies = [
+ "glob",
+ "libc",
+ "libloading",
+]
+
 [[package]]
 name = "clap"
-version = "4.5.30"
+version = "4.5.41"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "92b7b18d71fad5313a1e320fa9897994228ce274b60faa4d694fe0ea89cd9e6d"
+checksum = "be92d32e80243a54711e5d7ce823c35c41c9d929dc4ab58e1276f625841aadf9"
 dependencies = [
  "clap_builder",
  "clap_derive",
@@ -1570,39 +1913,117 @@ dependencies = [
 
 [[package]]
 name = "clap_builder"
-version = "4.5.30"
+version = "4.5.41"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a35db2071778a7344791a4fb4f95308b5673d219dee3ae348b86642574ecc90c"
+checksum = "707eab41e9622f9139419d573eca0900137718000c517d47da73045f54331c3d"
 dependencies = [
  "anstream",
  "anstyle",
  "clap_lex",
  "strsim",
+ "terminal_size",
 ]
 
 [[package]]
 name = "clap_derive"
-version = "4.5.28"
+version = "4.5.41"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bf4ced95c6f4a675af3da73304b9ac4ed991640c36374e4b46795c49e17cf1ed"
+checksum = "ef4f52386a59ca4c860f7393bcf8abd8dfd91ecccc0f774635ff68e92eeef491"
 dependencies = [
  "heck",
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
 name = "clap_lex"
-version = "0.7.4"
+version = "0.7.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675"
+
+[[package]]
+name = "cmake"
+version = "0.1.54"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7caa3f9de89ddbe2c607f4101924c5abec803763ae9534e4f4d7d8f84aa81f0"
+dependencies = [
+ "cc",
+]
+
+[[package]]
+name = "codspeed"
+version = "3.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a7524e02ff6173bc143d9abc01b518711b77addb60de871bbe5686843f88fb48"
+dependencies = [
+ "anyhow",
+ "bincode",
+ "colored",
+ "glob",
+ "libc",
+ "nix",
+ "serde",
+ "serde_json",
+ "statrs",
+ "uuid",
+]
+
+[[package]]
+name = "codspeed-divan-compat"
+version = "3.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "157f6307b7400d74f3e41bd429b751b53d05c138a6a0f35853055e2523440354"
+dependencies = [
+ "codspeed",
+ "codspeed-divan-compat-macros",
+ "codspeed-divan-compat-walltime",
+]
+
+[[package]]
+name = "codspeed-divan-compat-macros"
+version = "3.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e5e422ac666f5871ab86d17b0f7292696ef194138bab5b49f743d23799cd6c04"
+dependencies = [
+ "divan-macros",
+ "itertools 0.14.0",
+ "proc-macro-crate",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.104",
+]
+
+[[package]]
+name = "codspeed-divan-compat-walltime"
+version = "3.0.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6"
+checksum = "66715e496e52fe861695e2644577adc7573544a729585fba4737193a62fd5a8a"
+dependencies = [
+ "cfg-if",
+ "clap",
+ "codspeed",
+ "condtype",
+ "divan-macros",
+ "libc",
+ "regex-lite",
+]
 
 [[package]]
 name = "colorchoice"
-version = "1.0.3"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75"
+
+[[package]]
+name = "colored"
+version = "2.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990"
+checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c"
+dependencies = [
+ "lazy_static",
+ "windows-sys 0.59.0",
+]
 
 [[package]]
 name = "concurrent-queue"
@@ -1613,6 +2034,12 @@ dependencies = [
  "crossbeam-utils",
 ]
 
+[[package]]
+name = "condtype"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "baf0a07a401f374238ab8e2f11a104d2851bf9ce711ec69804834de8af45c7af"
+
 [[package]]
 name = "const-default"
 version = "1.0.0"
@@ -1621,9 +2048,9 @@ checksum = "0b396d1f76d455557e1218ec8066ae14bba60b4b36ecd55577ba979f5db7ecaa"
 
 [[package]]
 name = "const-hex"
-version = "1.14.0"
+version = "1.14.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4b0485bab839b018a8f1723fc5391819fea5f8f0f32288ef8a735fd096b6160c"
+checksum = "83e22e0ed40b96a48d3db274f72fd365bd78f67af39b6bbd47e8a15e1c6207ff"
 dependencies = [
  "cfg-if",
  "cpufeatures",
@@ -1686,6 +2113,16 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "core-foundation"
+version = "0.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6"
+dependencies = [
+ "core-foundation-sys",
+ "libc",
+]
+
 [[package]]
 name = "core-foundation-sys"
 version = "0.8.7"
@@ -1703,9 +2140,9 @@ dependencies = [
 
 [[package]]
 name = "crc"
-version = "3.2.1"
+version = "3.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "69e6e4d7b33a94f0991c26729976b10ebde1d34c3ee82408fb536164fa10d636"
+checksum = "9710d3b3739c2e349eb44fe848ad0b7c8cb1e42bd87ee49371df2f7acaf3e675"
 dependencies = [
  "crc-catalog",
 ]
@@ -1717,12 +2154,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "19d374276b40fb8bbdee95aef7c7fa6b5316ec764510eb64b8dd0e2ed0d7e7f5"
 
 [[package]]
-name = "crc32c"
-version = "0.6.8"
+name = "crc-fast"
+version = "1.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3a47af21622d091a8f0fb295b88bc886ac74efcc613efc19f5d0b21de5c89e47"
+checksum = "6bf62af4cc77d8fe1c22dde4e721d87f2f54056139d8c412e1366b740305f56f"
 dependencies = [
- "rustc_version 0.4.1",
+ "crc",
+ "digest 0.10.7",
+ "libc",
+ "rand 0.9.1",
+ "regex",
 ]
 
 [[package]]
@@ -1734,15 +2175,6 @@ dependencies = [
  "cfg-if",
 ]
 
-[[package]]
-name = "crc64fast-nvme"
-version = "1.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4955638f00a809894c947f85a024020a20815b65a5eea633798ea7924edab2b3"
-dependencies = [
- "crc",
-]
-
 [[package]]
 name = "criterion"
 version = "0.5.1"
@@ -1843,9 +2275,9 @@ checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
 
 [[package]]
 name = "crunchy"
-version = "0.2.3"
+version = "0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929"
+checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
 
 [[package]]
 name = "crypto-bigint"
@@ -1883,9 +2315,9 @@ dependencies = [
 
 [[package]]
 name = "darling"
-version = "0.20.10"
+version = "0.20.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6f63b86c8a8826a49b8c21f08a2d07338eec8d900540f8630dc76284be802989"
+checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee"
 dependencies = [
  "darling_core",
  "darling_macro",
@@ -1893,27 +2325,42 @@ dependencies = [
 
 [[package]]
 name = "darling_core"
-version = "0.20.10"
+version = "0.20.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "95133861a8032aaea082871032f5815eb9e98cef03fa916ab4500513994df9e5"
+checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e"
 dependencies = [
  "fnv",
  "ident_case",
  "proc-macro2",
  "quote",
  "strsim",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
 name = "darling_macro"
-version = "0.20.10"
+version = "0.20.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d336a2a514f6ccccaa3e09b02d41d35330c07ddf03a62165fcec10bb561c7806"
+checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead"
 dependencies = [
  "darling_core",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
+]
+
+[[package]]
+name = "dashmap"
+version = "6.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf"
+dependencies = [
+ "cfg-if",
+ "crossbeam-utils",
+ "hashbrown 0.14.5",
+ "lock_api",
+ "once_cell",
+ "parking_lot_core",
+ "rayon",
 ]
 
 [[package]]
@@ -1928,9 +2375,9 @@ dependencies = [
 
 [[package]]
 name = "der"
-version = "0.7.9"
+version = "0.7.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f55bf8e7b65898637379c1b74eb1551107c8294ed26d855ceb9fd1a09cfc9bc0"
+checksum = "e7c1832837b905bbfb5101e07cc24c8deddf52f93225eee6ead5f4d63d53ddcb"
 dependencies = [
  "const-oid",
  "pem-rfc7468",
@@ -1939,9 +2386,9 @@ dependencies = [
 
 [[package]]
 name = "deranged"
-version = "0.3.11"
+version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4"
+checksum = "9c9e6a11ca8224451684bc0d7d5a7adbf8f2fd6887261a1cfc3c0432f9d4068e"
 dependencies = [
  "powerfmt",
  "serde",
@@ -1966,7 +2413,7 @@ checksum = "d150dea618e920167e5973d70ae6ece4385b7164e0d799fe7c122dd0a5d912ad"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -1977,20 +2424,31 @@ checksum = "2cdc8d50f426189eef89dac62fabfa0abb27d5cc008f25bf4156a0203325becc"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
+]
+
+[[package]]
+name = "derive-where"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "510c292c8cf384b1a340b816a9a6cf2599eb8f566a44949024af88418000c50b"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.104",
 ]
 
 [[package]]
 name = "derive_more"
-version = "0.99.19"
+version = "0.99.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3da29a38df43d6f156149c9b43ded5e018ddff2a855cf2cfd62e8cd7d079c69f"
+checksum = "6edb4b64a43d977b8e99788fe3a04d483834fba1215a7e02caa415b626497f7f"
 dependencies = [
  "convert_case",
  "proc-macro2",
  "quote",
  "rustc_version 0.4.1",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -2019,7 +2477,7 @@ checksum = "cb7330aeadfbe296029522e6c40f315320aba36fc43a5b3632f3795348f3bd22"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
  "unicode-xid",
 ]
 
@@ -2031,30 +2489,30 @@ checksum = "bda628edc44c4bb645fbe0f758797143e4e07926f7ebf4e9bdfbd3d2ce621df3"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
  "unicode-xid",
 ]
 
 [[package]]
 name = "diesel"
-version = "2.2.10"
+version = "2.2.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ff3e1edb1f37b4953dd5176916347289ed43d7119cc2e6c7c3f7849ff44ea506"
+checksum = "229850a212cd9b84d4f0290ad9d294afc0ae70fccaa8949dbe8b43ffafa1e20c"
 dependencies = [
  "diesel_derives",
 ]
 
 [[package]]
 name = "diesel_derives"
-version = "2.2.5"
+version = "2.2.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "68d4216021b3ea446fd2047f5c8f8fe6e98af34508a254a01e4d6bc1e844f84d"
+checksum = "1b96984c469425cb577bf6f17121ecb3e4fe1e81de5d8f780dd372802858d756"
 dependencies = [
  "diesel_table_macro_syntax",
  "dsl_auto_type",
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -2063,7 +2521,7 @@ version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "209c735641a413bc68c4923a9d6ad4bcb3ca306b794edaa7eb0b3228a99ffb25"
 dependencies = [
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -2081,7 +2539,7 @@ version = "0.10.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
 dependencies = [
- "block-buffer",
+ "block-buffer 0.10.4",
  "const-oid",
  "crypto-common",
  "subtle",
@@ -2137,7 +2595,18 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
+]
+
+[[package]]
+name = "divan-macros"
+version = "0.1.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8dc51d98e636f5e3b0759a39257458b22619cac7e96d932da6eeb052891bb67c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -2157,7 +2626,7 @@ dependencies = [
  "heck",
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -2168,9 +2637,9 @@ checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813"
 
 [[package]]
 name = "dyn-clone"
-version = "1.0.18"
+version = "1.0.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "feeef44e73baff3a26d371801df019877a9866a8c493d315ab00177843314f35"
+checksum = "1c7a8fb8a9fbf66c1f703fe16184d10ca0ee9d23be5b4436400408ba54a95005"
 
 [[package]]
 name = "ecdsa"
@@ -2190,7 +2659,7 @@ version = "0.16.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ee27f32b5c5292967d2d4a9d7f1e0b0aed2c15daded5a60300e4abb9d8020bca"
 dependencies = [
- "der 0.7.9",
+ "der 0.7.10",
  "digest 0.10.7",
  "elliptic-curve 0.13.8",
  "rfc6979 0.4.0",
@@ -2199,11 +2668,23 @@ dependencies = [
  "spki 0.7.3",
 ]
 
+[[package]]
+name = "educe"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d7bc049e1bd8cdeb31b68bbd586a9464ecf9f3944af3958a7a9d0f8b9799417"
+dependencies = [
+ "enum-ordinalize",
+ "proc-macro2",
+ "quote",
+ "syn 2.0.104",
+]
+
 [[package]]
 name = "either"
-version = "1.13.0"
+version = "1.15.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "60b1af1c220855b6ceac025d3f6ecdd2b7c4894bfe9cd9bda4fbb4bc7c0d4cf0"
+checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
 
 [[package]]
 name = "elf"
@@ -2292,6 +2773,26 @@ version = "0.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c34f04666d835ff5d62e058c3995147c06f42fe86ff053337632bca83e42702d"
 
+[[package]]
+name = "enum-ordinalize"
+version = "4.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fea0dcfa4e54eeb516fe454635a95753ddd39acda650ce703031c6973e315dd5"
+dependencies = [
+ "enum-ordinalize-derive",
+]
+
+[[package]]
+name = "enum-ordinalize-derive"
+version = "4.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0d28318a75d4aead5c4db25382e8ef717932d0346600cacae6357eb5941bc5ff"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.104",
+]
+
 [[package]]
 name = "enum_dispatch"
 version = "0.3.13"
@@ -2301,7 +2802,7 @@ dependencies = [
  "once_cell",
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -2312,7 +2813,7 @@ checksum = "2f9ed6b3789237c8a0c1c505af1c7eb2c560df6186f01b098c3a1064ea532f38"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -2326,9 +2827,9 @@ dependencies = [
 
 [[package]]
 name = "env_logger"
-version = "0.11.6"
+version = "0.11.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dcaee3d8e3cfc3fd92428d477bc97fc29ec8716d180c0d74c643bb26166660e0"
+checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f"
 dependencies = [
  "anstream",
  "anstyle",
@@ -2344,9 +2845,9 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
 
 [[package]]
 name = "errno"
-version = "0.3.10"
+version = "0.3.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "33d852cb9b869c2a9b3df2f71a3074817f01e1844f839a144f5fcef059a4eb5d"
+checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad"
 dependencies = [
  "libc",
  "windows-sys 0.59.0",
@@ -2447,7 +2948,7 @@ dependencies = [
  "chrono",
  "ethers-core",
  "reqwest",
- "semver 1.0.25",
+ "semver 1.0.26",
  "serde",
  "serde_json",
  "thiserror 1.0.69",
@@ -2474,10 +2975,10 @@ dependencies = [
  "path-slash",
  "rayon",
  "regex",
- "semver 1.0.25",
+ "semver 1.0.26",
  "serde",
  "serde_json",
- "sha2",
+ "sha2 0.10.9",
  "solang-parser",
  "svm-rs",
  "svm-rs-builds",
@@ -2592,7 +3093,7 @@ dependencies = [
  "atomic",
  "pear",
  "serde",
- "toml 0.8.20",
+ "toml 0.8.23",
  "uncased",
  "version_check",
 ]
@@ -2617,9 +3118,9 @@ checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
 
 [[package]]
 name = "flate2"
-version = "1.1.0"
+version = "1.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "11faaf5a5236997af9848be0bef4db95824b1d534ebc64d0f0c6cf3e67bd38dc"
+checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d"
 dependencies = [
  "crc32fast",
  "miniz_oxide",
@@ -2633,9 +3134,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
 
 [[package]]
 name = "foldhash"
-version = "0.1.4"
+version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a0d2fde1f7b3d48b8395d5f2de76c18a528bd6a9cdde438df747bfcba3e05d6f"
+checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
 
 [[package]]
 name = "forge-fmt"
@@ -2682,7 +3183,7 @@ dependencies = [
  "regex",
  "reqwest",
  "revm-primitives 1.3.0",
- "semver 1.0.25",
+ "semver 1.0.26",
  "serde",
  "serde_json",
  "serde_regex",
@@ -2703,6 +3204,12 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "fs_extra"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c"
+
 [[package]]
 name = "funty"
 version = "2.0.0"
@@ -2750,7 +3257,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -2801,39 +3308,39 @@ dependencies = [
 
 [[package]]
 name = "getrandom"
-version = "0.2.15"
+version = "0.2.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
+checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592"
 dependencies = [
  "cfg-if",
  "js-sys",
  "libc",
- "wasi 0.11.0+wasi-snapshot-preview1",
+ "wasi 0.11.1+wasi-snapshot-preview1",
  "wasm-bindgen",
 ]
 
 [[package]]
 name = "getrandom"
-version = "0.3.1"
+version = "0.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "43a49c392881ce6d5c3b8cb70f98717b7c07aabbdff06687b9030dbfbe2725f8"
+checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4"
 dependencies = [
  "cfg-if",
  "libc",
- "wasi 0.13.3+wasi-0.2.2",
- "windows-targets 0.52.6",
+ "r-efi",
+ "wasi 0.14.2+wasi-0.2.4",
 ]
 
 [[package]]
 name = "getset"
-version = "0.1.4"
+version = "0.1.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "eded738faa0e88d3abc9d1a13cb11adc2073c400969eeb8793cf7132589959fc"
+checksum = "9cf0fc11e47561d47397154977bc219f4cf809b2974facc3ccb3b89e2436f912"
 dependencies = [
  "proc-macro-error2",
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -2848,7 +3355,7 @@ version = "0.19.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b903b73e45dc0c6c596f2d37eccece7c1c8bb6e4407b001096387c63d0d93724"
 dependencies = [
- "bitflags 2.8.0",
+ "bitflags 2.9.1",
  "libc",
  "libgit2-sys",
  "log",
@@ -2857,9 +3364,9 @@ dependencies = [
 
 [[package]]
 name = "glam"
-version = "0.30.0"
+version = "0.30.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "17fcdf9683c406c2fc4d124afd29c0d595e22210d633cbdb8695ba9935ab1dc6"
+checksum = "50a99dbe56b72736564cfa4b85bf9a33079f16ae8b74983ab06af3b1a3696b11"
 
 [[package]]
 name = "glob"
@@ -2905,9 +3412,9 @@ dependencies = [
 
 [[package]]
 name = "h2"
-version = "0.3.26"
+version = "0.3.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8"
+checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d"
 dependencies = [
  "bytes",
  "fnv",
@@ -2915,7 +3422,26 @@ dependencies = [
  "futures-sink",
  "futures-util",
  "http 0.2.12",
- "indexmap 2.7.1",
+ "indexmap 2.10.0",
+ "slab",
+ "tokio",
+ "tokio-util",
+ "tracing",
+]
+
+[[package]]
+name = "h2"
+version = "0.4.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "17da50a276f1e01e0ba6c029e47b7100754904ee8a278f886546e98575380785"
+dependencies = [
+ "atomic-waker",
+ "bytes",
+ "fnv",
+ "futures-core",
+ "futures-sink",
+ "http 1.3.1",
+ "indexmap 2.10.0",
  "slab",
  "tokio",
  "tokio-util",
@@ -2924,9 +3450,9 @@ dependencies = [
 
 [[package]]
 name = "half"
-version = "2.4.1"
+version = "2.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888"
+checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9"
 dependencies = [
  "cfg-if",
  "crunchy",
@@ -3041,7 +3567,7 @@ dependencies = [
  "rayon",
  "serde",
  "serde_arrays",
- "sha2",
+ "sha2 0.10.9",
  "static_assertions",
  "subtle",
  "unroll",
@@ -3069,7 +3595,7 @@ dependencies = [
  "rayon",
  "serde",
  "serde_arrays",
- "sha2",
+ "sha2 0.10.9",
  "static_assertions",
  "subtle",
  "unroll",
@@ -3116,9 +3642,9 @@ dependencies = [
 
 [[package]]
 name = "hashbrown"
-version = "0.15.2"
+version = "0.15.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289"
+checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5"
 dependencies = [
  "allocator-api2",
  "equivalent",
@@ -3132,7 +3658,7 @@ version = "0.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1"
 dependencies = [
- "hashbrown 0.15.2",
+ "hashbrown 0.15.4",
 ]
 
 [[package]]
@@ -3143,15 +3669,9 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
 
 [[package]]
 name = "hermit-abi"
-version = "0.3.9"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024"
-
-[[package]]
-name = "hermit-abi"
-version = "0.4.0"
+version = "0.5.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc"
+checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
 
 [[package]]
 name = "hex"
@@ -3162,6 +3682,15 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "hex-conservative"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5313b072ce3c597065a808dbf612c4c8e8590bdbf8b579508bf7a762c5eae6cd"
+dependencies = [
+ "arrayvec",
+]
+
 [[package]]
 name = "hex-literal"
 version = "0.4.1"
@@ -3214,9 +3743,9 @@ dependencies = [
 
 [[package]]
 name = "http"
-version = "1.2.0"
+version = "1.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f16ca2af56261c99fba8bac40a10251ce8188205a4c448fbb745a2e4daa76fea"
+checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565"
 dependencies = [
  "bytes",
  "fnv",
@@ -3241,27 +3770,27 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
 dependencies = [
  "bytes",
- "http 1.2.0",
+ "http 1.3.1",
 ]
 
 [[package]]
 name = "http-body-util"
-version = "0.1.2"
+version = "0.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "793429d76616a256bcb62c2a2ec2bed781c8307e797e2598c50010f2bee2544f"
+checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a"
 dependencies = [
  "bytes",
- "futures-util",
- "http 1.2.0",
+ "futures-core",
+ "http 1.3.1",
  "http-body 1.0.1",
  "pin-project-lite",
 ]
 
 [[package]]
 name = "httparse"
-version = "1.10.0"
+version = "1.10.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f2d708df4e7140240a16cd6ab0ab65c972d7433ab77819ea693fde9c43811e2a"
+checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"
 
 [[package]]
 name = "httpdate"
@@ -3279,7 +3808,7 @@ dependencies = [
  "futures-channel",
  "futures-core",
  "futures-util",
- "h2",
+ "h2 0.3.27",
  "http 0.2.12",
  "http-body 0.4.6",
  "httparse",
@@ -3290,35 +3819,94 @@ dependencies = [
  "tokio",
  "tower-service",
  "tracing",
- "want",
-]
-
-[[package]]
-name = "hyper-rustls"
-version = "0.24.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590"
-dependencies = [
- "futures-util",
- "http 0.2.12",
- "hyper",
- "log",
- "rustls",
- "rustls-native-certs",
- "tokio",
- "tokio-rustls",
+ "want",
+]
+
+[[package]]
+name = "hyper"
+version = "1.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc2b571658e38e0c01b1fdca3bbbe93c00d3d71693ff2770043f8c29bc7d6f80"
+dependencies = [
+ "bytes",
+ "futures-channel",
+ "futures-util",
+ "h2 0.4.11",
+ "http 1.3.1",
+ "http-body 1.0.1",
+ "httparse",
+ "itoa",
+ "pin-project-lite",
+ "smallvec",
+ "tokio",
+ "want",
+]
+
+[[package]]
+name = "hyper-rustls"
+version = "0.24.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590"
+dependencies = [
+ "futures-util",
+ "http 0.2.12",
+ "hyper 0.14.32",
+ "log",
+ "rustls 0.21.12",
+ "rustls-native-certs 0.6.3",
+ "tokio",
+ "tokio-rustls 0.24.1",
+]
+
+[[package]]
+name = "hyper-rustls"
+version = "0.27.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58"
+dependencies = [
+ "http 1.3.1",
+ "hyper 1.6.0",
+ "hyper-util",
+ "rustls 0.23.29",
+ "rustls-native-certs 0.8.1",
+ "rustls-pki-types",
+ "tokio",
+ "tokio-rustls 0.26.2",
+ "tower-service",
+]
+
+[[package]]
+name = "hyper-util"
+version = "0.1.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f66d5bd4c6f02bf0542fad85d626775bab9258cf795a4256dcaf3161114d1df"
+dependencies = [
+ "bytes",
+ "futures-channel",
+ "futures-core",
+ "futures-util",
+ "http 1.3.1",
+ "http-body 1.0.1",
+ "hyper 1.6.0",
+ "libc",
+ "pin-project-lite",
+ "socket2",
+ "tokio",
+ "tower-service",
+ "tracing",
 ]
 
 [[package]]
 name = "iana-time-zone"
-version = "0.1.61"
+version = "0.1.63"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220"
+checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8"
 dependencies = [
  "android_system_properties",
  "core-foundation-sys",
  "iana-time-zone-haiku",
  "js-sys",
+ "log",
  "wasm-bindgen",
  "windows-core",
 ]
@@ -3334,21 +3922,22 @@ dependencies = [
 
 [[package]]
 name = "icu_collections"
-version = "1.5.0"
+version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526"
+checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47"
 dependencies = [
  "displaydoc",
+ "potential_utf",
  "yoke",
  "zerofrom",
  "zerovec",
 ]
 
 [[package]]
-name = "icu_locid"
-version = "1.5.0"
+name = "icu_locale_core"
+version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637"
+checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a"
 dependencies = [
  "displaydoc",
  "litemap",
@@ -3357,31 +3946,11 @@ dependencies = [
  "zerovec",
 ]
 
-[[package]]
-name = "icu_locid_transform"
-version = "1.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e"
-dependencies = [
- "displaydoc",
- "icu_locid",
- "icu_locid_transform_data",
- "icu_provider",
- "tinystr",
- "zerovec",
-]
-
-[[package]]
-name = "icu_locid_transform_data"
-version = "1.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e"
-
 [[package]]
 name = "icu_normalizer"
-version = "1.5.0"
+version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f"
+checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979"
 dependencies = [
  "displaydoc",
  "icu_collections",
@@ -3389,67 +3958,54 @@ dependencies = [
  "icu_properties",
  "icu_provider",
  "smallvec",
- "utf16_iter",
- "utf8_iter",
- "write16",
  "zerovec",
 ]
 
 [[package]]
 name = "icu_normalizer_data"
-version = "1.5.0"
+version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516"
+checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3"
 
 [[package]]
 name = "icu_properties"
-version = "1.5.1"
+version = "2.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5"
+checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b"
 dependencies = [
  "displaydoc",
  "icu_collections",
- "icu_locid_transform",
+ "icu_locale_core",
  "icu_properties_data",
  "icu_provider",
- "tinystr",
+ "potential_utf",
+ "zerotrie",
  "zerovec",
 ]
 
 [[package]]
 name = "icu_properties_data"
-version = "1.5.0"
+version = "2.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569"
+checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632"
 
 [[package]]
 name = "icu_provider"
-version = "1.5.0"
+version = "2.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9"
+checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af"
 dependencies = [
  "displaydoc",
- "icu_locid",
- "icu_provider_macros",
+ "icu_locale_core",
  "stable_deref_trait",
  "tinystr",
  "writeable",
  "yoke",
  "zerofrom",
+ "zerotrie",
  "zerovec",
 ]
 
-[[package]]
-name = "icu_provider_macros"
-version = "1.5.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.98",
-]
-
 [[package]]
 name = "ident_case"
 version = "1.0.1"
@@ -3469,9 +4025,9 @@ dependencies = [
 
 [[package]]
 name = "idna_adapter"
-version = "1.2.0"
+version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71"
+checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344"
 dependencies = [
  "icu_normalizer",
  "icu_properties",
@@ -3512,7 +4068,7 @@ checksum = "a0eb5a3343abf848c0984fe4604b2b105da9539376e24fc0a3b0007411ae4fd9"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -3553,12 +4109,12 @@ dependencies = [
 
 [[package]]
 name = "indexmap"
-version = "2.7.1"
+version = "2.10.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652"
+checksum = "fe4cd85333e22411419a0bcae1297d25e58c9443848b11dc6a86fefe8c78a661"
 dependencies = [
  "equivalent",
- "hashbrown 0.15.2",
+ "hashbrown 0.15.4",
  "serde",
 ]
 
@@ -3577,6 +4133,17 @@ dependencies = [
  "generic-array",
 ]
 
+[[package]]
+name = "io-uring"
+version = "0.7.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b86e202f00093dcba4275d4636b93ef9dd75d025ae560d2521b45ea28ab49013"
+dependencies = [
+ "bitflags 2.9.1",
+ "cfg-if",
+ "libc",
+]
+
 [[package]]
 name = "ipnet"
 version = "2.11.0"
@@ -3585,11 +4152,11 @@ checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130"
 
 [[package]]
 name = "is-terminal"
-version = "0.4.15"
+version = "0.4.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e19b23d53f35ce9f56aebc7d1bb4e6ac1e9c0db7ac85c8d1760c04379edced37"
+checksum = "e04d7f318608d35d4b61ddd75cbdaee86b023ebe2bd5a66ee0915f0bf93095a9"
 dependencies = [
- "hermit-abi 0.4.0",
+ "hermit-abi",
  "libc",
  "windows-sys 0.59.0",
 ]
@@ -3618,6 +4185,24 @@ dependencies = [
  "either",
 ]
 
+[[package]]
+name = "itertools"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itertools"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
+dependencies = [
+ "either",
+]
+
 [[package]]
 name = "itertools"
 version = "0.14.0"
@@ -3629,16 +4214,17 @@ dependencies = [
 
 [[package]]
 name = "itoa"
-version = "1.0.14"
+version = "1.0.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674"
+checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c"
 
 [[package]]
 name = "jobserver"
-version = "0.1.32"
+version = "0.1.33"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "48d1dbcbbeb6a7fec7e059840aa538bd62aaccf972c7346c4d9d2059312853d0"
+checksum = "38f262f097c174adebe41eb73d66ae9c06b2844fb0da69969647bbddd9b0538a"
 dependencies = [
+ "getrandom 0.3.3",
  "libc",
 ]
 
@@ -3679,7 +4265,6 @@ dependencies = [
  "num-bigint 0.4.6",
  "once_cell",
  "openvm",
- "openvm-algebra-circuit",
  "openvm-algebra-guest",
  "openvm-algebra-moduli-macros",
  "openvm-algebra-transpiler",
@@ -3688,7 +4273,6 @@ dependencies = [
  "openvm-ecc-guest",
  "openvm-ecc-sw-macros",
  "openvm-ecc-transpiler",
- "openvm-rv32im-circuit",
  "openvm-rv32im-transpiler",
  "openvm-sha256-circuit",
  "openvm-sha256-transpiler",
@@ -3696,6 +4280,7 @@ dependencies = [
  "openvm-stark-sdk",
  "openvm-toolchain-tests",
  "openvm-transpiler",
+ "rand 0.8.5",
  "serde",
  "signature 2.2.0",
 ]
@@ -3710,7 +4295,7 @@ dependencies = [
  "ecdsa 0.16.9",
  "elliptic-curve 0.13.8",
  "once_cell",
- "sha2",
+ "sha2 0.10.9",
 ]
 
 [[package]]
@@ -3771,11 +4356,17 @@ dependencies = [
  "spin",
 ]
 
+[[package]]
+name = "lazycell"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
+
 [[package]]
 name = "libc"
-version = "0.2.169"
+version = "0.2.175"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
+checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543"
 
 [[package]]
 name = "libgit2-sys"
@@ -3789,17 +4380,27 @@ dependencies = [
  "pkg-config",
 ]
 
+[[package]]
+name = "libloading"
+version = "0.8.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667"
+dependencies = [
+ "cfg-if",
+ "windows-targets 0.52.6",
+]
+
 [[package]]
 name = "libm"
-version = "0.2.11"
+version = "0.2.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa"
+checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de"
 
 [[package]]
 name = "libmimalloc-sys"
-version = "0.1.39"
+version = "0.1.43"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "23aa6811d3bd4deb8a84dde645f943476d13b248d818edcf8ce0b2f37f036b44"
+checksum = "bf88cd67e9de251c1781dbe2f641a1a3ad66eaae831b8a2c38fbdc5ddae16d4d"
 dependencies = [
  "cc",
  "libc",
@@ -3807,19 +4408,65 @@ dependencies = [
 
 [[package]]
 name = "libredox"
-version = "0.1.3"
+version = "0.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d"
+checksum = "1580801010e535496706ba011c15f8532df6b42297d2e471fec38ceadd8c0638"
 dependencies = [
- "bitflags 2.8.0",
+ "bitflags 2.9.1",
  "libc",
 ]
 
+[[package]]
+name = "libsecp256k1"
+version = "0.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e79019718125edc905a079a70cfa5f3820bc76139fc91d6f9abc27ea2a887139"
+dependencies = [
+ "arrayref",
+ "base64 0.22.1",
+ "digest 0.9.0",
+ "libsecp256k1-core",
+ "libsecp256k1-gen-ecmult",
+ "libsecp256k1-gen-genmult",
+ "rand 0.8.5",
+ "serde",
+ "sha2 0.9.9",
+]
+
+[[package]]
+name = "libsecp256k1-core"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5be9b9bb642d8522a44d533eab56c16c738301965504753b03ad1de3425d5451"
+dependencies = [
+ "crunchy",
+ "digest 0.9.0",
+ "subtle",
+]
+
+[[package]]
+name = "libsecp256k1-gen-ecmult"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3038c808c55c87e8a172643a7d87187fc6c4174468159cb3090659d55bcb4809"
+dependencies = [
+ "libsecp256k1-core",
+]
+
+[[package]]
+name = "libsecp256k1-gen-genmult"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3db8d6ba2cec9eacc40e6e8ccc98931840301f1006e95647ceb2dd5c3aa06f7c"
+dependencies = [
+ "libsecp256k1-core",
+]
+
 [[package]]
 name = "libz-sys"
-version = "1.1.21"
+version = "1.1.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "df9b68e50e6e0b26f672573834882eb57759f6db9b3be2ea3c35c91188bb4eaa"
+checksum = "8b70e7a7df205e92a1a4cd9aaae7898dac0aa555503cc0a649494d0d60e7651d"
 dependencies = [
  "cc",
  "libc",
@@ -3839,17 +4486,23 @@ version = "0.4.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
 
+[[package]]
+name = "linux-raw-sys"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12"
+
 [[package]]
 name = "litemap"
-version = "0.7.4"
+version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104"
+checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956"
 
 [[package]]
 name = "lock_api"
-version = "0.4.12"
+version = "0.4.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17"
+checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765"
 dependencies = [
  "autocfg",
  "scopeguard",
@@ -3863,9 +4516,9 @@ checksum = "9374ef4228402d4b7e403e5838cb880d9ee663314b0a900d5a6aabf0c213552e"
 
 [[package]]
 name = "log"
-version = "0.4.25"
+version = "0.4.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "04cbf5b083de1c7e0222a7a51dbfdba1cbe1c6ab0b15e29fff3f6c077fd9cd9f"
+checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94"
 
 [[package]]
 name = "lru"
@@ -3873,7 +4526,7 @@ version = "0.12.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38"
 dependencies = [
- "hashbrown 0.15.2",
+ "hashbrown 0.15.4",
 ]
 
 [[package]]
@@ -3884,7 +4537,7 @@ checksum = "1b27834086c65ec3f9387b096d66e99f221cf081c2b738042aa252bcd41204e3"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -3918,9 +4571,9 @@ dependencies = [
 
 [[package]]
 name = "memchr"
-version = "2.7.4"
+version = "2.7.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
+checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
 
 [[package]]
 name = "memmap2"
@@ -3948,9 +4601,9 @@ checksum = "3d97bbf43eb4f088f8ca469930cde17fa036207c9a5e02ccc5107c4e8b17c964"
 
 [[package]]
 name = "metrics"
-version = "0.23.0"
+version = "0.23.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "884adb57038347dfbaf2d5065887b6cf4312330dc8e94bc30a1a839bd79d3261"
+checksum = "3045b4193fbdc5b5681f32f11070da9be3609f189a79f3390706d42587f46bb5"
 dependencies = [
  "ahash",
  "portable-atomic",
@@ -3962,7 +4615,7 @@ version = "0.16.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "62a6a1f7141f1d9bc7a886b87536bbfc97752e08b369e1e0453a9acfab5f5da4"
 dependencies = [
- "indexmap 2.7.1",
+ "indexmap 2.10.0",
  "itoa",
  "lockfree-object-pool",
  "metrics",
@@ -3970,7 +4623,7 @@ dependencies = [
  "once_cell",
  "tracing",
  "tracing-core",
- "tracing-subscriber",
+ "tracing-subscriber 0.3.19",
 ]
 
 [[package]]
@@ -3983,7 +4636,7 @@ dependencies = [
  "crossbeam-epoch",
  "crossbeam-utils",
  "hashbrown 0.14.5",
- "indexmap 2.7.1",
+ "indexmap 2.10.0",
  "metrics",
  "num_cpus",
  "ordered-float",
@@ -3994,9 +4647,9 @@ dependencies = [
 
 [[package]]
 name = "mimalloc"
-version = "0.1.43"
+version = "0.1.47"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "68914350ae34959d83f732418d51e2427a794055d0b9529f48259ac07af65633"
+checksum = "b1791cbe101e95af5764f06f20f6760521f7158f69dbf9d6baf941ee1bf6bc40"
 dependencies = [
  "libmimalloc-sys",
 ]
@@ -4007,24 +4660,30 @@ version = "0.3.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a"
 
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
 [[package]]
 name = "miniz_oxide"
-version = "0.8.4"
+version = "0.8.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b3b1c9bd4fe1f0f8b387f6eb9eb3b4a1aa26185e5750efb9140301703f62cd1b"
+checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
 dependencies = [
  "adler2",
 ]
 
 [[package]]
 name = "mio"
-version = "1.0.3"
+version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd"
+checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c"
 dependencies = [
  "libc",
- "wasi 0.11.0+wasi-snapshot-preview1",
- "windows-sys 0.52.0",
+ "wasi 0.11.1+wasi-snapshot-preview1",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -4042,6 +4701,28 @@ dependencies = [
  "smallvec",
 ]
 
+[[package]]
+name = "nix"
+version = "0.29.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "71e2746dc3a24dd78b3cfcb7be93368c6de9963d30f43a6a73998a9cf4b17b46"
+dependencies = [
+ "bitflags 2.9.1",
+ "cfg-if",
+ "cfg_aliases",
+ "libc",
+]
+
+[[package]]
+name = "nom"
+version = "7.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
 [[package]]
 name = "nu-ansi-term"
 version = "0.46.0"
@@ -4184,33 +4865,34 @@ dependencies = [
 
 [[package]]
 name = "num_cpus"
-version = "1.16.0"
+version = "1.17.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43"
+checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b"
 dependencies = [
- "hermit-abi 0.3.9",
+ "hermit-abi",
  "libc",
 ]
 
 [[package]]
 name = "num_enum"
-version = "0.7.3"
+version = "0.7.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4e613fc340b2220f734a8595782c551f1250e969d87d3be1ae0579e8d4065179"
+checksum = "a973b4e44ce6cad84ce69d797acf9a044532e4184c4f267913d1b546a0727b7a"
 dependencies = [
  "num_enum_derive",
+ "rustversion",
 ]
 
 [[package]]
 name = "num_enum_derive"
-version = "0.7.3"
+version = "0.7.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "af1844ef2428cc3e1cb900be36181049ef3d3193c63e43026cfe202983b27a56"
+checksum = "77e878c846a8abae00dd069496dbe8751b16ac1c3d6bd2a7283a938e8228f90d"
 dependencies = [
  "proc-macro-crate",
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -4251,19 +4933,31 @@ dependencies = [
 
 [[package]]
 name = "once_cell"
-version = "1.20.3"
+version = "1.21.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e"
+checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d"
 dependencies = [
  "critical-section",
  "portable-atomic",
 ]
 
+[[package]]
+name = "once_cell_polyfill"
+version = "1.70.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad"
+
 [[package]]
 name = "oorandom"
-version = "11.1.4"
+version = "11.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6790f58c7ff633d8771f42965289203411a5e5c68388703c06e14f24770b41e"
+
+[[package]]
+name = "opaque-debug"
+version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9"
+checksum = "c08d65885ee38876c4f86fa503fb49d7b507c2b62552df7c70b2fce627e06381"
 
 [[package]]
 name = "open-fastrlp"
@@ -4298,12 +4992,12 @@ checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e"
 
 [[package]]
 name = "openvm"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "bytemuck",
  "chrono",
- "getrandom 0.2.15",
- "getrandom 0.3.1",
+ "getrandom 0.2.16",
+ "getrandom 0.3.3",
  "num-bigint 0.4.6",
  "openvm-custom-insn",
  "openvm-platform",
@@ -4313,13 +5007,12 @@ dependencies = [
 
 [[package]]
 name = "openvm-algebra-circuit"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "derive-new 0.6.0",
  "derive_more 1.0.0",
  "eyre",
  "halo2curves-axiom",
- "itertools 0.14.0",
  "num-bigint 0.4.6",
  "num-traits",
  "openvm-algebra-transpiler",
@@ -4336,23 +5029,23 @@ dependencies = [
  "openvm-stark-sdk",
  "rand 0.8.5",
  "serde",
- "serde-big-array",
  "serde_with",
  "strum",
+ "test-case",
 ]
 
 [[package]]
 name = "openvm-algebra-complex-macros"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "openvm-macros-common",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
 name = "openvm-algebra-guest"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "halo2curves-axiom",
  "num-bigint 0.4.6",
@@ -4367,18 +5060,18 @@ dependencies = [
 
 [[package]]
 name = "openvm-algebra-moduli-macros"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "num-bigint 0.4.6",
  "num-prime",
  "openvm-macros-common",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
 name = "openvm-algebra-tests"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "eyre",
  "num-bigint 0.4.6",
@@ -4395,7 +5088,7 @@ dependencies = [
 
 [[package]]
 name = "openvm-algebra-transpiler"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "openvm-algebra-guest",
  "openvm-instructions",
@@ -4408,57 +5101,62 @@ dependencies = [
 
 [[package]]
 name = "openvm-benchmarks-execute"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
- "cargo-openvm",
+ "bitcode",
  "clap",
- "criterion",
+ "codspeed-divan-compat",
  "derive_more 1.0.0",
  "eyre",
+ "openvm-algebra-circuit",
+ "openvm-algebra-transpiler",
  "openvm-benchmarks-utils",
+ "openvm-bigint-circuit",
+ "openvm-bigint-transpiler",
  "openvm-circuit",
+ "openvm-continuations",
+ "openvm-ecc-circuit",
+ "openvm-ecc-transpiler",
  "openvm-keccak256-circuit",
  "openvm-keccak256-transpiler",
+ "openvm-native-circuit",
+ "openvm-pairing-circuit",
+ "openvm-pairing-guest",
+ "openvm-pairing-transpiler",
  "openvm-rv32im-circuit",
  "openvm-rv32im-transpiler",
  "openvm-sdk",
+ "openvm-sha256-circuit",
+ "openvm-sha256-transpiler",
  "openvm-stark-sdk",
  "openvm-transpiler",
+ "rand 0.8.5",
+ "serde",
  "tracing",
- "tracing-subscriber",
+ "tracing-subscriber 0.3.19",
 ]
 
 [[package]]
 name = "openvm-benchmarks-prove"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "clap",
- "derive-new 0.6.0",
  "derive_more 1.0.0",
  "eyre",
  "k256 0.13.4 (registry+https://github.com/rust-lang/crates.io-index)",
- "num-bigint 0.4.6",
- "openvm-algebra-circuit",
- "openvm-algebra-transpiler",
+ "metrics",
  "openvm-benchmarks-utils",
  "openvm-circuit",
- "openvm-ecc-circuit",
- "openvm-ecc-transpiler",
- "openvm-keccak256-circuit",
- "openvm-keccak256-transpiler",
+ "openvm-continuations",
  "openvm-native-circuit",
  "openvm-native-compiler",
  "openvm-native-recursion",
- "openvm-pairing-circuit",
- "openvm-pairing-guest",
- "openvm-rv32im-circuit",
- "openvm-rv32im-transpiler",
  "openvm-sdk",
  "openvm-stark-backend",
  "openvm-stark-sdk",
  "openvm-transpiler",
+ "rand 0.8.5",
  "rand_chacha 0.3.1",
- "serde",
  "tiny-keccak",
  "tokio",
  "tracing",
@@ -4466,22 +5164,29 @@ dependencies = [
 
 [[package]]
 name = "openvm-benchmarks-utils"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
+ "bitcode",
  "cargo_metadata",
  "clap",
  "eyre",
  "openvm-build",
+ "openvm-circuit",
+ "openvm-continuations",
+ "openvm-native-circuit",
+ "openvm-sdk",
+ "openvm-stark-sdk",
  "openvm-transpiler",
  "tempfile",
  "tracing",
- "tracing-subscriber",
+ "tracing-subscriber 0.3.19",
 ]
 
 [[package]]
 name = "openvm-bigint-circuit"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
+ "alloy-primitives 1.2.1",
  "derive-new 0.6.0",
  "derive_more 1.0.0",
  "openvm-bigint-transpiler",
@@ -4497,11 +5202,12 @@ dependencies = [
  "openvm-stark-sdk",
  "rand 0.8.5",
  "serde",
+ "test-case",
 ]
 
 [[package]]
 name = "openvm-bigint-guest"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "openvm-platform",
  "strum_macros",
@@ -4509,7 +5215,7 @@ dependencies = [
 
 [[package]]
 name = "openvm-bigint-transpiler"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "openvm-bigint-guest",
  "openvm-instructions",
@@ -4523,7 +5229,7 @@ dependencies = [
 
 [[package]]
 name = "openvm-build"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "cargo_metadata",
  "eyre",
@@ -4534,10 +5240,10 @@ dependencies = [
 
 [[package]]
 name = "openvm-circuit"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "backtrace",
- "cfg-if",
+ "dashmap",
  "derivative",
  "derive-new 0.6.0",
  "derive_more 1.0.0",
@@ -4545,13 +5251,14 @@ dependencies = [
  "eyre",
  "getset",
  "itertools 0.14.0",
+ "libc",
+ "memmap2",
  "metrics",
  "openvm-circuit",
  "openvm-circuit-derive",
  "openvm-circuit-primitives",
  "openvm-circuit-primitives-derive",
  "openvm-instructions",
- "openvm-native-circuit",
  "openvm-native-compiler",
  "openvm-poseidon2-air",
  "openvm-rv32im-transpiler",
@@ -4570,16 +5277,17 @@ dependencies = [
 
 [[package]]
 name = "openvm-circuit-derive"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "itertools 0.14.0",
+ "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
 name = "openvm-circuit-primitives"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "derive-new 0.6.0",
  "itertools 0.14.0",
@@ -4595,16 +5303,16 @@ dependencies = [
 
 [[package]]
 name = "openvm-circuit-primitives-derive"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "itertools 0.14.0",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
 name = "openvm-continuations"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "derivative",
  "openvm-circuit",
@@ -4622,15 +5330,16 @@ version = "0.1.0"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
 name = "openvm-ecc-circuit"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "derive-new 0.6.0",
  "derive_more 1.0.0",
+ "halo2curves-axiom",
  "hex-literal 0.4.1",
  "lazy_static",
  "num-bigint 0.4.6",
@@ -4640,14 +5349,14 @@ dependencies = [
  "openvm-circuit",
  "openvm-circuit-derive",
  "openvm-circuit-primitives",
- "openvm-circuit-primitives-derive",
  "openvm-ecc-transpiler",
  "openvm-instructions",
  "openvm-mod-circuit-builder",
+ "openvm-pairing-guest",
  "openvm-rv32-adapters",
- "openvm-rv32im-circuit",
  "openvm-stark-backend",
  "openvm-stark-sdk",
+ "rand 0.8.5",
  "serde",
  "serde_with",
  "strum",
@@ -4655,7 +5364,7 @@ dependencies = [
 
 [[package]]
 name = "openvm-ecc-guest"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "ecdsa 0.16.9",
  "elliptic-curve 0.13.8",
@@ -4673,7 +5382,7 @@ dependencies = [
 
 [[package]]
 name = "openvm-ecc-integration-tests"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "eyre",
  "halo2curves-axiom",
@@ -4690,21 +5399,21 @@ dependencies = [
  "openvm-transpiler",
  "serde",
  "serde_with",
- "toml 0.8.20",
+ "toml 0.8.23",
 ]
 
 [[package]]
 name = "openvm-ecc-sw-macros"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "openvm-macros-common",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
 name = "openvm-ecc-transpiler"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "openvm-ecc-guest",
  "openvm-instructions",
@@ -4717,7 +5426,7 @@ dependencies = [
 
 [[package]]
 name = "openvm-ff-derive"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "addchain",
  "eyre",
@@ -4740,7 +5449,7 @@ dependencies = [
 
 [[package]]
 name = "openvm-instructions"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "backtrace",
  "bitcode",
@@ -4760,18 +5469,18 @@ dependencies = [
 
 [[package]]
 name = "openvm-instructions-derive"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "openvm-instructions",
  "quote",
  "strum",
  "strum_macros",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
 name = "openvm-keccak256"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "eyre",
  "openvm-circuit",
@@ -4788,7 +5497,7 @@ dependencies = [
 
 [[package]]
 name = "openvm-keccak256-circuit"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "derive-new 0.6.0",
  "derive_more 1.0.0",
@@ -4806,22 +5515,20 @@ dependencies = [
  "p3-keccak-air",
  "rand 0.8.5",
  "serde",
- "serde-big-array",
  "strum",
  "tiny-keccak",
- "tracing",
 ]
 
 [[package]]
 name = "openvm-keccak256-guest"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "openvm-platform",
 ]
 
 [[package]]
 name = "openvm-keccak256-transpiler"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "openvm-instructions",
  "openvm-instructions-derive",
@@ -4834,14 +5541,14 @@ dependencies = [
 
 [[package]]
 name = "openvm-macros-common"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
 name = "openvm-mod-circuit-builder"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "halo2curves-axiom",
  "itertools 0.14.0",
@@ -4854,14 +5561,12 @@ dependencies = [
  "openvm-stark-backend",
  "openvm-stark-sdk",
  "rand 0.8.5",
- "serde",
- "serde_with",
  "tracing",
 ]
 
 [[package]]
 name = "openvm-native-circuit"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "derive-new 0.6.0",
  "derive_more 1.0.0",
@@ -4872,22 +5577,26 @@ dependencies = [
  "openvm-circuit-primitives",
  "openvm-circuit-primitives-derive",
  "openvm-instructions",
+ "openvm-native-circuit",
  "openvm-native-compiler",
+ "openvm-native-compiler-derive",
  "openvm-poseidon2-air",
  "openvm-rv32im-circuit",
+ "openvm-rv32im-transpiler",
  "openvm-stark-backend",
  "openvm-stark-sdk",
+ "p3-symmetric",
  "rand 0.8.5",
  "serde",
- "serde-big-array",
  "static_assertions",
  "strum",
- "tracing",
+ "test-case",
+ "test-log",
 ]
 
 [[package]]
 name = "openvm-native-compiler"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "backtrace",
  "itertools 0.14.0",
@@ -4897,13 +5606,10 @@ dependencies = [
  "openvm-circuit",
  "openvm-instructions",
  "openvm-instructions-derive",
- "openvm-native-circuit",
  "openvm-native-compiler-derive",
  "openvm-rv32im-transpiler",
  "openvm-stark-backend",
  "openvm-stark-sdk",
- "p3-symmetric",
- "rand 0.8.5",
  "serde",
  "snark-verifier-sdk",
  "strum",
@@ -4913,15 +5619,15 @@ dependencies = [
 
 [[package]]
 name = "openvm-native-compiler-derive"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
 name = "openvm-native-recursion"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "bitcode",
  "cfg-if",
@@ -4951,7 +5657,7 @@ dependencies = [
 
 [[package]]
 name = "openvm-native-transpiler"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "openvm-instructions",
  "openvm-transpiler",
@@ -4960,7 +5666,7 @@ dependencies = [
 
 [[package]]
 name = "openvm-pairing"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "eyre",
  "group 0.13.0",
@@ -4982,6 +5688,7 @@ dependencies = [
  "openvm-ecc-sw-macros",
  "openvm-ecc-transpiler",
  "openvm-instructions",
+ "openvm-pairing",
  "openvm-pairing-circuit",
  "openvm-pairing-guest",
  "openvm-pairing-transpiler",
@@ -4997,27 +5704,24 @@ dependencies = [
 
 [[package]]
 name = "openvm-pairing-circuit"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "derive-new 0.6.0",
  "derive_more 1.0.0",
  "eyre",
  "halo2curves-axiom",
- "itertools 0.14.0",
  "num-bigint 0.4.6",
  "num-traits",
  "openvm-algebra-circuit",
  "openvm-circuit",
  "openvm-circuit-derive",
  "openvm-circuit-primitives",
- "openvm-circuit-primitives-derive",
  "openvm-ecc-circuit",
  "openvm-ecc-guest",
  "openvm-instructions",
  "openvm-mod-circuit-builder",
  "openvm-pairing-guest",
  "openvm-pairing-transpiler",
- "openvm-rv32-adapters",
  "openvm-rv32im-circuit",
  "openvm-stark-backend",
  "openvm-stark-sdk",
@@ -5028,7 +5732,7 @@ dependencies = [
 
 [[package]]
 name = "openvm-pairing-guest"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "halo2curves-axiom",
  "hex-literal 0.4.1",
@@ -5049,10 +5753,9 @@ dependencies = [
 
 [[package]]
 name = "openvm-pairing-transpiler"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "openvm-instructions",
- "openvm-instructions-derive",
  "openvm-pairing-guest",
  "openvm-stark-backend",
  "openvm-transpiler",
@@ -5062,7 +5765,7 @@ dependencies = [
 
 [[package]]
 name = "openvm-platform"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "critical-section",
  "embedded-alloc",
@@ -5073,7 +5776,7 @@ dependencies = [
 
 [[package]]
 name = "openvm-poseidon2-air"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "derivative",
  "lazy_static",
@@ -5089,7 +5792,7 @@ dependencies = [
 
 [[package]]
 name = "openvm-prof"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "clap",
  "eyre",
@@ -5102,7 +5805,7 @@ dependencies = [
 
 [[package]]
 name = "openvm-rv32-adapters"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "derive-new 0.6.0",
  "itertools 0.14.0",
@@ -5114,14 +5817,11 @@ dependencies = [
  "openvm-stark-backend",
  "openvm-stark-sdk",
  "rand 0.8.5",
- "serde",
- "serde-big-array",
- "serde_with",
 ]
 
 [[package]]
 name = "openvm-rv32im-circuit"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "derive-new 0.6.0",
  "derive_more 1.0.0",
@@ -5138,13 +5838,13 @@ dependencies = [
  "openvm-stark-sdk",
  "rand 0.8.5",
  "serde",
- "serde-big-array",
  "strum",
+ "test-case",
 ]
 
 [[package]]
 name = "openvm-rv32im-guest"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "openvm-custom-insn",
  "p3-field",
@@ -5153,7 +5853,7 @@ dependencies = [
 
 [[package]]
 name = "openvm-rv32im-integration-tests"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "eyre",
  "openvm",
@@ -5166,12 +5866,13 @@ dependencies = [
  "openvm-toolchain-tests",
  "openvm-transpiler",
  "serde",
+ "strum",
  "test-case",
 ]
 
 [[package]]
 name = "openvm-rv32im-transpiler"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "openvm-instructions",
  "openvm-instructions-derive",
@@ -5186,10 +5887,9 @@ dependencies = [
 
 [[package]]
 name = "openvm-sdk"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "alloy-sol-types",
- "async-trait",
  "bitcode",
  "bon",
  "clap",
@@ -5228,6 +5928,7 @@ dependencies = [
  "openvm-stark-sdk",
  "openvm-transpiler",
  "p3-fri",
+ "rand 0.8.5",
  "rrs-lib",
  "serde",
  "serde_json",
@@ -5236,12 +5937,13 @@ dependencies = [
  "snark-verifier-sdk",
  "tempfile",
  "thiserror 1.0.69",
+ "toml 0.8.23",
  "tracing",
 ]
 
 [[package]]
 name = "openvm-sha2"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "eyre",
  "openvm-circuit",
@@ -5253,31 +5955,31 @@ dependencies = [
  "openvm-stark-sdk",
  "openvm-toolchain-tests",
  "openvm-transpiler",
- "sha2",
+ "sha2 0.10.9",
 ]
 
 [[package]]
 name = "openvm-sha256-air"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "openvm-circuit",
  "openvm-circuit-primitives",
  "openvm-stark-backend",
  "openvm-stark-sdk",
  "rand 0.8.5",
- "sha2",
+ "sha2 0.10.9",
 ]
 
 [[package]]
 name = "openvm-sha256-circuit"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "derive-new 0.6.0",
  "derive_more 1.0.0",
+ "hex",
  "openvm-circuit",
  "openvm-circuit-derive",
  "openvm-circuit-primitives",
- "openvm-circuit-primitives-derive",
  "openvm-instructions",
  "openvm-rv32im-circuit",
  "openvm-sha256-air",
@@ -5286,20 +5988,20 @@ dependencies = [
  "openvm-stark-sdk",
  "rand 0.8.5",
  "serde",
- "sha2",
+ "sha2 0.10.9",
  "strum",
 ]
 
 [[package]]
 name = "openvm-sha256-guest"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "openvm-platform",
 ]
 
 [[package]]
 name = "openvm-sha256-transpiler"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "openvm-instructions",
  "openvm-instructions-derive",
@@ -5312,8 +6014,8 @@ dependencies = [
 
 [[package]]
 name = "openvm-stark-backend"
-version = "1.1.1"
-source = "git+https://github.com/openvm-org/stark-backend.git?tag=v1.1.1#0879de162658b797b8dd6b6ee4429cbb8dd78ba1"
+version = "1.2.0-rc.0"
+source = "git+https://github.com/openvm-org/stark-backend.git?tag=v1.2.0-rc.2#c8eb4dde511b068b6b23dc48d7b0695897d5a00a"
 dependencies = [
  "bitcode",
  "cfg-if",
@@ -5340,11 +6042,12 @@ dependencies = [
 
 [[package]]
 name = "openvm-stark-sdk"
-version = "1.1.1"
-source = "git+https://github.com/openvm-org/stark-backend.git?tag=v1.1.1#0879de162658b797b8dd6b6ee4429cbb8dd78ba1"
+version = "1.2.0-rc.0"
+source = "git+https://github.com/openvm-org/stark-backend.git?tag=v1.2.0-rc.2#c8eb4dde511b068b6b23dc48d7b0695897d5a00a"
 dependencies = [
+ "dashmap",
  "derivative",
- "derive_more 0.99.19",
+ "derive_more 0.99.20",
  "ff 0.13.1",
  "itertools 0.14.0",
  "metrics",
@@ -5367,16 +6070,16 @@ dependencies = [
  "serde",
  "serde_json",
  "static_assertions",
- "toml 0.8.20",
+ "toml 0.8.23",
  "tracing",
  "tracing-forest",
- "tracing-subscriber",
+ "tracing-subscriber 0.3.19",
  "zkhash",
 ]
 
 [[package]]
 name = "openvm-toolchain-tests"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "derive_more 1.0.0",
  "eyre",
@@ -5394,6 +6097,7 @@ dependencies = [
  "openvm-stark-backend",
  "openvm-stark-sdk",
  "openvm-transpiler",
+ "rand 0.8.5",
  "serde",
  "tempfile",
  "test-case",
@@ -5401,7 +6105,7 @@ dependencies = [
 
 [[package]]
 name = "openvm-transpiler"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "elf",
  "eyre",
@@ -5415,7 +6119,7 @@ dependencies = [
 
 [[package]]
 name = "openvm-verify-stark"
-version = "1.3.0"
+version = "1.4.0-rc.4"
 dependencies = [
  "eyre",
  "openvm-circuit",
@@ -5462,7 +6166,7 @@ checksum = "51f44edd08f51e2ade572f141051021c5af22677e42b7dd28a88155151c33594"
 dependencies = [
  "ecdsa 0.14.8",
  "elliptic-curve 0.12.3",
- "sha2",
+ "sha2 0.10.9",
 ]
 
 [[package]]
@@ -5477,7 +6181,6 @@ dependencies = [
  "hex-literal 0.4.1",
  "num-bigint 0.4.6",
  "openvm",
- "openvm-algebra-circuit",
  "openvm-algebra-guest",
  "openvm-algebra-moduli-macros",
  "openvm-algebra-transpiler",
@@ -5486,7 +6189,6 @@ dependencies = [
  "openvm-ecc-guest",
  "openvm-ecc-sw-macros",
  "openvm-ecc-transpiler",
- "openvm-rv32im-circuit",
  "openvm-rv32im-transpiler",
  "openvm-sha256-circuit",
  "openvm-sha256-transpiler",
@@ -5494,9 +6196,22 @@ dependencies = [
  "openvm-stark-sdk",
  "openvm-toolchain-tests",
  "openvm-transpiler",
+ "rand 0.8.5",
  "serde",
 ]
 
+[[package]]
+name = "p256"
+version = "0.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c9863ad85fa8f4460f9c48cb909d38a0d689dba1f6f6988a5e3e0d31071bcd4b"
+dependencies = [
+ "ecdsa 0.16.9",
+ "elliptic-curve 0.13.8",
+ "primeorder",
+ "sha2 0.10.9",
+]
+
 [[package]]
 name = "p3-air"
 version = "0.1.0"
@@ -5858,9 +6573,9 @@ dependencies = [
 
 [[package]]
 name = "parity-scale-codec"
-version = "3.7.4"
+version = "3.7.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c9fde3d0718baf5bc92f577d652001da0f8d54cd03a7974e118d04fc888dc23d"
+checksum = "799781ae679d79a948e13d4824a40970bfa500058d245760dd857301059810fa"
 dependencies = [
  "arrayvec",
  "bitvec",
@@ -5874,14 +6589,14 @@ dependencies = [
 
 [[package]]
 name = "parity-scale-codec-derive"
-version = "3.7.4"
+version = "3.7.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "581c837bb6b9541ce7faa9377c20616e4fb7650f6b0f68bc93c827ee504fb7b3"
+checksum = "34b4653168b563151153c9e4c08ebed57fb8262bebfa79711552fa983c623e7a"
 dependencies = [
  "proc-macro-crate",
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -5892,9 +6607,9 @@ checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba"
 
 [[package]]
 name = "parking_lot"
-version = "0.12.3"
+version = "0.12.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27"
+checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13"
 dependencies = [
  "lock_api",
  "parking_lot_core",
@@ -5902,9 +6617,9 @@ dependencies = [
 
 [[package]]
 name = "parking_lot_core"
-version = "0.9.10"
+version = "0.9.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8"
+checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5"
 dependencies = [
  "cfg-if",
  "libc",
@@ -5975,7 +6690,7 @@ dependencies = [
  "digest 0.10.7",
  "hmac",
  "password-hash",
- "sha2",
+ "sha2 0.10.9",
 ]
 
 [[package]]
@@ -5998,7 +6713,7 @@ dependencies = [
  "proc-macro2",
  "proc-macro2-diagnostics",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -6018,12 +6733,12 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
 
 [[package]]
 name = "pest"
-version = "2.7.15"
+version = "2.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b7cafe60d6cf8e62e1b9b2ea516a089c008945bb5a275416789e7db0bc199dc"
+checksum = "1db05f56d34358a8b1066f67cbb203ee3e7ed2ba674a6263a1d5ec6db2204323"
 dependencies = [
  "memchr",
- "thiserror 2.0.11",
+ "thiserror 2.0.12",
  "ucd-trie",
 ]
 
@@ -6034,7 +6749,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db"
 dependencies = [
  "fixedbitset",
- "indexmap 2.7.1",
+ "indexmap 2.10.0",
 ]
 
 [[package]]
@@ -6067,7 +6782,7 @@ dependencies = [
  "phf_shared",
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -6107,15 +6822,15 @@ version = "0.10.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f950b2377845cebe5cf8b5165cb3cc1a5e0fa5cfa3e1f7f55707d8fd82e0a7b7"
 dependencies = [
- "der 0.7.9",
+ "der 0.7.10",
  "spki 0.7.3",
 ]
 
 [[package]]
 name = "pkg-config"
-version = "0.3.31"
+version = "0.3.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "953ec861398dccce10c670dfeaf3ec4911ca479e9c02154b3a215178c5f566f2"
+checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c"
 
 [[package]]
 name = "plotters"
@@ -6147,9 +6862,9 @@ dependencies = [
 
 [[package]]
 name = "portable-atomic"
-version = "1.10.0"
+version = "1.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6"
+checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483"
 
 [[package]]
 name = "poseidon-primitives"
@@ -6162,7 +6877,7 @@ dependencies = [
  "lazy_static",
  "log",
  "rand 0.8.5",
- "rand_xorshift",
+ "rand_xorshift 0.3.0",
  "thiserror 1.0.69",
 ]
 
@@ -6194,7 +6909,7 @@ dependencies = [
  "md-5",
  "memchr",
  "rand 0.9.1",
- "sha2",
+ "sha2 0.10.9",
  "stringprep",
 ]
 
@@ -6209,6 +6924,15 @@ dependencies = [
  "postgres-protocol",
 ]
 
+[[package]]
+name = "potential_utf"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e5a7c30837279ca13e7c867e9e40053bc68740f988cb07f7ca6df43cc734b585"
+dependencies = [
+ "zerovec",
+]
+
 [[package]]
 name = "powerfmt"
 version = "0.2.0"
@@ -6217,9 +6941,9 @@ checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
 
 [[package]]
 name = "ppv-lite86"
-version = "0.2.20"
+version = "0.2.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04"
+checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
 dependencies = [
  "zerocopy",
 ]
@@ -6232,12 +6956,21 @@ checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
 
 [[package]]
 name = "prettyplease"
-version = "0.2.29"
+version = "0.2.35"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6924ced06e1f7dfe3fa48d57b9f74f55d8915f5036121bef647ef4b204895fac"
+checksum = "061c1221631e079b26479d25bbf2275bfe5917ae8419cd7e34f13bfc2aa7539a"
 dependencies = [
  "proc-macro2",
- "syn 2.0.98",
+ "syn 2.0.104",
+]
+
+[[package]]
+name = "primeorder"
+version = "0.13.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "353e1ca18966c16d9deb1c69278edbc5f194139612772bd9537af60ac231e1e6"
+dependencies = [
+ "elliptic-curve 0.13.8",
 ]
 
 [[package]]
@@ -6256,11 +6989,11 @@ dependencies = [
 
 [[package]]
 name = "proc-macro-crate"
-version = "3.2.0"
+version = "3.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8ecf48c7ca261d60b74ab1a7b20da18bede46776b2e55535cb958eb595c5fa7b"
+checksum = "edce586971a4dfaa28950c6f18ed55e0406c1ab88bbce2c6f6293a7aaba73d35"
 dependencies = [
- "toml_edit 0.22.24",
+ "toml_edit 0.22.27",
 ]
 
 [[package]]
@@ -6282,14 +7015,14 @@ dependencies = [
  "proc-macro-error-attr2",
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.93"
+version = "1.0.95"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99"
+checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778"
 dependencies = [
  "unicode-ident",
 ]
@@ -6302,25 +7035,25 @@ checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
  "version_check",
  "yansi 1.0.1",
 ]
 
 [[package]]
 name = "proptest"
-version = "1.6.0"
+version = "1.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "14cae93065090804185d3b75f0bf93b8eeda30c7a9b4a33d3bdb3988d6229e50"
+checksum = "6fcdab19deb5195a31cf7726a210015ff1496ba1464fd42cb4f537b8b01b471f"
 dependencies = [
  "bit-set 0.8.0",
  "bit-vec 0.8.0",
- "bitflags 2.8.0",
+ "bitflags 2.9.1",
  "lazy_static",
  "num-traits",
- "rand 0.8.5",
- "rand_chacha 0.3.1",
- "rand_xorshift",
+ "rand 0.9.1",
+ "rand_chacha 0.9.0",
+ "rand_xorshift 0.4.0",
  "regex-syntax 0.8.5",
  "rusty-fork",
  "tempfile",
@@ -6329,9 +7062,9 @@ dependencies = [
 
 [[package]]
 name = "pyo3"
-version = "0.25.0"
+version = "0.25.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f239d656363bcee73afef85277f1b281e8ac6212a1d42aa90e55b90ed43c47a4"
+checksum = "8970a78afe0628a3e3430376fc5fd76b6b45c4d43360ffd6cdd40bdde72b682a"
 dependencies = [
  "libc",
  "memoffset",
@@ -6343,9 +7076,9 @@ dependencies = [
 
 [[package]]
 name = "pyo3-build-config"
-version = "0.25.0"
+version = "0.25.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "755ea671a1c34044fa165247aaf6f419ca39caa6003aee791a0df2713d8f1b6d"
+checksum = "458eb0c55e7ece017adeba38f2248ff3ac615e53660d7c71a238d7d2a01c7598"
 dependencies = [
  "once_cell",
  "target-lexicon 0.13.2",
@@ -6353,9 +7086,9 @@ dependencies = [
 
 [[package]]
 name = "pyo3-ffi"
-version = "0.25.0"
+version = "0.25.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fc95a2e67091e44791d4ea300ff744be5293f394f1bafd9f78c080814d35956e"
+checksum = "7114fe5457c61b276ab77c5055f206295b812608083644a5c5b2640c3102565c"
 dependencies = [
  "libc",
  "pyo3-build-config",
@@ -6363,15 +7096,15 @@ dependencies = [
 
 [[package]]
 name = "quanta"
-version = "0.12.5"
+version = "0.12.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3bd1fe6824cea6538803de3ff1bc0cf3949024db3d43c9643024bfb33a807c0e"
+checksum = "f3ab5a9d756f0d97bdc89019bd2e4ea098cf9cde50ee7564dde6b81ccc8f06c7"
 dependencies = [
  "crossbeam-utils",
  "libc",
  "once_cell",
  "raw-cpuid",
- "wasi 0.11.0+wasi-snapshot-preview1",
+ "wasi 0.11.1+wasi-snapshot-preview1",
  "web-sys",
  "winapi",
 ]
@@ -6393,13 +7126,19 @@ dependencies = [
 
 [[package]]
 name = "quote"
-version = "1.0.38"
+version = "1.0.40"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
+checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d"
 dependencies = [
  "proc-macro2",
 ]
 
+[[package]]
+name = "r-efi"
+version = "5.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
+
 [[package]]
 name = "radium"
 version = "0.7.0"
@@ -6436,6 +7175,7 @@ checksum = "9fbfd9d094a40bf3ae768db9361049ace4c0e04a4fd6b359518bd7b73a73dd97"
 dependencies = [
  "rand_chacha 0.9.0",
  "rand_core 0.9.3",
+ "serde",
 ]
 
 [[package]]
@@ -6464,7 +7204,7 @@ version = "0.6.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
 dependencies = [
- "getrandom 0.2.15",
+ "getrandom 0.2.16",
 ]
 
 [[package]]
@@ -6473,7 +7213,8 @@ version = "0.9.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38"
 dependencies = [
- "getrandom 0.3.1",
+ "getrandom 0.3.3",
+ "serde",
 ]
 
 [[package]]
@@ -6485,13 +7226,22 @@ dependencies = [
  "rand_core 0.6.4",
 ]
 
+[[package]]
+name = "rand_xorshift"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "513962919efc330f829edb2535844d1b912b0fbe2ca165d613e4e8788bb05a5a"
+dependencies = [
+ "rand_core 0.9.3",
+]
+
 [[package]]
 name = "raw-cpuid"
-version = "11.4.0"
+version = "11.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "529468c1335c1c03919960dfefdb1b3648858c20d7ec2d0663e728e4a717efbc"
+checksum = "c6df7ab838ed27997ba19a4664507e6f82b41fe6e20be42929332156e5e85146"
 dependencies = [
- "bitflags 2.8.0",
+ "bitflags 2.9.1",
 ]
 
 [[package]]
@@ -6516,11 +7266,11 @@ dependencies = [
 
 [[package]]
 name = "redox_syscall"
-version = "0.5.11"
+version = "0.5.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d2f103c6d277498fbceb16e84d317e2a400f160f46904d5f5410848c829511a3"
+checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6"
 dependencies = [
- "bitflags 2.8.0",
+ "bitflags 2.9.1",
 ]
 
 [[package]]
@@ -6529,11 +7279,31 @@ version = "0.4.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43"
 dependencies = [
- "getrandom 0.2.15",
+ "getrandom 0.2.16",
  "libredox",
  "thiserror 1.0.69",
 ]
 
+[[package]]
+name = "ref-cast"
+version = "1.0.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4a0ae411dbe946a674d89546582cea4ba2bb8defac896622d6496f14c23ba5cf"
+dependencies = [
+ "ref-cast-impl",
+]
+
+[[package]]
+name = "ref-cast-impl"
+version = "1.0.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.104",
+]
+
 [[package]]
 name = "regex"
 version = "1.11.1"
@@ -6595,11 +7365,11 @@ dependencies = [
  "encoding_rs",
  "futures-core",
  "futures-util",
- "h2",
+ "h2 0.3.27",
  "http 0.2.12",
  "http-body 0.4.6",
- "hyper",
- "hyper-rustls",
+ "hyper 0.14.32",
+ "hyper-rustls 0.24.2",
  "ipnet",
  "js-sys",
  "log",
@@ -6607,7 +7377,7 @@ dependencies = [
  "once_cell",
  "percent-encoding",
  "pin-project-lite",
- "rustls",
+ "rustls 0.21.12",
  "rustls-pemfile",
  "serde",
  "serde_json",
@@ -6615,7 +7385,7 @@ dependencies = [
  "sync_wrapper",
  "system-configuration",
  "tokio",
- "tokio-rustls",
+ "tokio-rustls 0.24.1",
  "tower-service",
  "url",
  "wasm-bindgen",
@@ -6627,46 +7397,164 @@ dependencies = [
 
 [[package]]
 name = "revm"
-version = "18.0.0"
+version = "24.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "15689a3c6a8d14b647b4666f2e236ef47b5a5133cdfd423f545947986fff7013"
+checksum = "01d277408ff8d6f747665ad9e52150ab4caf8d5eaf0d787614cf84633c8337b4"
+dependencies = [
+ "revm-bytecode",
+ "revm-context",
+ "revm-context-interface",
+ "revm-database",
+ "revm-database-interface",
+ "revm-handler",
+ "revm-inspector",
+ "revm-interpreter",
+ "revm-precompile",
+ "revm-primitives 19.2.0",
+ "revm-state",
+]
+
+[[package]]
+name = "revm-bytecode"
+version = "4.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "942fe4724cf552fd28db6b0a2ca5b79e884d40dd8288a4027ed1e9090e0c6f49"
+dependencies = [
+ "bitvec",
+ "once_cell",
+ "phf",
+ "revm-primitives 19.2.0",
+ "serde",
+]
+
+[[package]]
+name = "revm-context"
+version = "5.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b01aad49e1233f94cebda48a4e5cef022f7c7ed29b4edf0d202b081af23435ef"
 dependencies = [
- "auto_impl",
  "cfg-if",
- "dyn-clone",
+ "derive-where",
+ "revm-bytecode",
+ "revm-context-interface",
+ "revm-database-interface",
+ "revm-primitives 19.2.0",
+ "revm-state",
+ "serde",
+]
+
+[[package]]
+name = "revm-context-interface"
+version = "5.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b844f48a411e62c7dde0f757bf5cce49c85b86d6fc1d3b2722c07f2bec4c3ce"
+dependencies = [
+ "alloy-eip2930",
+ "alloy-eip7702",
+ "auto_impl",
+ "either",
+ "revm-database-interface",
+ "revm-primitives 19.2.0",
+ "revm-state",
+ "serde",
+]
+
+[[package]]
+name = "revm-database"
+version = "4.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ad3fbe34f6bb00a9c3155723b3718b9cb9f17066ba38f9eb101b678cd3626775"
+dependencies = [
+ "alloy-eips",
+ "revm-bytecode",
+ "revm-database-interface",
+ "revm-primitives 19.2.0",
+ "revm-state",
+ "serde",
+]
+
+[[package]]
+name = "revm-database-interface"
+version = "4.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7b8acd36784a6d95d5b9e1b7be3ce014f1e759abb59df1fa08396b30f71adc2a"
+dependencies = [
+ "auto_impl",
+ "revm-primitives 19.2.0",
+ "revm-state",
+ "serde",
+]
+
+[[package]]
+name = "revm-handler"
+version = "5.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "481e8c3290ff4fa1c066592fdfeb2b172edfd14d12e6cade6f6f5588cad9359a"
+dependencies = [
+ "auto_impl",
+ "revm-bytecode",
+ "revm-context",
+ "revm-context-interface",
+ "revm-database-interface",
  "revm-interpreter",
  "revm-precompile",
+ "revm-primitives 19.2.0",
+ "revm-state",
+ "serde",
+]
+
+[[package]]
+name = "revm-inspector"
+version = "5.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fdc1167ef8937d8867888e63581d8ece729a72073d322119ef4627d813d99ecb"
+dependencies = [
+ "auto_impl",
+ "revm-context",
+ "revm-database-interface",
+ "revm-handler",
+ "revm-interpreter",
+ "revm-primitives 19.2.0",
+ "revm-state",
  "serde",
  "serde_json",
 ]
 
 [[package]]
 name = "revm-interpreter"
-version = "14.0.0"
+version = "20.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "74e3f11d0fed049a4a10f79820c59113a79b38aed4ebec786a79d5c667bfeb51"
+checksum = "b5ee65e57375c6639b0f50555e92a4f1b2434349dd32f52e2176f5c711171697"
 dependencies = [
- "revm-primitives 14.0.0",
+ "revm-bytecode",
+ "revm-context-interface",
+ "revm-primitives 19.2.0",
  "serde",
 ]
 
 [[package]]
 name = "revm-precompile"
-version = "15.0.0"
+version = "21.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e381060af24b750069a2b2d2c54bba273d84e8f5f9e8026fc9262298e26cc336"
+checksum = "0f9311e735123d8d53a02af2aa81877bba185be7c141be7f931bb3d2f3af449c"
 dependencies = [
+ "ark-bls12-381",
+ "ark-bn254 0.5.0",
+ "ark-ec 0.5.0",
+ "ark-ff 0.5.0",
+ "ark-serialize 0.5.0",
  "aurora-engine-modexp",
  "blst",
  "c-kzg",
  "cfg-if",
  "k256 0.13.4 (registry+https://github.com/rust-lang/crates.io-index)",
+ "libsecp256k1",
  "once_cell",
- "revm-primitives 14.0.0",
+ "p256 0.13.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "revm-primitives 19.2.0",
  "ripemd",
  "secp256k1",
- "sha2",
- "substrate-bn",
+ "sha2 0.10.9",
 ]
 
 [[package]]
@@ -6678,7 +7566,7 @@ dependencies = [
  "alloy-primitives 0.4.2",
  "alloy-rlp",
  "auto_impl",
- "bitflags 2.8.0",
+ "bitflags 2.9.1",
  "bitvec",
  "enumn",
  "hashbrown 0.14.5",
@@ -6687,21 +7575,24 @@ dependencies = [
 
 [[package]]
 name = "revm-primitives"
-version = "14.0.0"
+version = "19.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3702f132bb484f4f0d0ca4f6fbde3c82cfd745041abbedd6eda67730e1868ef0"
+checksum = "1c1588093530ec4442461163be49c433c07a3235d1ca6f6799fef338dacc50d3"
 dependencies = [
- "alloy-eip2930",
- "alloy-eip7702",
- "alloy-primitives 0.8.25",
- "auto_impl",
- "bitflags 2.8.0",
- "bitvec",
- "c-kzg",
- "cfg-if",
- "dyn-clone",
- "enumn",
- "hex",
+ "alloy-primitives 1.2.1",
+ "num_enum",
+ "serde",
+]
+
+[[package]]
+name = "revm-state"
+version = "4.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0040c61c30319254b34507383ba33d85f92949933adf6525a2cede05d165e1fa"
+dependencies = [
+ "bitflags 2.9.1",
+ "revm-bytecode",
+ "revm-primitives 19.2.0",
  "serde",
 ]
 
@@ -6728,13 +7619,13 @@ dependencies = [
 
 [[package]]
 name = "ring"
-version = "0.17.13"
+version = "0.17.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "70ac5d832aa16abd7d1def883a8545280c20a60f523a370aa3a9617c2b8550ee"
+checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
 dependencies = [
  "cc",
  "cfg-if",
- "getrandom 0.2.15",
+ "getrandom 0.2.16",
  "libc",
  "untrusted",
  "windows-sys 0.52.0",
@@ -6793,30 +7684,6 @@ dependencies = [
  "paste",
 ]
 
-[[package]]
-name = "ruint"
-version = "1.12.3"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2c3cc4c2511671f327125da14133d0c5c5d137f006a1017a16f557bc85b16286"
-dependencies = [
- "alloy-rlp",
- "ark-ff 0.3.0",
- "ark-ff 0.4.2",
- "bytes",
- "fastrlp 0.3.1",
- "num-bigint 0.4.6",
- "num-traits",
- "parity-scale-codec",
- "primitive-types",
- "proptest",
- "rand 0.8.5",
- "rlp",
- "ruint-macro 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "serde",
- "valuable",
- "zeroize",
-]
-
 [[package]]
 name = "ruint"
 version = "1.14.0"
@@ -6834,7 +7701,7 @@ dependencies = [
  "bytemuck",
  "bytes",
  "criterion",
- "der 0.7.9",
+ "der 0.7.10",
  "diesel",
  "ethereum_ssz",
  "eyre",
@@ -6855,22 +7722,49 @@ dependencies = [
  "openvm-toolchain-tests",
  "openvm-transpiler",
  "parity-scale-codec",
- "postgres",
- "postgres-types",
+ "postgres",
+ "postgres-types",
+ "primitive-types",
+ "proptest",
+ "pyo3",
+ "quickcheck",
+ "rand 0.8.5",
+ "rand 0.9.1",
+ "rlp",
+ "ruint 1.14.0",
+ "ruint-macro 1.2.1",
+ "serde",
+ "serde_json",
+ "sqlx-core",
+ "subtle",
+ "thiserror 2.0.12",
+ "valuable",
+ "zeroize",
+]
+
+[[package]]
+name = "ruint"
+version = "1.15.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "11256b5fe8c68f56ac6f39ef0720e592f33d2367a4782740d9c9142e889c7fb4"
+dependencies = [
+ "alloy-rlp",
+ "ark-ff 0.3.0",
+ "ark-ff 0.4.2",
+ "bytes",
+ "fastrlp 0.3.1",
+ "fastrlp 0.4.0",
+ "num-bigint 0.4.6",
+ "num-integer",
+ "num-traits",
+ "parity-scale-codec",
  "primitive-types",
  "proptest",
- "pyo3",
- "quickcheck",
  "rand 0.8.5",
  "rand 0.9.1",
  "rlp",
- "ruint 1.14.0",
- "ruint-macro 1.2.1",
+ "ruint-macro 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde",
- "serde_json",
- "sqlx-core",
- "subtle",
- "thiserror 2.0.11",
  "valuable",
  "zeroize",
 ]
@@ -6890,9 +7784,9 @@ checksum = "48fd7bd8a6377e15ad9d42a8ec25371b94ddc67abe7c8b9127bec79bebaaae18"
 
 [[package]]
 name = "rustc-demangle"
-version = "0.1.24"
+version = "0.1.25"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
+checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f"
 
 [[package]]
 name = "rustc-hash"
@@ -6927,7 +7821,7 @@ version = "0.4.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92"
 dependencies = [
- "semver 1.0.25",
+ "semver 1.0.26",
 ]
 
 [[package]]
@@ -6936,10 +7830,23 @@ version = "0.38.44"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
 dependencies = [
- "bitflags 2.8.0",
+ "bitflags 2.9.1",
+ "errno",
+ "libc",
+ "linux-raw-sys 0.4.15",
+ "windows-sys 0.59.0",
+]
+
+[[package]]
+name = "rustix"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266"
+dependencies = [
+ "bitflags 2.9.1",
  "errno",
  "libc",
- "linux-raw-sys",
+ "linux-raw-sys 0.9.4",
  "windows-sys 0.59.0",
 ]
 
@@ -6951,10 +7858,24 @@ checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e"
 dependencies = [
  "log",
  "ring",
- "rustls-webpki",
+ "rustls-webpki 0.101.7",
  "sct",
 ]
 
+[[package]]
+name = "rustls"
+version = "0.23.29"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2491382039b29b9b11ff08b76ff6c97cf287671dbb74f0be44bda389fffe9bd1"
+dependencies = [
+ "aws-lc-rs",
+ "once_cell",
+ "rustls-pki-types",
+ "rustls-webpki 0.103.4",
+ "subtle",
+ "zeroize",
+]
+
 [[package]]
 name = "rustls-native-certs"
 version = "0.6.3"
@@ -6964,7 +7885,19 @@ dependencies = [
  "openssl-probe",
  "rustls-pemfile",
  "schannel",
- "security-framework",
+ "security-framework 2.11.1",
+]
+
+[[package]]
+name = "rustls-native-certs"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7fcff2dd52b58a8d98a70243663a0d234c4e2b79235637849d15913394a247d3"
+dependencies = [
+ "openssl-probe",
+ "rustls-pki-types",
+ "schannel",
+ "security-framework 3.2.0",
 ]
 
 [[package]]
@@ -6976,6 +7909,15 @@ dependencies = [
  "base64 0.21.7",
 ]
 
+[[package]]
+name = "rustls-pki-types"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79"
+dependencies = [
+ "zeroize",
+]
+
 [[package]]
 name = "rustls-webpki"
 version = "0.101.7"
@@ -6986,11 +7928,23 @@ dependencies = [
  "untrusted",
 ]
 
+[[package]]
+name = "rustls-webpki"
+version = "0.103.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0a17884ae0c1b773f1ccd2bd4a8c72f16da897310a98b0e84bf349ad5ead92fc"
+dependencies = [
+ "aws-lc-rs",
+ "ring",
+ "rustls-pki-types",
+ "untrusted",
+]
+
 [[package]]
 name = "rustversion"
-version = "1.0.19"
+version = "1.0.21"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4"
+checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d"
 
 [[package]]
 name = "rusty-fork"
@@ -7006,9 +7960,9 @@ dependencies = [
 
 [[package]]
 name = "ryu"
-version = "1.0.19"
+version = "1.0.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd"
+checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
 
 [[package]]
 name = "same-file"
@@ -7040,7 +7994,7 @@ dependencies = [
  "proc-macro-crate",
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -7052,6 +8006,30 @@ dependencies = [
  "windows-sys 0.59.0",
 ]
 
+[[package]]
+name = "schemars"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f"
+dependencies = [
+ "dyn-clone",
+ "ref-cast",
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "schemars"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0"
+dependencies = [
+ "dyn-clone",
+ "ref-cast",
+ "serde",
+ "serde_json",
+]
+
 [[package]]
 name = "scopeguard"
 version = "1.2.0"
@@ -7089,7 +8067,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d3e97a565f76233a6003f9f5c54be1d9c5bdfa3eccfb189469f11ec4901c47dc"
 dependencies = [
  "base16ct 0.2.0",
- "der 0.7.9",
+ "der 0.7.10",
  "generic-array",
  "pkcs8 0.10.2",
  "serdect",
@@ -7099,10 +8077,11 @@ dependencies = [
 
 [[package]]
 name = "secp256k1"
-version = "0.29.1"
+version = "0.30.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9465315bc9d4566e1724f0fffcbcc446268cb522e60f9a27bcded6b19c108113"
+checksum = "b50c5943d326858130af85e049f2661ba3c78b26589b8ab98e65e80ae44a1252"
 dependencies = [
+ "bitcoin_hashes",
  "rand 0.8.5",
  "secp256k1-sys",
 ]
@@ -7122,8 +8101,21 @@ version = "2.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
 dependencies = [
- "bitflags 2.8.0",
- "core-foundation",
+ "bitflags 2.9.1",
+ "core-foundation 0.9.4",
+ "core-foundation-sys",
+ "libc",
+ "security-framework-sys",
+]
+
+[[package]]
+name = "security-framework"
+version = "3.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "271720403f46ca04f7ba6f55d438f8bd878d6b8ca0a1046e8228c4145bcbb316"
+dependencies = [
+ "bitflags 2.9.1",
+ "core-foundation 0.10.1",
  "core-foundation-sys",
  "libc",
  "security-framework-sys",
@@ -7150,9 +8142,9 @@ dependencies = [
 
 [[package]]
 name = "semver"
-version = "1.0.25"
+version = "1.0.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f79dfe2d285b0488816f30e700a7438c5a73d816b5b7d3ac72fbc48b0d185e03"
+checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0"
 dependencies = [
  "serde",
 ]
@@ -7168,9 +8160,9 @@ dependencies = [
 
 [[package]]
 name = "serde"
-version = "1.0.218"
+version = "1.0.219"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e8dfc9d19bdbf6d17e22319da49161d5d0108e4188e8b680aef6299eed22df60"
+checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6"
 dependencies = [
  "serde_derive",
 ]
@@ -7195,22 +8187,22 @@ dependencies = [
 
 [[package]]
 name = "serde_derive"
-version = "1.0.218"
+version = "1.0.219"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f09503e191f4e797cb8aac08e9a4a4695c5edf6a2e70e376d961ddd5c969f82b"
+checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
 name = "serde_json"
-version = "1.0.139"
+version = "1.0.140"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "44f86c3acccc9c65b153fe1b85a3be07fe5515274ec9f0653b4a0875731c72a6"
+checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373"
 dependencies = [
- "indexmap 2.7.1",
+ "indexmap 2.10.0",
  "itoa",
  "memchr",
  "ryu",
@@ -7229,9 +8221,9 @@ dependencies = [
 
 [[package]]
 name = "serde_spanned"
-version = "0.6.8"
+version = "0.6.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1"
+checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3"
 dependencies = [
  "serde",
 ]
@@ -7250,15 +8242,17 @@ dependencies = [
 
 [[package]]
 name = "serde_with"
-version = "3.12.0"
+version = "3.14.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d6b6f7f2fcb69f747921f79f3926bd1e203fce4fef62c268dd3abfb6d86029aa"
+checksum = "f2c45cd61fefa9db6f254525d46e392b852e0e61d9a1fd36e5bd183450a556d5"
 dependencies = [
  "base64 0.22.1",
  "chrono",
  "hex",
  "indexmap 1.9.3",
- "indexmap 2.7.1",
+ "indexmap 2.10.0",
+ "schemars 0.9.0",
+ "schemars 1.0.4",
  "serde",
  "serde_derive",
  "serde_json",
@@ -7268,14 +8262,14 @@ dependencies = [
 
 [[package]]
 name = "serde_with_macros"
-version = "3.12.0"
+version = "3.14.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8d00caa5193a3c8362ac2b73be6b9e768aa5a4b2f721d8f4b339600c3cb51f8e"
+checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f"
 dependencies = [
  "darling",
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -7301,9 +8295,22 @@ dependencies = [
 
 [[package]]
 name = "sha2"
-version = "0.10.8"
+version = "0.9.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4d58a1e1bf39749807d89cf2d98ac2dfa0ff1cb3faa38fbb64dd88ac8013d800"
+dependencies = [
+ "block-buffer 0.9.0",
+ "cfg-if",
+ "cpufeatures",
+ "digest 0.9.0",
+ "opaque-debug",
+]
+
+[[package]]
+name = "sha2"
+version = "0.10.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8"
+checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
 dependencies = [
  "cfg-if",
  "cpufeatures",
@@ -7347,9 +8354,9 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
 
 [[package]]
 name = "signal-hook-registry"
-version = "1.4.2"
+version = "1.4.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1"
+checksum = "9203b8055f63a2a00e2f593bb0510367fe707d7ff1e5c872de2f537b339e5410"
 dependencies = [
  "libc",
 ]
@@ -7388,24 +8395,21 @@ checksum = "85636c14b73d81f541e525f585c0a2109e6744e1565b5c1668e31c70c10ed65c"
 
 [[package]]
 name = "slab"
-version = "0.4.9"
+version = "0.4.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67"
-dependencies = [
- "autocfg",
-]
+checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589"
 
 [[package]]
 name = "smallvec"
-version = "1.14.0"
+version = "1.15.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7fcf8323ef1faaee30a44a340193b1ac6814fd9b7b4e88e9d4519a3e4abe1cfd"
+checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
 
 [[package]]
 name = "snark-verifier"
-version = "0.2.0"
+version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "28e4c4ed1edca41687fe2d8a09ba30badb0a5cc7fa56dd1159d62aeab7c99ace"
+checksum = "c9203c416ff9de0762667270b21573ba5e6edaeda08743b3ca37dc8a5e0a4480"
 dependencies = [
  "halo2-base",
  "halo2-ecc",
@@ -7418,16 +8422,16 @@ dependencies = [
  "pairing 0.23.0",
  "rand 0.8.5",
  "revm",
- "ruint 1.12.3",
+ "ruint 1.15.0",
  "serde",
  "sha3",
 ]
 
 [[package]]
 name = "snark-verifier-sdk"
-version = "0.2.0"
+version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "babff70ce6292fce03f692d68569f76b8f6710dbac7be7fe5f32c915909c9065"
+checksum = "290ae6e750d9d5fdf05393bbcae6bf7a63e3408eab023abf7d466156a234ac85"
 dependencies = [
  "bincode",
  "ethereum-types",
@@ -7448,9 +8452,9 @@ dependencies = [
 
 [[package]]
 name = "socket2"
-version = "0.5.8"
+version = "0.5.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c970269d99b64e60ec3bd6ad27270092a5394c4e309314b18ae3fe575695fbe8"
+checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678"
 dependencies = [
  "libc",
  "windows-sys 0.52.0",
@@ -7493,7 +8497,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d91ed6c858b01f942cd56b37a94b3e0a1798290327d1236e4d9cf4eaca44d29d"
 dependencies = [
  "base64ct",
- "der 0.7.9",
+ "der 0.7.10",
 ]
 
 [[package]]
@@ -7511,15 +8515,15 @@ dependencies = [
  "futures-intrusive",
  "futures-io",
  "futures-util",
- "hashbrown 0.15.2",
+ "hashbrown 0.15.4",
  "hashlink",
- "indexmap 2.7.1",
+ "indexmap 2.10.0",
  "log",
  "memchr",
  "once_cell",
  "percent-encoding",
  "smallvec",
- "thiserror 2.0.11",
+ "thiserror 2.0.12",
  "tracing",
  "url",
 ]
@@ -7536,6 +8540,16 @@ version = "1.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
 
+[[package]]
+name = "statrs"
+version = "0.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2a3fe7c28c6512e766b0874335db33c94ad7b8f9054228ae1c2abd47ce7d335e"
+dependencies = [
+ "approx",
+ "num-traits",
+]
+
 [[package]]
 name = "strength_reduce"
 version = "0.2.4"
@@ -7590,20 +8604,7 @@ dependencies = [
  "proc-macro2",
  "quote",
  "rustversion",
- "syn 2.0.98",
-]
-
-[[package]]
-name = "substrate-bn"
-version = "0.6.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72b5bbfa79abbae15dd642ea8176a21a635ff3c00059961d1ea27ad04e5b441c"
-dependencies = [
- "byteorder",
- "crunchy",
- "lazy_static",
- "rand 0.8.5",
- "rustc-hex",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -7636,10 +8637,10 @@ dependencies = [
  "hex",
  "once_cell",
  "reqwest",
- "semver 1.0.25",
+ "semver 1.0.26",
  "serde",
  "serde_json",
- "sha2",
+ "sha2 0.10.9",
  "thiserror 1.0.69",
  "url",
  "zip",
@@ -7653,7 +8654,7 @@ checksum = "aa64b5e8eecd3a8af7cfc311e29db31a268a62d5953233d3e8243ec77a71c4e3"
 dependencies = [
  "build_const",
  "hex",
- "semver 1.0.25",
+ "semver 1.0.26",
  "serde_json",
  "svm-rs",
 ]
@@ -7671,9 +8672,9 @@ dependencies = [
 
 [[package]]
 name = "syn"
-version = "2.0.98"
+version = "2.0.104"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1"
+checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -7689,7 +8690,7 @@ dependencies = [
  "paste",
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -7700,13 +8701,13 @@ checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160"
 
 [[package]]
 name = "synstructure"
-version = "0.13.1"
+version = "0.13.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971"
+checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -7716,7 +8717,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ba3a3adc5c275d719af8cb4272ea1c4a6d668a777f37e115f6d11ddbc1c8e0e7"
 dependencies = [
  "bitflags 1.3.2",
- "core-foundation",
+ "core-foundation 0.9.4",
  "system-configuration-sys",
 ]
 
@@ -7750,15 +8751,14 @@ checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a"
 
 [[package]]
 name = "tempfile"
-version = "3.17.1"
+version = "3.20.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "22e5a0acb1f3f55f65cc4a866c361b2fb2a0ff6366785ae6fbb5f85df07ba230"
+checksum = "e8a64e3985349f2441a1a9ef0b853f869006c3855f2cda6862a94d26ebb9d6a1"
 dependencies = [
- "cfg-if",
  "fastrand",
- "getrandom 0.3.1",
+ "getrandom 0.3.3",
  "once_cell",
- "rustix",
+ "rustix 1.0.7",
  "windows-sys 0.59.0",
 ]
 
@@ -7773,6 +8773,16 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "terminal_size"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "45c6481c4829e4cc63825e62c49186a34538b7b2750b73b266581ffb612fb5ed"
+dependencies = [
+ "rustix 1.0.7",
+ "windows-sys 0.59.0",
+]
+
 [[package]]
 name = "test-case"
 version = "3.3.1"
@@ -7791,7 +8801,7 @@ dependencies = [
  "cfg-if",
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -7802,30 +8812,30 @@ checksum = "5c89e72a01ed4c579669add59014b9a524d609c0c88c6a585ce37485879f6ffb"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
  "test-case-core",
 ]
 
 [[package]]
 name = "test-log"
-version = "0.2.17"
+version = "0.2.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e7f46083d221181166e5b6f6b1e5f1d499f3a76888826e6cb1d057554157cd0f"
+checksum = "1e33b98a582ea0be1168eba097538ee8dd4bbe0f2b01b22ac92ea30054e5be7b"
 dependencies = [
  "env_logger",
  "test-log-macros",
- "tracing-subscriber",
+ "tracing-subscriber 0.3.19",
 ]
 
 [[package]]
 name = "test-log-macros"
-version = "0.2.17"
+version = "0.2.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "888d0c3c6db53c0fdab160d2ed5e12ba745383d3e85813f2ea0f2b1475ab553f"
+checksum = "451b374529930d7601b1eef8d32bc79ae870b6079b069401709c2a8bf9e75f36"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -7839,11 +8849,11 @@ dependencies = [
 
 [[package]]
 name = "thiserror"
-version = "2.0.11"
+version = "2.0.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d452f284b73e6d76dd36758a0c8684b1d5be31f92b89d07fd5822175732206fc"
+checksum = "567b8a2dae586314f7be2a752ec7474332959c6460e02bde30d702a66d488708"
 dependencies = [
- "thiserror-impl 2.0.11",
+ "thiserror-impl 2.0.12",
 ]
 
 [[package]]
@@ -7854,28 +8864,27 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
 name = "thiserror-impl"
-version = "2.0.11"
+version = "2.0.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "26afc1baea8a989337eeb52b6e72a039780ce45c3edfcc9c5b9d112feeb173c2"
+checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
 name = "thread_local"
-version = "1.1.8"
+version = "1.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c"
+checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185"
 dependencies = [
  "cfg-if",
- "once_cell",
 ]
 
 [[package]]
@@ -7909,9 +8918,9 @@ dependencies = [
 
 [[package]]
 name = "time"
-version = "0.3.37"
+version = "0.3.41"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "35e7868883861bd0e56d9ac6efcaaca0d6d5d82a2a7ec8209ff492c07cf37b21"
+checksum = "8a7619e19bc266e0f9c5e6686659d394bc57973859340060a69221e57dbc0c40"
 dependencies = [
  "deranged",
  "itoa",
@@ -7926,15 +8935,15 @@ dependencies = [
 
 [[package]]
 name = "time-core"
-version = "0.1.2"
+version = "0.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3"
+checksum = "c9e9a38711f559d9e3ce1cdb06dd7c5b8ea546bc90052da6d06bb76da74bb07c"
 
 [[package]]
 name = "time-macros"
-version = "0.2.19"
+version = "0.2.22"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2834e6017e3e5e4b9834939793b282bc03b37a3336245fa820e35e233e2a85de"
+checksum = "3526739392ec93fd8b359c8e98514cb3e8e021beb4e5f597b00a0221f8ed8a49"
 dependencies = [
  "num-conv",
  "time-core",
@@ -7951,9 +8960,9 @@ dependencies = [
 
 [[package]]
 name = "tinystr"
-version = "0.7.6"
+version = "0.8.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f"
+checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b"
 dependencies = [
  "displaydoc",
  "zerovec",
@@ -7986,16 +8995,18 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
 
 [[package]]
 name = "tokio"
-version = "1.44.2"
+version = "1.46.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e6b88822cbe49de4185e3a4cbf8321dd487cf5fe0c5c65695fef6346371e9c48"
+checksum = "0cc3a2344dafbe23a245241fe8b09735b521110d30fcefbbd5feb1797ca35d17"
 dependencies = [
  "backtrace",
  "bytes",
+ "io-uring",
  "libc",
  "mio",
  "pin-project-lite",
  "signal-hook-registry",
+ "slab",
  "socket2",
  "tokio-macros",
  "windows-sys 0.52.0",
@@ -8009,7 +9020,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -8044,15 +9055,25 @@ version = "0.24.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
 dependencies = [
- "rustls",
+ "rustls 0.21.12",
+ "tokio",
+]
+
+[[package]]
+name = "tokio-rustls"
+version = "0.26.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b"
+dependencies = [
+ "rustls 0.23.29",
  "tokio",
 ]
 
 [[package]]
 name = "tokio-util"
-version = "0.7.13"
+version = "0.7.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d7fcaa8d55a2bdd6b83ace262b016eca0d79ee02818c5c1bcdf0305114081078"
+checksum = "66a539a9ad6d5d281510d5bd368c973d636c02dbf8a67300bfb6b950696ad7df"
 dependencies = [
  "bytes",
  "futures-core",
@@ -8067,7 +9088,7 @@ version = "0.7.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "dd79e69d3b627db300ff956027cc6c3798cef26d22526befdfcd12feeb6d2257"
 dependencies = [
- "indexmap 2.7.1",
+ "indexmap 2.10.0",
  "serde",
  "serde_spanned",
  "toml_datetime",
@@ -8076,21 +9097,21 @@ dependencies = [
 
 [[package]]
 name = "toml"
-version = "0.8.20"
+version = "0.8.23"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cd87a5cdd6ffab733b2f74bc4fd7ee5fff6634124999ac278c35fc78c6120148"
+checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362"
 dependencies = [
  "serde",
  "serde_spanned",
  "toml_datetime",
- "toml_edit 0.22.24",
+ "toml_edit 0.22.27",
 ]
 
 [[package]]
 name = "toml_datetime"
-version = "0.6.8"
+version = "0.6.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41"
+checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c"
 dependencies = [
  "serde",
 ]
@@ -8101,7 +9122,7 @@ version = "0.19.15"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421"
 dependencies = [
- "indexmap 2.7.1",
+ "indexmap 2.10.0",
  "serde",
  "serde_spanned",
  "toml_datetime",
@@ -8110,17 +9131,40 @@ dependencies = [
 
 [[package]]
 name = "toml_edit"
-version = "0.22.24"
+version = "0.22.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474"
+checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a"
 dependencies = [
- "indexmap 2.7.1",
+ "indexmap 2.10.0",
  "serde",
  "serde_spanned",
  "toml_datetime",
- "winnow 0.7.3",
+ "toml_write",
+ "winnow 0.7.12",
+]
+
+[[package]]
+name = "toml_write"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
+
+[[package]]
+name = "tower"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9"
+dependencies = [
+ "tower-layer",
+ "tower-service",
 ]
 
+[[package]]
+name = "tower-layer"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
+
 [[package]]
 name = "tower-service"
 version = "0.3.3"
@@ -8141,20 +9185,20 @@ dependencies = [
 
 [[package]]
 name = "tracing-attributes"
-version = "0.1.28"
+version = "0.1.30"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d"
+checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
 name = "tracing-core"
-version = "0.1.33"
+version = "0.1.34"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c"
+checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678"
 dependencies = [
  "once_cell",
  "valuable",
@@ -8170,7 +9214,7 @@ dependencies = [
  "smallvec",
  "thiserror 1.0.69",
  "tracing",
- "tracing-subscriber",
+ "tracing-subscriber 0.3.19",
 ]
 
 [[package]]
@@ -8184,6 +9228,15 @@ dependencies = [
  "tracing-core",
 ]
 
+[[package]]
+name = "tracing-subscriber"
+version = "0.2.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0e0d2eaa99c3c2e41547cfa109e910a68ea03823cccad4a0525dcbc9b01e8c71"
+dependencies = [
+ "tracing-core",
+]
+
 [[package]]
 name = "tracing-subscriber"
 version = "0.3.19"
@@ -8265,9 +9318,9 @@ checksum = "5c1cb5db39152898a79168971543b1cb5020dff7fe43c8dc468b0885f5e29df5"
 
 [[package]]
 name = "unicode-ident"
-version = "1.0.17"
+version = "1.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe"
+checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512"
 
 [[package]]
 name = "unicode-normalization"
@@ -8329,12 +9382,6 @@ version = "2.1.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da"
 
-[[package]]
-name = "utf16_iter"
-version = "1.0.5"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246"
-
 [[package]]
 name = "utf8_iter"
 version = "1.0.4"
@@ -8349,9 +9396,14 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
 
 [[package]]
 name = "uuid"
-version = "1.13.2"
+version = "1.17.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8c1f41ffb7cf259f1ecc2876861a17e7142e63ead296f671f81f6ae85903e0d6"
+checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d"
+dependencies = [
+ "getrandom 0.3.3",
+ "js-sys",
+ "wasm-bindgen",
+]
 
 [[package]]
 name = "valuable"
@@ -8420,15 +9472,15 @@ dependencies = [
 
 [[package]]
 name = "wasi"
-version = "0.11.0+wasi-snapshot-preview1"
+version = "0.11.1+wasi-snapshot-preview1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
+checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
 
 [[package]]
 name = "wasi"
-version = "0.13.3+wasi-0.2.2"
+version = "0.14.2+wasi-0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "26816d2e1a4a36a2940b96c5296ce403917633dff8f3440e9b236ed6f6bacad2"
+checksum = "9683f9a5a998d873c0d21fcbe3c083009670149a8fab228644b8bd36b2c48cb3"
 dependencies = [
  "wit-bindgen-rt",
 ]
@@ -8461,7 +9513,7 @@ dependencies = [
  "log",
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
  "wasm-bindgen-shared",
 ]
 
@@ -8496,7 +9548,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
@@ -8526,6 +9578,18 @@ version = "0.25.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1"
 
+[[package]]
+name = "which"
+version = "4.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "87ba24419a2078cd2b0f2ede2691b6c66d8e47836da3b6db8265ebad47afbfc7"
+dependencies = [
+ "either",
+ "home",
+ "once_cell",
+ "rustix 0.38.44",
+]
+
 [[package]]
 name = "whoami"
 version = "1.6.0"
@@ -8570,11 +9634,61 @@ checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
 
 [[package]]
 name = "windows-core"
-version = "0.52.0"
+version = "0.61.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9"
+checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3"
 dependencies = [
- "windows-targets 0.52.6",
+ "windows-implement",
+ "windows-interface",
+ "windows-link",
+ "windows-result",
+ "windows-strings",
+]
+
+[[package]]
+name = "windows-implement"
+version = "0.60.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.104",
+]
+
+[[package]]
+name = "windows-interface"
+version = "0.59.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 2.0.104",
+]
+
+[[package]]
+name = "windows-link"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a"
+
+[[package]]
+name = "windows-result"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6"
+dependencies = [
+ "windows-link",
+]
+
+[[package]]
+name = "windows-strings"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57"
+dependencies = [
+ "windows-link",
 ]
 
 [[package]]
@@ -8736,9 +9850,9 @@ dependencies = [
 
 [[package]]
 name = "winnow"
-version = "0.7.3"
+version = "0.7.12"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0e7f4ea97f6f78012141bcdb6a216b2609f0979ada50b20ca5b52dde2eac2bb1"
+checksum = "f3edebf492c8125044983378ecb5766203ad3b4c2f7a922bd7dd207f6d443e95"
 dependencies = [
  "memchr",
 ]
@@ -8755,24 +9869,18 @@ dependencies = [
 
 [[package]]
 name = "wit-bindgen-rt"
-version = "0.33.0"
+version = "0.39.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3268f3d866458b787f390cf61f4bbb563b922d091359f9608842999eaee3943c"
+checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
 dependencies = [
- "bitflags 2.8.0",
+ "bitflags 2.9.1",
 ]
 
-[[package]]
-name = "write16"
-version = "1.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936"
-
 [[package]]
 name = "writeable"
-version = "0.5.5"
+version = "0.6.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51"
+checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb"
 
 [[package]]
 name = "wyz"
@@ -8803,9 +9911,9 @@ checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049"
 
 [[package]]
 name = "yoke"
-version = "0.7.5"
+version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40"
+checksum = "5f41bb01b8226ef4bfd589436a297c53d118f65921786300e427be8d487695cc"
 dependencies = [
  "serde",
  "stable_deref_trait",
@@ -8815,55 +9923,54 @@ dependencies = [
 
 [[package]]
 name = "yoke-derive"
-version = "0.7.5"
+version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154"
+checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
  "synstructure",
 ]
 
 [[package]]
 name = "zerocopy"
-version = "0.7.35"
+version = "0.8.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0"
+checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f"
 dependencies = [
- "byteorder",
  "zerocopy-derive",
 ]
 
 [[package]]
 name = "zerocopy-derive"
-version = "0.7.35"
+version = "0.8.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
+checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
 name = "zerofrom"
-version = "0.1.5"
+version = "0.1.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e"
+checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5"
 dependencies = [
  "zerofrom-derive",
 ]
 
 [[package]]
 name = "zerofrom-derive"
-version = "0.1.5"
+version = "0.1.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808"
+checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
  "synstructure",
 ]
 
@@ -8884,14 +9991,25 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
+]
+
+[[package]]
+name = "zerotrie"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595"
+dependencies = [
+ "displaydoc",
+ "yoke",
+ "zerofrom",
 ]
 
 [[package]]
 name = "zerovec"
-version = "0.10.4"
+version = "0.11.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079"
+checksum = "4a05eb080e015ba39cc9e23bbe5e7fb04d5fb040350f99f34e338d5fdd294428"
 dependencies = [
  "yoke",
  "zerofrom",
@@ -8900,13 +10018,13 @@ dependencies = [
 
 [[package]]
 name = "zerovec-derive"
-version = "0.10.3"
+version = "0.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6"
+checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn 2.0.98",
+ "syn 2.0.104",
 ]
 
 [[package]]
@@ -8950,7 +10068,7 @@ dependencies = [
  "pasta_curves 0.5.1",
  "rand 0.8.5",
  "serde",
- "sha2",
+ "sha2 0.10.9",
  "sha3",
  "subtle",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index 2734767aff..733294c63f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,7 +1,7 @@
 [workspace.package]
-version = "1.3.0"
+version = "1.4.0-rc.4"
 edition = "2021"
-rust-version = "1.82"
+rust-version = "1.86.0"
 authors = ["OpenVM Authors"]
 homepage = "https://openvm.dev"
 repository = "https://github.com/openvm-org/"
@@ -70,7 +70,7 @@ members = [
     "guest-libs/sha2/",
     "guest-libs/verify_stark/",
 ]
-exclude = ["crates/sdk/example"]
+exclude = ["crates/sdk/example", "benchmarks/guest"] # path prefixes only: `exclude` does not expand globs
 resolver = "2"
 
 # Fastest runtime configuration
@@ -85,6 +85,7 @@ codegen-units = 16
 [profile.profiling]
 inherits = "release"
 debug = 2
+debug-assertions = false
 strip = false
 
 # Make sure debug symbols are in the bench profile for flamegraphs
@@ -99,6 +100,7 @@ codegen-units = 1
 
 [profile.dev]
 opt-level = 1
+debug = 2
 
 # For O1 optimization but still fast(ish) compile times
 [profile.fast]
@@ -110,8 +112,8 @@ lto = "thin"
 
 [workspace.dependencies]
 # Stark Backend
-openvm-stark-backend = { git = "https://github.com/openvm-org/stark-backend.git", tag = "v1.1.1", default-features = false }
-openvm-stark-sdk = { git = "https://github.com/openvm-org/stark-backend.git", tag = "v1.1.1", default-features = false }
+openvm-stark-backend = { git = "https://github.com/openvm-org/stark-backend.git", tag = "v1.2.0-rc.2", default-features = false }
+openvm-stark-sdk = { git = "https://github.com/openvm-org/stark-backend.git", tag = "v1.2.0-rc.2", default-features = false }
 
 # OpenVM
 openvm-sdk = { path = "crates/sdk", default-features = false }
@@ -171,18 +173,16 @@ openvm-verify-stark = { path = "guest-libs/verify_stark", default-features = fal
 openvm-benchmarks-utils = { path = "benchmarks/utils", default-features = false }
 
 # Plonky3
-p3-field = { git = "https://github.com/Plonky3/Plonky3.git", rev = "539bbc84085efb609f4f62cb03cf49588388abdb" }
-p3-baby-bear = { git = "https://github.com/Plonky3/Plonky3.git", features = [
-    "nightly-features",
-], rev = "539bbc84085efb609f4f62cb03cf49588388abdb" }
-p3-dft = { git = "https://github.com/Plonky3/Plonky3.git", rev = "539bbc84085efb609f4f62cb03cf49588388abdb" }
-p3-fri = { git = "https://github.com/Plonky3/Plonky3.git", rev = "539bbc84085efb609f4f62cb03cf49588388abdb" }
-p3-keccak-air = { git = "https://github.com/Plonky3/Plonky3.git", rev = "539bbc84085efb609f4f62cb03cf49588388abdb" }
-p3-merkle-tree = { git = "https://github.com/Plonky3/Plonky3.git", rev = "539bbc84085efb609f4f62cb03cf49588388abdb" }
-p3-monty-31 = { git = "https://github.com/Plonky3/Plonky3.git", rev = "539bbc84085efb609f4f62cb03cf49588388abdb" }
-p3-poseidon2 = { git = "https://github.com/Plonky3/Plonky3.git", rev = "539bbc84085efb609f4f62cb03cf49588388abdb" }
-p3-poseidon2-air = { git = "https://github.com/Plonky3/Plonky3.git", rev = "539bbc84085efb609f4f62cb03cf49588388abdb" }
-p3-symmetric = { git = "https://github.com/Plonky3/Plonky3.git", rev = "539bbc84085efb609f4f62cb03cf49588388abdb" }
+p3-field = { git = "https://github.com/Plonky3/Plonky3.git", rev = "539bbc84085efb609f4f62cb03cf49588388abdb", default-features = false }
+p3-baby-bear = { git = "https://github.com/Plonky3/Plonky3.git", rev = "539bbc84085efb609f4f62cb03cf49588388abdb", default-features = false }
+p3-dft = { git = "https://github.com/Plonky3/Plonky3.git", rev = "539bbc84085efb609f4f62cb03cf49588388abdb", default-features = false }
+p3-fri = { git = "https://github.com/Plonky3/Plonky3.git", rev = "539bbc84085efb609f4f62cb03cf49588388abdb", default-features = false }
+p3-keccak-air = { git = "https://github.com/Plonky3/Plonky3.git", rev = "539bbc84085efb609f4f62cb03cf49588388abdb", default-features = false }
+p3-merkle-tree = { git = "https://github.com/Plonky3/Plonky3.git", rev = "539bbc84085efb609f4f62cb03cf49588388abdb", default-features = false }
+p3-monty-31 = { git = "https://github.com/Plonky3/Plonky3.git", rev = "539bbc84085efb609f4f62cb03cf49588388abdb", default-features = false }
+p3-poseidon2 = { git = "https://github.com/Plonky3/Plonky3.git", rev = "539bbc84085efb609f4f62cb03cf49588388abdb", default-features = false }
+p3-poseidon2-air = { git = "https://github.com/Plonky3/Plonky3.git", rev = "539bbc84085efb609f4f62cb03cf49588388abdb", default-features = false }
+p3-symmetric = { git = "https://github.com/Plonky3/Plonky3.git", rev = "539bbc84085efb609f4f62cb03cf49588388abdb", default-features = false }
 
 zkhash = { git = "https://github.com/HorizenLabs/poseidon2.git", rev = "bb476b9" }
 snark-verifier-sdk = { version = "0.2.0", default-features = false, features = [
@@ -220,12 +220,15 @@ tempfile = "3.13.0"
 thiserror = "1.0.65"
 rustc-hash = "2.0.0"
 static_assertions = "1.1.0"
-async-trait = "0.1.83"
 getset = "0.1.3"
 rrs-lib = "0.1.0"
 rand = { version = "0.8.5", default-features = false }
 hex = { version = "0.4.3", default-features = false }
 serde-big-array = "0.5.1"
+dashmap = "6.1.0"
+memmap2 = "0.9.5"
+libc = "0.2.175"
+tracing-subscriber = { version = "0.3.17", features = ["std", "env-filter"] }
 
 # default-features = false for no_std for use in guest programs
 itertools = { version = "0.14.0", default-features = false }
@@ -258,3 +261,6 @@ sha2 = { version = "0.10", default-features = false }
 # p3-poseidon2 = { path = "../Plonky3/poseidon2" }
 # p3-poseidon2-air = { path = "../Plonky3/poseidon2-air" }
 # p3-symmetric = { path = "../Plonky3/symmetric" }
+
+[workspace.metadata.cargo-shear]
+ignored = ["cargo-openvm"]
diff --git a/benchmarks/execute/Cargo.toml b/benchmarks/execute/Cargo.toml
index 319490220a..76ca243c2d 100644
--- a/benchmarks/execute/Cargo.toml
+++ b/benchmarks/execute/Cargo.toml
@@ -9,41 +9,66 @@ license.workspace = true
 
 [dependencies]
 openvm-benchmarks-utils.workspace = true
-cargo-openvm.workspace = true
 openvm-circuit.workspace = true
-openvm-sdk.workspace = true
 openvm-stark-sdk.workspace = true
 openvm-transpiler.workspace = true
-openvm-rv32im-circuit.workspace = true
-openvm-rv32im-transpiler.workspace = true
+openvm-algebra-circuit.workspace = true
+openvm-algebra-transpiler.workspace = true
+openvm-bigint-circuit.workspace = true
+openvm-bigint-transpiler.workspace = true
+openvm-ecc-circuit.workspace = true
+openvm-ecc-transpiler.workspace = true
+openvm-native-circuit = { workspace = true }
+openvm-pairing-circuit.workspace = true
+openvm-pairing-guest.workspace = true
+openvm-pairing-transpiler.workspace = true
 openvm-keccak256-circuit.workspace = true
 openvm-keccak256-transpiler.workspace = true
+openvm-rv32im-circuit.workspace = true
+openvm-rv32im-transpiler.workspace = true
+openvm-sha256-circuit.workspace = true
+openvm-sha256-transpiler.workspace = true
+openvm-continuations = { workspace = true }
+openvm-sdk = { workspace = true }
 
-clap = { version = "4.5.9", features = ["derive", "env"] }
+clap.workspace = true
 eyre.workspace = true
-tracing.workspace = true
 derive_more = { workspace = true, features = ["from"] }
-
-tracing-subscriber = { version = "0.3.17", features = ["std", "env-filter"] }
+rand.workspace = true
+serde = { workspace = true, features = ["derive"] }
+bitcode.workspace = true
+tracing.workspace = true
+tracing-subscriber.workspace = true
 
 [dev-dependencies]
-criterion = { version = "0.5", features = ["html_reports"] }
+divan = { package = "codspeed-divan-compat", version = "3.0.2" }
 
 [features]
 default = ["jemalloc"]
-profiling = ["openvm-sdk/profiling"]
 mimalloc = ["openvm-circuit/mimalloc"]
 jemalloc = ["openvm-circuit/jemalloc"]
 jemalloc-prof = ["openvm-circuit/jemalloc-prof"]
 nightly-features = ["openvm-circuit/nightly-features"]
+perf-metrics = [
+    "openvm-circuit/perf-metrics",
+    "openvm-transpiler/function-span",
+]
 
-[[bench]]
-name = "fibonacci_execute"
-harness = false
+# [[bench]]
+# name = "fibonacci_execute"
+# harness = false
+
+# [[bench]]
+# name = "regex_execute"
+# harness = false
 
 [[bench]]
-name = "regex_execute"
+name = "execute"
 harness = false
 
+[[bin]]
+name = "execute-leaf-verifier"
+path = "src/execute-leaf-verifier.rs"
+
 [package.metadata.cargo-shear]
-ignored = ["derive_more"]
+ignored = ["derive_more", "rand"]
diff --git a/benchmarks/execute/benches/execute.rs b/benchmarks/execute/benches/execute.rs
new file mode 100644
index 0000000000..50a52be450
--- /dev/null
+++ b/benchmarks/execute/benches/execute.rs
@@ -0,0 +1,399 @@
+use std::{fs, io, path::Path, sync::OnceLock};
+
+use divan::Bencher;
+use eyre::Result;
+use openvm_algebra_circuit::{
+    AlgebraCpuProverExt, Fp2Extension, Fp2ExtensionExecutor, ModularExtension,
+    ModularExtensionExecutor,
+};
+use openvm_algebra_transpiler::{Fp2TranspilerExtension, ModularTranspilerExtension};
+use openvm_benchmarks_utils::{get_elf_path, get_fixtures_dir, get_programs_dir, read_elf_file};
+use openvm_bigint_circuit::{Int256, Int256CpuProverExt, Int256Executor};
+use openvm_bigint_transpiler::Int256TranspilerExtension;
+use openvm_circuit::{
+    arch::{
+        execution_mode::{MeteredCostCtx, MeteredCtx},
+        instructions::exe::VmExe,
+        interpreter::InterpretedInstance,
+        ContinuationVmProof, *,
+    },
+    derive::VmConfig,
+    system::*,
+};
+use openvm_continuations::{
+    verifier::{common::types::VmVerifierPvs, leaf::types::LeafVmVerifierInput},
+    SC,
+};
+use openvm_ecc_circuit::{EccCpuProverExt, WeierstrassExtension, WeierstrassExtensionExecutor};
+use openvm_ecc_transpiler::EccTranspilerExtension;
+use openvm_keccak256_circuit::{Keccak256, Keccak256CpuProverExt, Keccak256Executor};
+use openvm_keccak256_transpiler::Keccak256TranspilerExtension;
+use openvm_native_circuit::{NativeConfig, NativeCpuBuilder, NATIVE_MAX_TRACE_HEIGHTS};
+use openvm_pairing_circuit::{
+    PairingCurve, PairingExtension, PairingExtensionExecutor, PairingProverExt,
+};
+use openvm_pairing_guest::bn254::BN254_COMPLEX_STRUCT_NAME;
+use openvm_pairing_transpiler::PairingTranspilerExtension;
+use openvm_rv32im_circuit::{
+    Rv32I, Rv32IExecutor, Rv32ImCpuProverExt, Rv32Io, Rv32IoExecutor, Rv32M, Rv32MExecutor,
+};
+use openvm_rv32im_transpiler::{
+    Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
+};
+use openvm_sdk::config::{DEFAULT_LEAF_LOG_BLOWUP, SBOX_SIZE};
+use openvm_sha256_circuit::{Sha256, Sha256Executor, Sha2CpuProverExt};
+use openvm_sha256_transpiler::Sha256TranspilerExtension;
+use openvm_stark_sdk::{
+    config::{baby_bear_poseidon2::BabyBearPoseidon2Engine, FriParameters},
+    engine::{StarkEngine, StarkFriEngine},
+    openvm_stark_backend::{
+        self,
+        config::{StarkGenericConfig, Val},
+        p3_field::PrimeField32,
+        prover::{
+            cpu::{CpuBackend, CpuDevice},
+            hal::DeviceDataTransporter,
+        },
+    },
+    p3_baby_bear::BabyBear,
+};
+use openvm_transpiler::{transpiler::Transpiler, FromElf};
+use serde::{Deserialize, Serialize};
+
+static AVAILABLE_PROGRAMS: &[&str] = &[
+    "fibonacci_recursive",
+    "fibonacci_iterative",
+    "quicksort",
+    "bubblesort",
+    "factorial_iterative_u256",
+    "revm_snailtracer",
+    "keccak256",
+    "keccak256_iter",
+    "sha256",
+    "sha256_iter",
+    "revm_transfer",
+    "pairing",
+]; // each name is joined onto `get_programs_dir()` by `load_program_executable`
+// Lazily initialized once per process by `metering_setup`, `metered_cost_setup` and `executor`.
+static METERED_CTX: OnceLock<(MeteredCtx, Vec<usize>)> = OnceLock::new();
+static METERED_COST_CTX: OnceLock<(MeteredCostCtx, Vec<usize>)> = OnceLock::new();
+static EXECUTOR: OnceLock<VmExecutor<BabyBear, ExecuteConfig>> = OnceLock::new();
+
+#[derive(Clone, Debug, VmConfig, Serialize, Deserialize)]
+pub struct ExecuteConfig { // VM config for the guest-program benches: system + ten extensions
+    #[config(executor = "SystemExecutor<F>")]
+    pub system: SystemConfig,
+    #[extension]
+    pub rv32i: Rv32I,
+    #[extension]
+    pub rv32m: Rv32M,
+    #[extension]
+    pub io: Rv32Io,
+    #[extension]
+    pub bigint: Int256,
+    #[extension]
+    pub keccak: Keccak256,
+    #[extension]
+    pub sha256: Sha256,
+    #[extension]
+    pub modular: ModularExtension,
+    #[extension]
+    pub fp2: Fp2Extension,
+    #[extension]
+    pub weierstrass: WeierstrassExtension,
+    #[extension(generics = true)]
+    pub pairing: PairingExtension,
+}
+
+impl Default for ExecuteConfig {
+    /// Builds the benchmark configuration: the default system config with every
+    /// extension enabled. The modular, Fp2 and Weierstrass extensions are all
+    /// parameterized from the BN254 curve config (its `modulus` and `scalar`),
+    /// and pairing is enabled for BN254 only.
+    fn default() -> Self {
+        let curve = PairingCurve::Bn254.curve_config();
+        let moduli = vec![curve.modulus.clone(), curve.scalar.clone()];
+        let complex = (BN254_COMPLEX_STRUCT_NAME.to_string(), curve.modulus.clone());
+        Self {
+            system: SystemConfig::default(),
+            rv32i: Rv32I,
+            rv32m: Rv32M::default(),
+            io: Rv32Io,
+            bigint: Int256::default(),
+            keccak: Keccak256,
+            sha256: Sha256,
+            modular: ModularExtension::new(moduli),
+            fp2: Fp2Extension::new(vec![complex]),
+            weierstrass: WeierstrassExtension::new(vec![curve]),
+            pairing: PairingExtension::new(vec![PairingCurve::Bn254]),
+        }
+    }
+}
+
+impl InitFileGenerator for ExecuteConfig {
+    fn write_to_init_file(
+        &self,
+        _manifest_dir: &Path,
+        _init_file_name: Option<&str>,
+    ) -> io::Result<()> {
+        Ok(()) // no-op: both arguments are ignored and nothing is written
+    }
+}
+
+/// Unit-struct builder that assembles the CPU chip complex for [`ExecuteConfig`].
+pub struct ExecuteBuilder;
+
+impl<E, SC> VmBuilder<E> for ExecuteBuilder
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    Val<SC>: PrimeField32,
+{
+    type VmConfig = ExecuteConfig;
+    type SystemChipInventory = SystemChipInventory<SC>;
+    type RecordArena = MatrixRecordArena<Val<SC>>;
+
+    /// Creates the system chip complex via `SystemCpuBuilder`, then extends its
+    /// inventory with the prover extension of every extension in `config`.
+    ///
+    /// Extensions are registered in the field order of `ExecuteConfig`.
+    ///
+    /// # Errors
+    /// Propagates (via `?`) any error from the system builder or an extension.
+    fn create_chip_complex(
+        &self,
+        config: &ExecuteConfig,
+        circuit: AirInventory<SC>,
+    ) -> Result<
+        VmChipComplex<SC, Self::RecordArena, E::PB, Self::SystemChipInventory>,
+        ChipInventoryError,
+    > {
+        let mut complex =
+            VmBuilder::<E>::create_chip_complex(&SystemCpuBuilder, &config.system, circuit)?;
+        let inv = &mut complex.inventory;
+
+        // RV32 base, multiplication and IO
+        VmProverExtension::<E, _, _>::extend_prover(&Rv32ImCpuProverExt, &config.rv32i, inv)?;
+        VmProverExtension::<E, _, _>::extend_prover(&Rv32ImCpuProverExt, &config.rv32m, inv)?;
+        VmProverExtension::<E, _, _>::extend_prover(&Rv32ImCpuProverExt, &config.io, inv)?;
+
+        // 256-bit integers and hash functions
+        VmProverExtension::<E, _, _>::extend_prover(&Int256CpuProverExt, &config.bigint, inv)?;
+        VmProverExtension::<E, _, _>::extend_prover(&Keccak256CpuProverExt, &config.keccak, inv)?;
+        VmProverExtension::<E, _, _>::extend_prover(&Sha2CpuProverExt, &config.sha256, inv)?;
+
+        // Modular/Fp2 algebra, Weierstrass curves and pairing
+        VmProverExtension::<E, _, _>::extend_prover(&AlgebraCpuProverExt, &config.modular, inv)?;
+        VmProverExtension::<E, _, _>::extend_prover(&AlgebraCpuProverExt, &config.fp2, inv)?;
+        VmProverExtension::<E, _, _>::extend_prover(&EccCpuProverExt, &config.weierstrass, inv)?;
+        VmProverExtension::<E, _, _>::extend_prover(&PairingProverExt, &config.pairing, inv)?;
+
+        Ok(complex)
+    }
+}
+
+fn main() {
+    divan::main(); // bench entry point: hands control to divan's runner
+}
+
+fn create_default_transpiler() -> Transpiler<BabyBear> {
+    Transpiler::<BabyBear>::default() // ten transpiler extensions, one per `ExecuteConfig` extension
+        .with_extension(Rv32ITranspilerExtension)
+        .with_extension(Rv32IoTranspilerExtension)
+        .with_extension(Rv32MTranspilerExtension)
+        .with_extension(Int256TranspilerExtension)
+        .with_extension(Keccak256TranspilerExtension)
+        .with_extension(Sha256TranspilerExtension)
+        .with_extension(ModularTranspilerExtension)
+        .with_extension(Fp2TranspilerExtension)
+        .with_extension(EccTranspilerExtension)
+        .with_extension(PairingTranspilerExtension)
+}
+
+/// Reads the ELF of guest `program` (under the programs dir) and transpiles it into a `VmExe`.
+fn load_program_executable(program: &str) -> Result<VmExe<BabyBear>> {
+    let elf_path = get_elf_path(&get_programs_dir().join(program));
+    let elf = read_elf_file(&elf_path)?;
+    let exe = VmExe::from_elf(elf, create_default_transpiler())?;
+    Ok(exe)
+}
+
+/// Returns the cached metered context and the `executor_idx_to_air_idx()` vector.
+/// Both come from a VM keygen'd for `ExecuteConfig::default()` on the first call only.
+fn metering_setup() -> &'static (MeteredCtx, Vec<usize>) {
+    METERED_CTX.get_or_init(|| {
+        let cfg = ExecuteConfig::default();
+        let engine = BabyBearPoseidon2Engine::new(FriParameters::standard_fast());
+        let (machine, _) = VirtualMachine::new_with_keygen(engine, ExecuteBuilder, cfg).unwrap();
+        (machine.build_metered_ctx(), machine.executor_idx_to_air_idx())
+    })
+}
+
+/// Returns the cached metered-cost context and the `executor_idx_to_air_idx()` vector.
+/// Both come from a VM keygen'd for `ExecuteConfig::default()` on the first call only.
+fn metered_cost_setup() -> &'static (MeteredCostCtx, Vec<usize>) {
+    METERED_COST_CTX.get_or_init(|| {
+        let cfg = ExecuteConfig::default();
+        let engine = BabyBearPoseidon2Engine::new(FriParameters::standard_fast());
+        let (machine, _) = VirtualMachine::new_with_keygen(engine, ExecuteBuilder, cfg).unwrap();
+        (machine.build_metered_cost_ctx(), machine.executor_idx_to_air_idx())
+    })
+}
+
+/// Returns the shared `VmExecutor` built from `ExecuteConfig::default()`.
+///
+/// The executor is constructed on the first call and reused by later calls.
+fn executor() -> &'static VmExecutor<BabyBear, ExecuteConfig> {
+    EXECUTOR.get_or_init(|| VmExecutor::<BabyBear, _>::new(ExecuteConfig::default()).unwrap())
+}
+
+/// Benchmarks plain interpreted execution (`execute`) of each program in `AVAILABLE_PROGRAMS`.
+#[divan::bench(args = AVAILABLE_PROGRAMS, sample_count=10)]
+fn benchmark_execute(bencher: Bencher, program: &str) {
+    bencher
+        .with_inputs(|| {
+            let exe = load_program_executable(program).expect("Failed to load program executable");
+            (executor().instance(&exe).unwrap(), vec![])
+        })
+        .bench_values(|(instance, stdin)| {
+            instance
+                .execute(stdin, None)
+                .expect("Failed to execute program in interpreted mode");
+        });
+}
+
+/// Benchmarks metered execution (`execute_metered`) of each program in `AVAILABLE_PROGRAMS`.
+/// Each generated input holds a fresh instance and a clone of the `metering_setup` context.
+#[divan::bench(args = AVAILABLE_PROGRAMS, sample_count=5)]
+fn benchmark_execute_metered(bencher: Bencher, program: &str) {
+    bencher
+        .with_inputs(|| {
+            let exe = load_program_executable(program).expect("Failed to load program executable");
+            let (metered_ctx, air_idx) = metering_setup();
+            let instance = executor().metered_instance(&exe, air_idx).unwrap();
+            (instance, vec![], metered_ctx.clone())
+        })
+        .bench_values(|(instance, stdin, metered_ctx)| {
+            instance
+                .execute_metered(stdin, metered_ctx)
+                .expect("Failed to execute program");
+        });
+}
+
+/// Benchmarks `execute_metered_cost` on each program in `AVAILABLE_PROGRAMS`; the attribute
+/// sets `ignore = true`. Inputs hold a fresh instance and a clone of the cached cost context.
+#[divan::bench(ignore = true, args = AVAILABLE_PROGRAMS, sample_count=5)]
+fn benchmark_execute_metered_cost(bencher: Bencher, program: &str) {
+    bencher
+        .with_inputs(|| {
+            let exe = load_program_executable(program).expect("Failed to load program executable");
+            let (cost_ctx, air_idx) = metered_cost_setup();
+            let instance = executor().metered_cost_instance(&exe, air_idx).unwrap();
+            (instance, vec![], cost_ctx.clone())
+        })
+        .bench_values(|(instance, stdin, cost_ctx)| {
+            instance
+                .execute_metered_cost(stdin, cost_ctx)
+                .expect("Failed to execute program with metered cost");
+        });
+}
+
+/// Loads the `kitchen-sink` fixtures and builds the leaf-verifier VM; panics on any failure.
+///
+/// Returns the VM (from the deserialized leaf proving key), the leaf executable, and the
+/// input stream of the first entry from `chunk_continuation_vm_proof(&app_proof, 2)`.
+fn setup_leaf_verifier() -> (
+    VirtualMachine<BabyBearPoseidon2Engine, NativeCpuBuilder>,
+    VmExe<BabyBear>,
+    Vec<Vec<BabyBear>>,
+) {
+    let fixtures = get_fixtures_dir();
+    let read_fixture = |name: &str| fs::read(fixtures.join(name)).unwrap();
+
+    let app_proof: ContinuationVmProof<SC> =
+        bitcode::deserialize(&read_fixture("kitchen-sink.app.proof")).unwrap();
+    let leaf_exe: VmExe<BabyBear> =
+        bitcode::deserialize(&read_fixture("kitchen-sink.leaf.exe")).unwrap();
+    let leaf_pk = bitcode::deserialize(&read_fixture("kitchen-sink.leaf.pk")).unwrap();
+
+    let leaf_inputs = LeafVmVerifierInput::chunk_continuation_vm_proof(&app_proof, 2);
+    let first_input = leaf_inputs.first().expect("No leaf input available");
+
+    let max_degree = SBOX_SIZE.min(FriParameters::standard_fast().max_constraint_degree());
+    let config = NativeConfig::aggregation(VmVerifierPvs::<u8>::width(), max_degree);
+    let engine = BabyBearPoseidon2Engine::new(
+        FriParameters::standard_with_100_bits_conjectured_security(DEFAULT_LEAF_LOG_BLOWUP),
+    );
+    let device_pk = engine.device().transport_pk_to_device(&leaf_pk);
+    let vm = VirtualMachine::new(engine, NativeCpuBuilder, config, device_pk).unwrap();
+
+    (vm, leaf_exe, first_input.write_to_stream())
+}
+
+#[divan::bench(sample_count = 5)]
+fn benchmark_leaf_verifier_execute(bencher: Bencher) {
+    bencher
+        .with_inputs(|| {
+            let (vm, leaf_exe, input_stream) = setup_leaf_verifier();
+            let interpreter = vm.executor().instance(&leaf_exe).unwrap();
+
+            // SAFETY: the transmute only rewrites the instance's lifetime parameter so that it
+            // can be returned alongside `vm`, which the bench closure keeps alive as `_vm`.
+            // NOTE(review): moving `vm` relocates it; assumes no pointers into it - confirm.
+            #[allow(clippy::missing_transmute_annotations)]
+            let interpreter =
+                unsafe { std::mem::transmute::<_, InterpretedInstance<'_, _, _>>(interpreter) };
+
+            (vm, interpreter, input_stream)
+        })
+        .bench_values(|(_vm, interpreter, input_stream)| {
+            interpreter
+                .execute(input_stream, None)
+                .expect("Failed to execute program in interpreted mode");
+        });
+}
+
+#[divan::bench(sample_count = 5)]
+fn benchmark_leaf_verifier_execute_metered(bencher: Bencher) {
+    bencher
+        .with_inputs(|| {
+            let (vm, leaf_exe, input_stream) = setup_leaf_verifier();
+            let ctx = vm.build_metered_ctx();
+            let executor_idx_to_air_idx = vm.executor_idx_to_air_idx();
+            let interpreter = vm
+                .executor()
+                .metered_instance(&leaf_exe, &executor_idx_to_air_idx)
+                .unwrap();
+
+            // SAFETY: the transmute only rewrites the instance's lifetime parameter so it can be
+            // returned next to `vm` (kept alive as `_vm`). NOTE(review): assumes the instance has
+            // no pointers into `vm` (moved here) or `executor_idx_to_air_idx` (dropped) - confirm.
+            #[allow(clippy::missing_transmute_annotations)]
+            let interpreter =
+                unsafe { std::mem::transmute::<_, InterpretedInstance<'_, _, _>>(interpreter) };
+
+            (vm, interpreter, input_stream, ctx)
+        })
+        .bench_values(|(_vm, interpreter, input_stream, ctx)| {
+            interpreter
+                .execute_metered(input_stream, ctx)
+                .expect("Failed to execute program");
+        });
+}
+
+/// Benchmarks `execute_preflight` of the leaf verifier on the `setup_leaf_verifier` fixtures.
+#[divan::bench(sample_count = 5)]
+fn benchmark_leaf_verifier_execute_preflight(bencher: Bencher) {
+    bencher
+        .with_inputs(|| {
+            let (vm, leaf_exe, stdin) = setup_leaf_verifier();
+            let initial_state = vm.create_initial_state(&leaf_exe, stdin);
+            let preflight = vm.preflight_interpreter(&leaf_exe).unwrap();
+            (vm, initial_state, preflight)
+        })
+        .bench_values(|(vm, initial_state, mut preflight)| {
+            let _out = vm
+                .execute_preflight(&mut preflight, initial_state, None, NATIVE_MAX_TRACE_HEIGHTS)
+                .expect("Failed to execute preflight");
+        });
+}
diff --git a/benchmarks/execute/benches/fibonacci_execute.rs b/benchmarks/execute/benches/fibonacci_execute.rs
index 70952b53c9..49b453d028 100644
--- a/benchmarks/execute/benches/fibonacci_execute.rs
+++ b/benchmarks/execute/benches/fibonacci_execute.rs
@@ -1,42 +1,44 @@
-use criterion::{criterion_group, criterion_main, Criterion};
-use openvm_benchmarks_utils::{build_elf, get_programs_dir};
-use openvm_circuit::arch::{instructions::exe::VmExe, VmExecutor};
-use openvm_rv32im_circuit::Rv32ImConfig;
-use openvm_rv32im_transpiler::{
-    Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
-};
-use openvm_sdk::StdIn;
-use openvm_stark_sdk::p3_baby_bear::BabyBear;
-use openvm_transpiler::{transpiler::Transpiler, FromElf};
+// use criterion::{criterion_group, criterion_main, Criterion};
+// use openvm_benchmarks_utils::{build_elf, get_programs_dir};
+// use openvm_circuit::arch::{instructions::exe::VmExe, VmExecutor};
+// use openvm_rv32im_circuit::Rv32ImConfig;
+// use openvm_rv32im_transpiler::{
+//     Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
+// };
+// // TODO(ayush): add this back
+// // use openvm_sdk::StdIn;
+// use openvm_stark_sdk::p3_baby_bear::BabyBear;
+// use openvm_transpiler::{transpiler::Transpiler, FromElf};
 
-fn benchmark_function(c: &mut Criterion) {
-    let program_dir = get_programs_dir().join("fibonacci");
-    let elf = build_elf(&program_dir, "release").unwrap();
+// fn benchmark_function(c: &mut Criterion) {
+//     let program_dir = get_programs_dir().join("fibonacci");
+//     let elf = build_elf(&program_dir, "release").unwrap();
 
-    let exe = VmExe::from_elf(
-        elf,
-        Transpiler::<BabyBear>::default()
-            .with_extension(Rv32ITranspilerExtension)
-            .with_extension(Rv32MTranspilerExtension)
-            .with_extension(Rv32IoTranspilerExtension),
-    )
-    .unwrap();
+//     let exe = VmExe::from_elf(
+//         elf,
+//         Transpiler::<BabyBear>::default()
+//             .with_extension(Rv32ITranspilerExtension)
+//             .with_extension(Rv32MTranspilerExtension)
+//             .with_extension(Rv32IoTranspilerExtension),
+//     )
+//     .unwrap();
 
-    let mut group = c.benchmark_group("fibonacci");
-    let config = Rv32ImConfig::default();
-    let executor = VmExecutor::<BabyBear, Rv32ImConfig>::new(config);
+//     let mut group = c.benchmark_group("fibonacci");
+//     let config = Rv32ImConfig::default();
+//     let executor = VmExecutor::<BabyBear, Rv32ImConfig>::new(config);
 
-    group.bench_function("execute", |b| {
-        b.iter(|| {
-            let n = 100_000u64;
-            let mut stdin = StdIn::default();
-            stdin.write(&n);
-            executor.execute(exe.clone(), stdin).unwrap();
-        })
-    });
+//     group.bench_function("execute", |b| {
+//         b.iter(|| {
+//             // TODO(ayush): add this back
+//             // let n = 100_000u64;
+//             // let mut stdin = StdIn::default();
+//             // stdin.write(&n);
+//             executor.execute(exe.clone(), vec![]).unwrap();
+//         })
+//     });
 
-    group.finish();
-}
+//     group.finish();
+// }
 
-criterion_group!(benches, benchmark_function);
-criterion_main!(benches);
+// criterion_group!(benches, benchmark_function);
+// criterion_main!(benches);
diff --git a/benchmarks/execute/benches/regex_execute.rs b/benchmarks/execute/benches/regex_execute.rs
index a3a110e344..d4116b5aab 100644
--- a/benchmarks/execute/benches/regex_execute.rs
+++ b/benchmarks/execute/benches/regex_execute.rs
@@ -1,47 +1,47 @@
-use criterion::{black_box, criterion_group, criterion_main, Criterion};
-use openvm_benchmarks_utils::{build_elf, get_programs_dir};
-use openvm_circuit::arch::{instructions::exe::VmExe, VmExecutor};
-use openvm_keccak256_circuit::Keccak256Rv32Config;
-use openvm_keccak256_transpiler::Keccak256TranspilerExtension;
-use openvm_rv32im_transpiler::{
-    Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
-};
-use openvm_sdk::StdIn;
-use openvm_stark_sdk::p3_baby_bear::BabyBear;
-use openvm_transpiler::{transpiler::Transpiler, FromElf};
+// TODO(ayush): add this back
+// use criterion::{black_box, criterion_group, criterion_main, Criterion};
+// use openvm_benchmarks_utils::{build_elf, get_programs_dir};
+// use openvm_circuit::arch::{instructions::exe::VmExe, VmExecutor};
+// use openvm_keccak256_circuit::Keccak256Rv32Config;
+// use openvm_keccak256_transpiler::Keccak256TranspilerExtension;
+// use openvm_rv32im_transpiler::{
+//     Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
+// };
+// use openvm_sdk::StdIn;
+// use openvm_stark_sdk::p3_baby_bear::BabyBear;
+// use openvm_transpiler::{transpiler::Transpiler, FromElf};
 
-fn benchmark_function(c: &mut Criterion) {
-    let program_dir = get_programs_dir().join("regex");
-    let elf = build_elf(&program_dir, "release").unwrap();
+// fn benchmark_function(c: &mut Criterion) {
+//     let program_dir = get_programs_dir().join("regex");
+//     let elf = build_elf(&program_dir, "release").unwrap();
 
-    let exe = VmExe::from_elf(
-        elf,
-        Transpiler::<BabyBear>::default()
-            .with_extension(Rv32ITranspilerExtension)
-            .with_extension(Rv32MTranspilerExtension)
-            .with_extension(Rv32IoTranspilerExtension)
-            .with_extension(Keccak256TranspilerExtension),
-    )
-    .unwrap();
+//     let exe = VmExe::from_elf(
+//         elf,
+//         Transpiler::<BabyBear>::default()
+//             .with_extension(Rv32ITranspilerExtension)
+//             .with_extension(Rv32MTranspilerExtension)
+//             .with_extension(Rv32IoTranspilerExtension)
+//             .with_extension(Keccak256TranspilerExtension),
+//     )
+//     .unwrap();
 
-    let mut group = c.benchmark_group("regex");
-    group.sample_size(10);
-    let config = Keccak256Rv32Config::default();
-    let executor = VmExecutor::<BabyBear, Keccak256Rv32Config>::new(config);
+//     let mut group = c.benchmark_group("regex");
+//     group.sample_size(10);
+//     let config = Keccak256Rv32Config::default();
+//     let executor = VmExecutor::<BabyBear, Keccak256Rv32Config>::new(config);
 
-    let data = include_str!("../../guest/regex/regex_email.txt");
+//     let data = include_str!("../../guest/regex/regex_email.txt");
 
-    let fe_bytes = data.to_owned().into_bytes();
-    group.bench_function("execute", |b| {
-        b.iter(|| {
-            executor
-                .execute(exe.clone(), black_box(StdIn::from_bytes(&fe_bytes)))
-                .unwrap();
-        })
-    });
+//     let fe_bytes = data.to_owned().into_bytes();
+//     group.bench_function("execute", |b| {
+//         b.iter(|| {
+//             let input = black_box(StdIn::from_bytes(&fe_bytes));
+//             executor.execute(exe.clone(), input).unwrap();
+//         })
+//     });
 
-    group.finish();
-}
+//     group.finish();
+// }
 
-criterion_group!(benches, benchmark_function);
-criterion_main!(benches);
+// criterion_group!(benches, benchmark_function);
+// criterion_main!(benches);
diff --git a/benchmarks/execute/examples/regex_execute.rs b/benchmarks/execute/examples/regex_execute.rs
index 59705a19fd..3a6fd4162f 100644
--- a/benchmarks/execute/examples/regex_execute.rs
+++ b/benchmarks/execute/examples/regex_execute.rs
@@ -1,35 +1,35 @@
-use openvm_circuit::arch::{instructions::exe::VmExe, VmExecutor};
-use openvm_keccak256_circuit::Keccak256Rv32Config;
-use openvm_keccak256_transpiler::Keccak256TranspilerExtension;
-use openvm_rv32im_transpiler::{
-    Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
-};
-use openvm_sdk::StdIn;
-use openvm_stark_sdk::p3_baby_bear::BabyBear;
-use openvm_transpiler::{
-    elf::Elf, openvm_platform::memory::MEM_SIZE, transpiler::Transpiler, FromElf,
-};
+// use openvm_circuit::arch::{instructions::exe::VmExe, VmExecutor};
+// use openvm_keccak256_circuit::Keccak256Rv32Config;
+// use openvm_keccak256_transpiler::Keccak256TranspilerExtension;
+// use openvm_rv32im_transpiler::{
+//     Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
+// };
+// use openvm_sdk::StdIn;
+// use openvm_stark_sdk::p3_baby_bear::BabyBear;
+// use openvm_transpiler::{
+//     elf::Elf, openvm_platform::memory::MEM_SIZE, transpiler::Transpiler, FromElf,
+// };
 
 fn main() {
-    let elf = Elf::decode(include_bytes!("regex-elf"), MEM_SIZE as u32).unwrap();
-    let exe = VmExe::from_elf(
-        elf,
-        Transpiler::<BabyBear>::default()
-            .with_extension(Rv32ITranspilerExtension)
-            .with_extension(Rv32MTranspilerExtension)
-            .with_extension(Rv32IoTranspilerExtension)
-            .with_extension(Keccak256TranspilerExtension),
-    )
-    .unwrap();
+    // let elf = Elf::decode(include_bytes!("regex-elf"), MEM_SIZE as u32).unwrap();
+    // let exe = VmExe::from_elf(
+    //     elf,
+    //     Transpiler::<BabyBear>::default()
+    //         .with_extension(Rv32ITranspilerExtension)
+    //         .with_extension(Rv32MTranspilerExtension)
+    //         .with_extension(Rv32IoTranspilerExtension)
+    //         .with_extension(Keccak256TranspilerExtension),
+    // )
+    // .unwrap();
 
-    let config = Keccak256Rv32Config::default();
-    let executor = VmExecutor::<BabyBear, Keccak256Rv32Config>::new(config);
+    // let config = Keccak256Rv32Config::default();
+    // let executor = VmExecutor::<BabyBear, Keccak256Rv32Config>::new(config);
 
-    let data = include_str!("../../guest/regex/regex_email.txt");
+    // let data = include_str!("../../guest/regex/regex_email.txt");
 
-    let timer = std::time::Instant::now();
-    executor
-        .execute(exe.clone(), StdIn::from_bytes(data.as_bytes()))
-        .unwrap();
-    println!("execute_time: {:?}", timer.elapsed());
+    // let timer = std::time::Instant::now();
+    // executor
+    //     .execute(exe.clone(), StdIn::from_bytes(data.as_bytes()))
+    //     .unwrap();
+    // println!("execute_time: {:?}", timer.elapsed());
 }
diff --git a/benchmarks/execute/src/execute-leaf-verifier.rs b/benchmarks/execute/src/execute-leaf-verifier.rs
new file mode 100644
index 0000000000..adfed7fd05
--- /dev/null
+++ b/benchmarks/execute/src/execute-leaf-verifier.rs
@@ -0,0 +1,99 @@
+use std::fs;
+
+use clap::{arg, Parser, ValueEnum};
+use eyre::Result;
+use openvm_benchmarks_utils::get_fixtures_dir;
+use openvm_circuit::arch::{instructions::exe::VmExe, ContinuationVmProof, VirtualMachine};
+use openvm_continuations::{
+    verifier::{common::types::VmVerifierPvs, leaf::types::LeafVmVerifierInput},
+    SC,
+};
+use openvm_native_circuit::{NativeConfig, NativeCpuBuilder, NATIVE_MAX_TRACE_HEIGHTS};
+use openvm_sdk::config::{DEFAULT_LEAF_LOG_BLOWUP, SBOX_SIZE};
+use openvm_stark_sdk::{
+    config::{baby_bear_poseidon2::BabyBearPoseidon2Engine, FriParameters},
+    engine::{StarkEngine, StarkFriEngine},
+    openvm_stark_backend::prover::hal::DeviceDataTransporter,
+    p3_baby_bear::BabyBear,
+};
+use tracing_subscriber::{fmt, EnvFilter};
+
+const PROGRAM_NAME: &str = "kitchen-sink";
+
+#[derive(Clone, Debug, ValueEnum)]
+enum ExecutionMode {
+    Normal,
+    Metered,
+    Preflight,
+}
+
+#[derive(Parser)]
+#[command(author, version, about = "OpenVM leaf verifier execution")]
+struct Cli {
+    #[arg(short, long, value_enum, default_value = "preflight")]
+    mode: ExecutionMode,
+
+    #[arg(short, long)]
+    verbose: bool,
+}
+
+fn main() -> Result<()> {
+    let cli = Cli::parse();
+
+    // Set up logging
+    let filter = if cli.verbose {
+        EnvFilter::from_default_env()
+    } else {
+        EnvFilter::new("info")
+    };
+    fmt::fmt().with_env_filter(filter).init();
+
+    let fixtures_dir = get_fixtures_dir();
+    let app_proof_bytes =
+        fs::read(fixtures_dir.join(format!("{}.app.proof", PROGRAM_NAME))).unwrap();
+    let app_proof: ContinuationVmProof<SC> = bitcode::deserialize(&app_proof_bytes).unwrap();
+
+    let leaf_exe_bytes = fs::read(fixtures_dir.join(format!("{}.leaf.exe", PROGRAM_NAME))).unwrap();
+    let leaf_exe: VmExe<BabyBear> = bitcode::deserialize(&leaf_exe_bytes).unwrap();
+
+    let leaf_pk_bytes = fs::read(fixtures_dir.join(format!("{}.leaf.pk", PROGRAM_NAME))).unwrap();
+    let leaf_pk = bitcode::deserialize(&leaf_pk_bytes).unwrap();
+
+    let leaf_inputs = LeafVmVerifierInput::chunk_continuation_vm_proof(&app_proof, 2);
+    let leaf_input = leaf_inputs.first().expect("No leaf input available");
+
+    let config = NativeConfig::aggregation(
+        VmVerifierPvs::<u8>::width(),
+        SBOX_SIZE.min(FriParameters::standard_fast().max_constraint_degree()),
+    );
+    let fri_params =
+        FriParameters::standard_with_100_bits_conjectured_security(DEFAULT_LEAF_LOG_BLOWUP);
+    let engine = BabyBearPoseidon2Engine::new(fri_params);
+    let d_pk = engine.device().transport_pk_to_device(&leaf_pk);
+    let vm = VirtualMachine::new(engine, NativeCpuBuilder, config, d_pk)?;
+    let input_stream = leaf_input.write_to_stream();
+
+    match cli.mode {
+        ExecutionMode::Normal => {
+            tracing::info!("Running normal execute...");
+            let interpreter = vm.executor().instance(&leaf_exe)?;
+            interpreter.execute(input_stream, None)?;
+        }
+        ExecutionMode::Metered => {
+            tracing::info!("Running metered execute...");
+            let ctx = vm.build_metered_ctx();
+            let interpreter = vm.metered_interpreter(&leaf_exe)?;
+            interpreter.execute_metered(input_stream, ctx)?;
+        }
+        ExecutionMode::Preflight => {
+            tracing::info!("Running preflight execute...");
+            let state = vm.create_initial_state(&leaf_exe, input_stream);
+            let mut interpreter = vm.preflight_interpreter(&leaf_exe)?;
+            let _out = vm
+                .execute_preflight(&mut interpreter, state, None, NATIVE_MAX_TRACE_HEIGHTS)
+                .expect("Failed to execute preflight");
+        }
+    }
+
+    Ok(())
+}
diff --git a/benchmarks/execute/src/main.rs b/benchmarks/execute/src/main.rs
deleted file mode 100644
index a05baeea44..0000000000
--- a/benchmarks/execute/src/main.rs
+++ /dev/null
@@ -1,121 +0,0 @@
-use cargo_openvm::util::read_config_toml_or_default;
-use clap::{Parser, ValueEnum};
-use eyre::Result;
-use openvm_benchmarks_utils::{get_elf_path, get_programs_dir, read_elf_file};
-use openvm_circuit::arch::{instructions::exe::VmExe, VmExecutor};
-use openvm_sdk::StdIn;
-use openvm_stark_sdk::bench::run_with_metric_collection;
-use openvm_transpiler::FromElf;
-
-#[derive(Debug, Clone, ValueEnum)]
-enum BuildProfile {
-    Debug,
-    Release,
-}
-
-static AVAILABLE_PROGRAMS: &[&str] = &[
-    "fibonacci_recursive",
-    "fibonacci_iterative",
-    "quicksort",
-    "bubblesort",
-    "pairing",
-    "keccak256",
-    "keccak256_iter",
-    "sha256",
-    "sha256_iter",
-    "revm_transfer",
-    "revm_snailtracer",
-];
-
-#[derive(Parser)]
-#[command(author, version, about = "OpenVM Benchmark CLI", long_about = None)]
-struct Cli {
-    /// Programs to benchmark (if not specified, all programs will be run)
-    #[arg(short, long)]
-    programs: Vec<String>,
-
-    /// Programs to skip from benchmarking
-    #[arg(short, long)]
-    skip: Vec<String>,
-
-    /// Output path for benchmark results
-    #[arg(short, long, default_value = "OUTPUT_PATH")]
-    output: String,
-
-    /// List available benchmark programs and exit
-    #[arg(short, long)]
-    list: bool,
-
-    /// Verbose output
-    #[arg(short, long)]
-    verbose: bool,
-}
-
-fn main() -> Result<()> {
-    let cli = Cli::parse();
-
-    if cli.list {
-        println!("Available benchmark programs:");
-        for program in AVAILABLE_PROGRAMS {
-            println!("  {}", program);
-        }
-        return Ok(());
-    }
-
-    // Set up logging based on verbosity
-    if cli.verbose {
-        tracing_subscriber::fmt::init();
-    }
-
-    let mut programs_to_run = if cli.programs.is_empty() {
-        AVAILABLE_PROGRAMS.to_vec()
-    } else {
-        // Validate provided programs
-        for program in &cli.programs {
-            if !AVAILABLE_PROGRAMS.contains(&program.as_str()) {
-                eprintln!("Unknown program: {}", program);
-                eprintln!("Use --list to see available programs");
-                std::process::exit(1);
-            }
-        }
-        cli.programs.iter().map(|s| s.as_str()).collect()
-    };
-
-    // Remove programs that should be skipped
-    if !cli.skip.is_empty() {
-        // Validate skipped programs
-        for program in &cli.skip {
-            if !AVAILABLE_PROGRAMS.contains(&program.as_str()) {
-                eprintln!("Unknown program to skip: {}", program);
-                eprintln!("Use --list to see available programs");
-                std::process::exit(1);
-            }
-        }
-
-        let skip_set: Vec<&str> = cli.skip.iter().map(|s| s.as_str()).collect();
-        programs_to_run.retain(|&program| !skip_set.contains(&program));
-    }
-
-    tracing::info!("Starting benchmarks with metric collection");
-
-    run_with_metric_collection(&cli.output, || -> Result<()> {
-        for program in &programs_to_run {
-            tracing::info!("Running program: {}", program);
-
-            let program_dir = get_programs_dir().join(program);
-            let elf_path = get_elf_path(&program_dir);
-            let elf = read_elf_file(&elf_path)?;
-
-            let config_path = program_dir.join("openvm.toml");
-            let vm_config = read_config_toml_or_default(&config_path)?.app_vm_config;
-
-            let exe = VmExe::from_elf(elf, vm_config.transpiler())?;
-
-            let executor = VmExecutor::new(vm_config);
-            executor.execute(exe, StdIn::default())?;
-            tracing::info!("Completed program: {}", program);
-        }
-        tracing::info!("All programs executed successfully");
-        Ok(())
-    })
-}
diff --git a/benchmarks/guest/Cargo.toml b/benchmarks/guest/Cargo.toml
new file mode 100644
index 0000000000..f27ae022c5
--- /dev/null
+++ b/benchmarks/guest/Cargo.toml
@@ -0,0 +1,31 @@
+[workspace.package]
+version = "0.0.0"
+edition = "2021"
+
+[workspace]
+members = ["base64_json", "bincode", "bubblesort", "ecrecover", "factorial_iterative_u256", "fibonacci", "fibonacci_iterative", "fibonacci_recursive", "keccak256", "keccak256_iter", "kitchen-sink", "pairing", "quicksort", "regex", "revm_snailtracer", "revm_transfer", "rkyv", "sha256", "sha256_iter"]
+resolver = "2"
+
+[workspace.dependencies]
+openvm = { path = "../../crates/toolchain/openvm" }
+openvm-algebra-guest = { path = "../../extensions/algebra/guest", default-features = false }
+openvm-ecc-guest = { path = "../../extensions/ecc/guest", default-features = false }
+openvm-keccak256 = { path = "../../guest-libs/keccak256/", default-features = false }
+openvm-ruint = { path = "../../guest-libs/ruint/", package = "ruint", default-features = false }
+openvm-pairing = { path = "../../guest-libs/pairing/", default-features = false }
+openvm-sha2 = { path = "../../guest-libs/sha2/", default-features = false }
+openvm-k256 = { path = "../../guest-libs/k256/", package = "k256" }
+openvm-p256 = { path = "../../guest-libs/p256/", package = "p256" }
+
+# patch for ecrecover
+[patch.crates-io]
+k256 = { path = "../../guest-libs/k256/" }
+
+[profile.release]
+panic = "abort"
+lto = "thin"    # faster compile time
+
+[profile.profiling]
+inherits = "release"
+debug = 2
+strip = false
diff --git a/benchmarks/guest/base64_json/Cargo.toml b/benchmarks/guest/base64_json/Cargo.toml
index f0f43b3479..9177070a63 100644
--- a/benchmarks/guest/base64_json/Cargo.toml
+++ b/benchmarks/guest/base64_json/Cargo.toml
@@ -1,19 +1,13 @@
-[workspace]
 [package]
-version = "0.1.0"
 name = "openvm-json-program"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
 
 [dependencies]
-openvm = { path = "../../../crates/toolchain/openvm", features = ["std"] }
+openvm = { workspace = true, features = ["std"] }
 base64 = { version = "0.22.1", default-features = false, features = ["alloc"] }
 serde = { version = "1.0.214", default-features = false, features = ["derive"] }
 serde-json-core = "0.6.0"
 
 [features]
 default = []
-
-[profile.profiling]
-inherits = "release"
-debug = 2
-strip = false
diff --git a/benchmarks/guest/base64_json/elf/openvm-json-program.elf b/benchmarks/guest/base64_json/elf/openvm-json-program.elf
index 29e6cac131..55335dca15 100755
Binary files a/benchmarks/guest/base64_json/elf/openvm-json-program.elf and b/benchmarks/guest/base64_json/elf/openvm-json-program.elf differ
diff --git a/benchmarks/guest/bincode/Cargo.toml b/benchmarks/guest/bincode/Cargo.toml
index eba3c918bf..3464800677 100644
--- a/benchmarks/guest/bincode/Cargo.toml
+++ b/benchmarks/guest/bincode/Cargo.toml
@@ -1,11 +1,10 @@
-[workspace]
 [package]
 name = "openvm-bincode-program"
-version = "0.0.0"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
 
 [dependencies]
-openvm = { path = "../../../crates/toolchain/openvm", features = ["std"] }
+openvm = { workspace = true, features = ["std"] }
 bincode = { version = "2.0.0-rc.3", default-features = false, features = [
     "derive",
     "alloc",
@@ -16,8 +15,3 @@ rand_pcg = "0.3.1"
 
 [features]
 default = []
-
-[profile.profiling]
-inherits = "release"
-debug = 2
-strip = false
diff --git a/benchmarks/guest/bincode/elf/openvm-bincode-program.elf b/benchmarks/guest/bincode/elf/openvm-bincode-program.elf
index 085eb7ee4f..2d4b2ae67a 100755
Binary files a/benchmarks/guest/bincode/elf/openvm-bincode-program.elf and b/benchmarks/guest/bincode/elf/openvm-bincode-program.elf differ
diff --git a/benchmarks/guest/bubblesort/Cargo.toml b/benchmarks/guest/bubblesort/Cargo.toml
index 68a0af82ff..957719a7df 100644
--- a/benchmarks/guest/bubblesort/Cargo.toml
+++ b/benchmarks/guest/bubblesort/Cargo.toml
@@ -1,16 +1,10 @@
-[workspace]
 [package]
 name = "openvm-bubblesort-program"
-version = "0.0.0"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
 
 [dependencies]
-openvm = { path = "../../../crates/toolchain/openvm", features = ["std"] }
+openvm = { workspace = true, features = ["std"] }
 
 [features]
 default = []
-
-[profile.profiling]
-inherits = "release"
-debug = 2
-strip = false
diff --git a/benchmarks/guest/bubblesort/elf/openvm-bubblesort-program.elf b/benchmarks/guest/bubblesort/elf/openvm-bubblesort-program.elf
index 0f81a3926f..cec789e279 100755
Binary files a/benchmarks/guest/bubblesort/elf/openvm-bubblesort-program.elf and b/benchmarks/guest/bubblesort/elf/openvm-bubblesort-program.elf differ
diff --git a/benchmarks/guest/bubblesort/src/main.rs b/benchmarks/guest/bubblesort/src/main.rs
index 0dd7e51146..d859641504 100644
--- a/benchmarks/guest/bubblesort/src/main.rs
+++ b/benchmarks/guest/bubblesort/src/main.rs
@@ -1,7 +1,7 @@
 use core::hint::black_box;
 use openvm as _;
 
-const ARRAY_SIZE: usize = 100;
+const ARRAY_SIZE: usize = 1_000;
 
 fn bubblesort<T: Ord>(arr: &mut [T]) {
     let len = arr.len();
diff --git a/benchmarks/guest/ecrecover/Cargo.toml b/benchmarks/guest/ecrecover/Cargo.toml
index b9592028f7..0937e63f0a 100644
--- a/benchmarks/guest/ecrecover/Cargo.toml
+++ b/benchmarks/guest/ecrecover/Cargo.toml
@@ -1,14 +1,13 @@
-[workspace]
 [package]
 name = "openvm-ecdsa-recover-key-program"
-version = "0.0.0"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
 
 [dependencies]
-openvm = { path = "../../../crates/toolchain/openvm", features = ["std"] }
-openvm-algebra-guest = { path = "../../../extensions/algebra/guest", default-features = false }
-openvm-ecc-guest = { path = "../../../extensions/ecc/guest", default-features = false }
-openvm-keccak256 = { path = "../../../guest-libs/keccak256/", default-features = false }
+openvm = { workspace = true, features = ["std"] }
+openvm-algebra-guest.workspace = true
+openvm-ecc-guest.workspace = true
+openvm-keccak256.workspace = true
 revm-precompile = { git = "https://github.com/bluealloy/revm.git", tag = "v75", default-features = false }
 # IMPORTANT: must be same version as used by revm; revm does not re-export this feature so we enable it here
 alloy-primitives = { version = "1.2.0", default-features = false, features = [
@@ -18,15 +17,3 @@ k256 = { version = "0.13.3", default-features = false }
 
 [features]
 default = []
-
-[profile.release]
-panic = "abort"
-lto = "thin"    # faster compile time
-
-[profile.profiling]
-inherits = "release"
-debug = 2
-strip = false
-
-[patch.crates-io]
-k256 = { path = "../../../guest-libs/k256/" }
diff --git a/benchmarks/guest/ecrecover/elf/openvm-ecdsa-recover-key-program.elf b/benchmarks/guest/ecrecover/elf/openvm-ecdsa-recover-key-program.elf
index 4e54268ea4..88c87c6abc 100755
Binary files a/benchmarks/guest/ecrecover/elf/openvm-ecdsa-recover-key-program.elf and b/benchmarks/guest/ecrecover/elf/openvm-ecdsa-recover-key-program.elf differ
diff --git a/benchmarks/guest/factorial_iterative_u256/Cargo.toml b/benchmarks/guest/factorial_iterative_u256/Cargo.toml
new file mode 100644
index 0000000000..7acc9e66f8
--- /dev/null
+++ b/benchmarks/guest/factorial_iterative_u256/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "openvm-factorial-iterative-u256-program"
+version.workspace = true
+edition.workspace = true
+
+[dependencies]
+openvm = { workspace = true, features = ["std"] }
+openvm-ruint.workspace = true
+
+[features]
+default = []
diff --git a/benchmarks/guest/factorial_iterative_u256/elf/openvm-factorial-iterative-u256-program.elf b/benchmarks/guest/factorial_iterative_u256/elf/openvm-factorial-iterative-u256-program.elf
new file mode 100755
index 0000000000..572f71b182
Binary files /dev/null and b/benchmarks/guest/factorial_iterative_u256/elf/openvm-factorial-iterative-u256-program.elf differ
diff --git a/benchmarks/guest/factorial_iterative_u256/openvm.toml b/benchmarks/guest/factorial_iterative_u256/openvm.toml
new file mode 100644
index 0000000000..b226887890
--- /dev/null
+++ b/benchmarks/guest/factorial_iterative_u256/openvm.toml
@@ -0,0 +1,4 @@
+[app_vm_config.rv32i]
+[app_vm_config.rv32m]
+[app_vm_config.io]
+[app_vm_config.bigint]
diff --git a/benchmarks/guest/factorial_iterative_u256/src/main.rs b/benchmarks/guest/factorial_iterative_u256/src/main.rs
new file mode 100644
index 0000000000..c92491d2da
--- /dev/null
+++ b/benchmarks/guest/factorial_iterative_u256/src/main.rs
@@ -0,0 +1,16 @@
+use core::hint::black_box;
+use openvm as _;
+use openvm_ruint::aliases::U256;
+
+// The running product overflows U256 after a few iterations; that is fine for a benchmark
+const N: u32 = 65_000;
+
+pub fn main() {
+    let mut acc = U256::from(1u32);
+    let mut i = U256::from(N);
+    while i > black_box(U256::ZERO) {
+        acc *= i.clone();
+        i -= U256::from(1u32);
+    }
+    black_box(acc);
+}
diff --git a/benchmarks/guest/fibonacci/Cargo.toml b/benchmarks/guest/fibonacci/Cargo.toml
index 4ea6659e73..469868a3b9 100644
--- a/benchmarks/guest/fibonacci/Cargo.toml
+++ b/benchmarks/guest/fibonacci/Cargo.toml
@@ -1,16 +1,10 @@
-[workspace]
 [package]
 name = "openvm-fibonacci-program"
-version = "0.0.0"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
 
 [dependencies]
-openvm = { path = "../../../crates/toolchain/openvm", features = ["std"] }
+openvm = { workspace = true, features = ["std"] }
 
 [features]
 default = []
-
-[profile.profiling]
-inherits = "release"
-debug = 2
-strip = false
diff --git a/benchmarks/guest/fibonacci/elf/openvm-fibonacci-program.elf b/benchmarks/guest/fibonacci/elf/openvm-fibonacci-program.elf
index 36ad8d359c..20335618e4 100755
Binary files a/benchmarks/guest/fibonacci/elf/openvm-fibonacci-program.elf and b/benchmarks/guest/fibonacci/elf/openvm-fibonacci-program.elf differ
diff --git a/benchmarks/guest/fibonacci_iterative/Cargo.toml b/benchmarks/guest/fibonacci_iterative/Cargo.toml
index 6f0c145061..75f564d2b9 100644
--- a/benchmarks/guest/fibonacci_iterative/Cargo.toml
+++ b/benchmarks/guest/fibonacci_iterative/Cargo.toml
@@ -1,16 +1,10 @@
-[workspace]
 [package]
 name = "openvm-fibonacci-iterative-program"
-version = "0.0.0"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
 
 [dependencies]
-openvm = { path = "../../../crates/toolchain/openvm", features = ["std"] }
+openvm = { workspace = true, features = ["std"] }
 
 [features]
 default = []
-
-[profile.profiling]
-inherits = "release"
-debug = 2
-strip = false
diff --git a/benchmarks/guest/fibonacci_iterative/elf/openvm-fibonacci-iterative-program.elf b/benchmarks/guest/fibonacci_iterative/elf/openvm-fibonacci-iterative-program.elf
index ac9fbf3e89..7c681ee313 100755
Binary files a/benchmarks/guest/fibonacci_iterative/elf/openvm-fibonacci-iterative-program.elf and b/benchmarks/guest/fibonacci_iterative/elf/openvm-fibonacci-iterative-program.elf differ
diff --git a/benchmarks/guest/fibonacci_iterative/src/main.rs b/benchmarks/guest/fibonacci_iterative/src/main.rs
index 09ceb5df41..f7ab8ec0f6 100644
--- a/benchmarks/guest/fibonacci_iterative/src/main.rs
+++ b/benchmarks/guest/fibonacci_iterative/src/main.rs
@@ -1,15 +1,15 @@
 use core::hint::black_box;
-use openvm as _;
+use openvm::io::reveal_u32;
 
-const N: u64 = 100_000;
+const N: u32 = 900_000;
 
 pub fn main() {
-    let mut a: u64 = 0;
-    let mut b: u64 = 1;
+    let mut a: u32 = 0;
+    let mut b: u32 = 1;
     for _ in 0..black_box(N) {
-        let c: u64 = a.wrapping_add(b);
+        let c: u32 = a.wrapping_add(b);
         a = b;
         b = c;
     }
-    black_box(a);
+    reveal_u32(a, 0);
 }
diff --git a/benchmarks/guest/fibonacci_recursive/Cargo.toml b/benchmarks/guest/fibonacci_recursive/Cargo.toml
index 95b124df43..2b8177d1c7 100644
--- a/benchmarks/guest/fibonacci_recursive/Cargo.toml
+++ b/benchmarks/guest/fibonacci_recursive/Cargo.toml
@@ -1,16 +1,10 @@
-[workspace]
 [package]
 name = "openvm-fibonacci-recursive-program"
-version = "0.0.0"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
 
 [dependencies]
-openvm = { path = "../../../crates/toolchain/openvm", features = ["std"] }
+openvm = { workspace = true, features = ["std"] }
 
 [features]
 default = []
-
-[profile.profiling]
-inherits = "release"
-debug = 2
-strip = false
diff --git a/benchmarks/guest/fibonacci_recursive/elf/openvm-fibonacci-recursive-program.elf b/benchmarks/guest/fibonacci_recursive/elf/openvm-fibonacci-recursive-program.elf
index 7dee9d4286..d14372657c 100755
Binary files a/benchmarks/guest/fibonacci_recursive/elf/openvm-fibonacci-recursive-program.elf and b/benchmarks/guest/fibonacci_recursive/elf/openvm-fibonacci-recursive-program.elf differ
diff --git a/benchmarks/guest/fibonacci_recursive/src/main.rs b/benchmarks/guest/fibonacci_recursive/src/main.rs
index fae64a1b0f..9020bc91ef 100644
--- a/benchmarks/guest/fibonacci_recursive/src/main.rs
+++ b/benchmarks/guest/fibonacci_recursive/src/main.rs
@@ -1,14 +1,15 @@
 use core::hint::black_box;
-use openvm as _;
+use openvm::io::reveal_u32;
 
-const N: u64 = 25;
+const N: u32 = 27;
 
 pub fn main() {
     let n = black_box(N);
-    black_box(fibonacci(n));
+    let result = fibonacci(n);
+    reveal_u32(result, 0);
 }
 
-fn fibonacci(n: u64) -> u64 {
+fn fibonacci(n: u32) -> u32 {
     if n == 0 {
         0
     } else if n == 1 {
diff --git a/benchmarks/guest/keccak256/Cargo.toml b/benchmarks/guest/keccak256/Cargo.toml
index 35bc10320a..486330ee17 100644
--- a/benchmarks/guest/keccak256/Cargo.toml
+++ b/benchmarks/guest/keccak256/Cargo.toml
@@ -1,18 +1,11 @@
-[workspace]
 [package]
 name = "openvm-keccak256-program"
-version = "0.0.0"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
 
 [dependencies]
-openvm = { path = "../../../crates/toolchain/openvm", features = ["std"] }
-openvm-keccak256 = { path = "../../../guest-libs/keccak256" }
+openvm = { workspace = true, features = ["std"] }
+openvm-keccak256.workspace = true
 
 [features]
 default = []
-
-[profile.profiling]
-inherits = "release"
-debug = 2
-
-strip = false
diff --git a/benchmarks/guest/keccak256/elf/openvm-keccak256-program.elf b/benchmarks/guest/keccak256/elf/openvm-keccak256-program.elf
index 7425897f99..6e0fc26837 100755
Binary files a/benchmarks/guest/keccak256/elf/openvm-keccak256-program.elf and b/benchmarks/guest/keccak256/elf/openvm-keccak256-program.elf differ
diff --git a/benchmarks/guest/keccak256/src/main.rs b/benchmarks/guest/keccak256/src/main.rs
index 5a00ba4067..0d8c6d17b4 100644
--- a/benchmarks/guest/keccak256/src/main.rs
+++ b/benchmarks/guest/keccak256/src/main.rs
@@ -3,7 +3,7 @@ use openvm as _;
 
 use openvm_keccak256::keccak256;
 
-const INPUT_LENGTH_BYTES: usize = 100 * 1024; // 100 KB
+const INPUT_LENGTH_BYTES: usize = 384 * 1024;
 
 pub fn main() {
     let mut input = Vec::with_capacity(INPUT_LENGTH_BYTES);
diff --git a/benchmarks/guest/keccak256_iter/Cargo.toml b/benchmarks/guest/keccak256_iter/Cargo.toml
index 68c2cbb5dd..73e498e9cf 100644
--- a/benchmarks/guest/keccak256_iter/Cargo.toml
+++ b/benchmarks/guest/keccak256_iter/Cargo.toml
@@ -1,17 +1,11 @@
-[workspace]
 [package]
 name = "openvm-keccak256-iter-program"
-version = "0.0.0"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
 
 [dependencies]
-openvm = { path = "../../../crates/toolchain/openvm", features = ["std"] }
-openvm-keccak256 = { path = "../../../guest-libs/keccak256" }
+openvm = { workspace = true, features = ["std"] }
+openvm-keccak256.workspace = true
 
 [features]
 default = []
-
-[profile.profiling]
-inherits = "release"
-debug = 2
-strip = false
diff --git a/benchmarks/guest/keccak256_iter/elf/openvm-keccak256-iter-program.elf b/benchmarks/guest/keccak256_iter/elf/openvm-keccak256-iter-program.elf
index 0cf372eec3..7a267a02ab 100755
Binary files a/benchmarks/guest/keccak256_iter/elf/openvm-keccak256-iter-program.elf and b/benchmarks/guest/keccak256_iter/elf/openvm-keccak256-iter-program.elf differ
diff --git a/benchmarks/guest/keccak256_iter/src/main.rs b/benchmarks/guest/keccak256_iter/src/main.rs
index ef36ff1d64..554179819a 100644
--- a/benchmarks/guest/keccak256_iter/src/main.rs
+++ b/benchmarks/guest/keccak256_iter/src/main.rs
@@ -3,7 +3,7 @@ use openvm as _;
 
 use openvm_keccak256::keccak256;
 
-const ITERATIONS: usize = 10_000;
+const ITERATIONS: usize = 65_000;
 
 pub fn main() {
     // Initialize with hash of an empty vector
diff --git a/benchmarks/guest/kitchen-sink/Cargo.toml b/benchmarks/guest/kitchen-sink/Cargo.toml
index f699305cea..9088004f45 100644
--- a/benchmarks/guest/kitchen-sink/Cargo.toml
+++ b/benchmarks/guest/kitchen-sink/Cargo.toml
@@ -1,35 +1,25 @@
-[workspace]
 [package]
 name = "openvm-kitchen-sink-program"
-version = "0.0.0"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
 
 [dependencies]
-openvm = { path = "../../../crates/toolchain/openvm", default-features = false, features = [
+openvm = { workspace = true, features = [
     "std",
 ] }
-openvm-algebra-guest = { path = "../../../extensions/algebra/guest", default-features = false }
-openvm-ecc-guest = { path = "../../../extensions/ecc/guest", default-features = false }
-openvm-pairing = { path = "../../../guest-libs/pairing/", features = [
+openvm-algebra-guest.workspace = true
+openvm-ecc-guest.workspace = true
+openvm-pairing = { workspace = true, features = [
     "bn254",
     "bls12_381",
 ] }
-openvm-keccak256 = { path = "../../../guest-libs/keccak256/", default-features = false }
-openvm-sha2 = { path = "../../../guest-libs/sha2/", default-features = false }
-openvm-k256 = { path = "../../../guest-libs/k256/", package = "k256" }
-openvm-p256 = { path = "../../../guest-libs/p256/", package = "p256" }
-openvm-ruint = { path = "../../../guest-libs/ruint/", package = "ruint", default-features = false }
+openvm-keccak256.workspace = true
+openvm-sha2.workspace = true
+openvm-k256.workspace = true
+openvm-p256.workspace = true
+openvm-ruint.workspace = true
 hex = { version = "0.4.3", default-features = false, features = ["alloc"] }
 serde = "1.0"
 
 [features]
 default = []
-
-[profile.release]
-panic = "abort"
-lto = "thin"    # faster compile time
-
-[profile.profiling]
-inherits = "release"
-debug = 2
-strip = false
diff --git a/benchmarks/guest/kitchen-sink/elf/openvm-kitchen-sink-program.elf b/benchmarks/guest/kitchen-sink/elf/openvm-kitchen-sink-program.elf
index 85f3509fa5..fb59df5d0a 100755
Binary files a/benchmarks/guest/kitchen-sink/elf/openvm-kitchen-sink-program.elf and b/benchmarks/guest/kitchen-sink/elf/openvm-kitchen-sink-program.elf differ
diff --git a/benchmarks/guest/pairing/Cargo.toml b/benchmarks/guest/pairing/Cargo.toml
index dfd73f5eb6..f616b19399 100644
--- a/benchmarks/guest/pairing/Cargo.toml
+++ b/benchmarks/guest/pairing/Cargo.toml
@@ -1,30 +1,16 @@
-[workspace]
 [package]
 name = "openvm-pairing-program"
-version = "0.0.0"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
 
 [dependencies]
-openvm = { path = "../../../crates/toolchain/openvm", features = ["std"] }
-openvm-algebra-guest = { path = "../../../extensions/algebra/guest", default-features = false }
-openvm-ecc-guest = { path = "../../../extensions/ecc/guest", default-features = false }
-openvm-pairing = { path = "../../../guest-libs/pairing/", default-features = false, features = [
-    "bn254",
-] }
-openvm-pairing-guest = { path = "../../../extensions/pairing/guest", default-features = false, features = [
+openvm = { workspace = true, features = ["std"] }
+openvm-algebra-guest.workspace = true
+openvm-ecc-guest.workspace = true
+openvm-pairing = { workspace = true, features = [
     "bn254",
 ] }
 hex = { version = "0.4.3", default-features = false, features = ["alloc"] }
 
 [features]
 default = []
-halo2curves = ["openvm-pairing-guest/halo2curves"]
-
-[profile.release]
-panic = "abort"
-lto = "thin"    # faster compile time
-
-[profile.profiling]
-inherits = "release"
-debug = 2
-strip = false
diff --git a/benchmarks/guest/pairing/elf/openvm-pairing-program.elf b/benchmarks/guest/pairing/elf/openvm-pairing-program.elf
index bf30d5a003..69c3cd0106 100755
Binary files a/benchmarks/guest/pairing/elf/openvm-pairing-program.elf and b/benchmarks/guest/pairing/elf/openvm-pairing-program.elf differ
diff --git a/benchmarks/guest/pairing/src/main.rs b/benchmarks/guest/pairing/src/main.rs
index 2b30297248..09e4a259dd 100644
--- a/benchmarks/guest/pairing/src/main.rs
+++ b/benchmarks/guest/pairing/src/main.rs
@@ -1,9 +1,9 @@
 use openvm_algebra_guest::IntMod;
 use openvm_ecc_guest::AffinePoint;
 #[allow(unused_imports)]
-use {
-    openvm_pairing::bn254::{Bn254, Bn254G1Affine, Fp, Fp2},
-    openvm_pairing_guest::pairing::PairingCheck,
+use openvm_pairing::{
+    bn254::{Bn254, Bn254G1Affine, Fp, Fp2},
+    PairingCheck,
 };
 
 openvm::init!();
diff --git a/benchmarks/guest/quicksort/Cargo.toml b/benchmarks/guest/quicksort/Cargo.toml
index 8556264be0..729208640f 100644
--- a/benchmarks/guest/quicksort/Cargo.toml
+++ b/benchmarks/guest/quicksort/Cargo.toml
@@ -1,16 +1,10 @@
-[workspace]
 [package]
 name = "openvm-quicksort-program"
-version = "0.0.0"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
 
 [dependencies]
-openvm = { path = "../../../crates/toolchain/openvm", features = ["std"] }
+openvm = { workspace = true, features = ["std"] }
 
 [features]
 default = []
-
-[profile.profiling]
-inherits = "release"
-debug = 2
-strip = false
diff --git a/benchmarks/guest/quicksort/elf/openvm-quicksort-program.elf b/benchmarks/guest/quicksort/elf/openvm-quicksort-program.elf
index 54af6272d6..0e7d6e6143 100755
Binary files a/benchmarks/guest/quicksort/elf/openvm-quicksort-program.elf and b/benchmarks/guest/quicksort/elf/openvm-quicksort-program.elf differ
diff --git a/benchmarks/guest/quicksort/src/main.rs b/benchmarks/guest/quicksort/src/main.rs
index 30218cf40e..a6579306c7 100644
--- a/benchmarks/guest/quicksort/src/main.rs
+++ b/benchmarks/guest/quicksort/src/main.rs
@@ -1,7 +1,7 @@
 use core::hint::black_box;
 use openvm as _;
 
-const ARRAY_SIZE: usize = 1_000;
+const ARRAY_SIZE: usize = 3_500;
 
 fn quicksort<T: Ord>(arr: &mut [T]) {
     if arr.len() <= 1 {
diff --git a/benchmarks/guest/regex/Cargo.toml b/benchmarks/guest/regex/Cargo.toml
index 40831a592d..1ffb3fb440 100644
--- a/benchmarks/guest/regex/Cargo.toml
+++ b/benchmarks/guest/regex/Cargo.toml
@@ -1,18 +1,12 @@
-[workspace]
 [package]
-version = "0.1.0"
 name = "openvm-regex-program"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
 
 [dependencies]
-openvm = { path = "../../../crates/toolchain/openvm", features = ["std"] }
-openvm-keccak256 = { path = "../../../guest-libs/keccak256/" }
+openvm = { workspace = true, features = ["std"] }
+openvm-keccak256.workspace = true
 regex = { version = "1.11.1", default-features = false }
 
 [features]
 default = []
-
-[profile.profiling]
-inherits = "release"
-debug = 2
-strip = false
diff --git a/benchmarks/guest/regex/elf/openvm-regex-program.elf b/benchmarks/guest/regex/elf/openvm-regex-program.elf
index 6e6074e079..05388a8223 100755
Binary files a/benchmarks/guest/regex/elf/openvm-regex-program.elf and b/benchmarks/guest/regex/elf/openvm-regex-program.elf differ
diff --git a/benchmarks/guest/revm_snailtracer/Cargo.toml b/benchmarks/guest/revm_snailtracer/Cargo.toml
index e37595eb36..6f0a5176b5 100644
--- a/benchmarks/guest/revm_snailtracer/Cargo.toml
+++ b/benchmarks/guest/revm_snailtracer/Cargo.toml
@@ -1,11 +1,10 @@
-[workspace]
 [package]
 name = "openvm-revm-snailtracer"
-version = "0.0.0"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
 
 [dependencies]
-openvm = { path = "../../../crates/toolchain/openvm", features = ["std"] }
+openvm = { workspace = true, features = ["std"] }
 revm = { version = "18.0.0", default-features = false }
 # revm does not re-export this feature so we enable it here
 derive_more = { version = "1.0.0", default-features = false, features = [
@@ -15,8 +14,3 @@ derive_more = { version = "1.0.0", default-features = false, features = [
 
 [features]
 default = []
-
-[profile.profiling]
-inherits = "release"
-debug = 2
-strip = false
diff --git a/benchmarks/guest/revm_snailtracer/elf/openvm-revm-snailtracer.elf b/benchmarks/guest/revm_snailtracer/elf/openvm-revm-snailtracer.elf
index 9255290412..26e1d4c515 100755
Binary files a/benchmarks/guest/revm_snailtracer/elf/openvm-revm-snailtracer.elf and b/benchmarks/guest/revm_snailtracer/elf/openvm-revm-snailtracer.elf differ
diff --git a/benchmarks/guest/revm_transfer/Cargo.toml b/benchmarks/guest/revm_transfer/Cargo.toml
index eea02dd155..c7dc11bdec 100644
--- a/benchmarks/guest/revm_transfer/Cargo.toml
+++ b/benchmarks/guest/revm_transfer/Cargo.toml
@@ -1,13 +1,12 @@
-[workspace]
 [package]
 name = "openvm-revm-transfer"
-version = "0.0.0"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
 
 [dependencies]
+openvm = { workspace = true, features = ["std"] }
+openvm-keccak256.workspace = true
 revm = { version = "18.0.0", default-features = false }
-openvm = { path = "../../../crates/toolchain/openvm", features = ["std"] }
-openvm-keccak256-guest = { path = "../../../extensions/keccak256/guest", default-features = false }
 tracing = { version = "0.1.40", default-features = false }
 alloy-primitives = { version = "0.8.10", default-features = false, features = [
     "native-keccak",
@@ -20,8 +19,3 @@ derive_more = { version = "1.0.0", default-features = false, features = [
 
 [features]
 default = []
-
-[profile.profiling]
-inherits = "release"
-debug = 2
-strip = false
diff --git a/benchmarks/guest/revm_transfer/elf/openvm-revm-transfer.elf b/benchmarks/guest/revm_transfer/elf/openvm-revm-transfer.elf
index 0aa22396e6..96f7d328e9 100755
Binary files a/benchmarks/guest/revm_transfer/elf/openvm-revm-transfer.elf and b/benchmarks/guest/revm_transfer/elf/openvm-revm-transfer.elf differ
diff --git a/benchmarks/guest/revm_transfer/src/main.rs b/benchmarks/guest/revm_transfer/src/main.rs
index ff725efca3..88189cc3cd 100644
--- a/benchmarks/guest/revm_transfer/src/main.rs
+++ b/benchmarks/guest/revm_transfer/src/main.rs
@@ -2,7 +2,7 @@
 //! We run 100 transfers to take the average
 use alloy_primitives::{address, TxKind, U256};
 #[allow(unused_imports, clippy::single_component_path_imports)]
-use openvm_keccak256_guest; // export native keccak
+use openvm_keccak256; // export native keccak
 use revm::{db::BenchmarkDB, primitives::Bytecode, Evm};
 
 // Necessary so the linker doesn't skip importing openvm crate
diff --git a/benchmarks/guest/rkyv/Cargo.toml b/benchmarks/guest/rkyv/Cargo.toml
index c061e59e0f..ee61726cca 100644
--- a/benchmarks/guest/rkyv/Cargo.toml
+++ b/benchmarks/guest/rkyv/Cargo.toml
@@ -1,11 +1,10 @@
-[workspace]
 [package]
 name = "openvm-rkyv-program"
-version = "0.0.0"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
 
 [dependencies]
-openvm = { path = "../../../crates/toolchain/openvm", features = ["std"] }
+openvm = { workspace = true, features = ["std"] }
 rand = { version = "0.8.5", default-features = false }
 rand_pcg = "0.3.1"
 rkyv = { version = "0.8.8", default-features = false, features = [
@@ -15,8 +14,3 @@ rkyv = { version = "0.8.8", default-features = false, features = [
 
 [features]
 default = []
-
-[profile.profiling]
-inherits = "release"
-debug = 2
-strip = false
diff --git a/benchmarks/guest/rkyv/elf/openvm-rkyv-program.elf b/benchmarks/guest/rkyv/elf/openvm-rkyv-program.elf
index 528106e233..f2b7f8d95d 100755
Binary files a/benchmarks/guest/rkyv/elf/openvm-rkyv-program.elf and b/benchmarks/guest/rkyv/elf/openvm-rkyv-program.elf differ
diff --git a/benchmarks/guest/sha256/Cargo.toml b/benchmarks/guest/sha256/Cargo.toml
index 1d5491f35a..4b711f2589 100644
--- a/benchmarks/guest/sha256/Cargo.toml
+++ b/benchmarks/guest/sha256/Cargo.toml
@@ -1,17 +1,11 @@
-[workspace]
 [package]
 name = "openvm-sha256-program"
-version = "0.0.0"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
 
 [dependencies]
-openvm = { path = "../../../crates/toolchain/openvm", features = ["std"] }
-openvm-sha2 = { path = "../../../guest-libs/sha2" }
+openvm = { workspace = true, features = ["std"] }
+openvm-sha2.workspace = true
 
 [features]
 default = []
-
-[profile.profiling]
-inherits = "release"
-debug = 2
-strip = false
diff --git a/benchmarks/guest/sha256/elf/openvm-sha256-program.elf b/benchmarks/guest/sha256/elf/openvm-sha256-program.elf
index 9524e8f552..2c03e2dad6 100755
Binary files a/benchmarks/guest/sha256/elf/openvm-sha256-program.elf and b/benchmarks/guest/sha256/elf/openvm-sha256-program.elf differ
diff --git a/benchmarks/guest/sha256/src/main.rs b/benchmarks/guest/sha256/src/main.rs
index 0178771d09..fc0b3fab78 100644
--- a/benchmarks/guest/sha256/src/main.rs
+++ b/benchmarks/guest/sha256/src/main.rs
@@ -3,7 +3,7 @@ use openvm as _;
 
 use openvm_sha2::sha256;
 
-const INPUT_LENGTH_BYTES: usize = 100 * 1024; // 100 KB
+const INPUT_LENGTH_BYTES: usize = 384 * 1024;
 
 pub fn main() {
     let mut input = Vec::with_capacity(INPUT_LENGTH_BYTES);
diff --git a/benchmarks/guest/sha256_iter/Cargo.toml b/benchmarks/guest/sha256_iter/Cargo.toml
index 8e0273858a..7934b46f66 100644
--- a/benchmarks/guest/sha256_iter/Cargo.toml
+++ b/benchmarks/guest/sha256_iter/Cargo.toml
@@ -1,17 +1,11 @@
-[workspace]
 [package]
 name = "openvm-sha256-iter-program"
-version = "0.0.0"
-edition = "2021"
+version.workspace = true
+edition.workspace = true
 
 [dependencies]
-openvm = { path = "../../../crates/toolchain/openvm", features = ["std"] }
-openvm-sha2 = { path = "../../../guest-libs/sha2" }
+openvm = { workspace = true, features = ["std"] }
+openvm-sha2.workspace = true
 
 [features]
 default = []
-
-[profile.profiling]
-inherits = "release"
-debug = 2
-strip = false
diff --git a/benchmarks/guest/sha256_iter/elf/openvm-sha256-iter-program.elf b/benchmarks/guest/sha256_iter/elf/openvm-sha256-iter-program.elf
index 95b469ece5..677d9a3b7a 100755
Binary files a/benchmarks/guest/sha256_iter/elf/openvm-sha256-iter-program.elf and b/benchmarks/guest/sha256_iter/elf/openvm-sha256-iter-program.elf differ
diff --git a/benchmarks/guest/sha256_iter/src/main.rs b/benchmarks/guest/sha256_iter/src/main.rs
index 0b495a58a8..aea8b723e9 100644
--- a/benchmarks/guest/sha256_iter/src/main.rs
+++ b/benchmarks/guest/sha256_iter/src/main.rs
@@ -1,13 +1,13 @@
 use core::hint::black_box;
-use openvm as _;
 
+use openvm as _;
 use openvm_sha2::sha256;
 
-const ITERATIONS: usize = 20_000;
+const ITERATIONS: usize = 150_000;
 
 pub fn main() {
     // Initialize with hash of an empty vector
-    let mut hash = black_box(sha256(&vec![]));
+    let mut hash = black_box(sha256(&[]));
 
     // Iteratively apply sha256
     for _ in 0..ITERATIONS {
diff --git a/benchmarks/prove/Cargo.toml b/benchmarks/prove/Cargo.toml
index 9e745d3d80..88f0784e95 100644
--- a/benchmarks/prove/Cargo.toml
+++ b/benchmarks/prove/Cargo.toml
@@ -10,20 +10,11 @@ license.workspace = true
 [dependencies]
 openvm-benchmarks-utils.workspace = true
 openvm-circuit.workspace = true
+openvm-continuations.workspace = true
 openvm-sdk.workspace = true
 openvm-stark-backend.workspace = true
 openvm-stark-sdk.workspace = true
 openvm-transpiler.workspace = true
-openvm-rv32im-circuit.workspace = true
-openvm-rv32im-transpiler.workspace = true
-openvm-keccak256-circuit.workspace = true
-openvm-keccak256-transpiler.workspace = true
-openvm-algebra-circuit.workspace = true
-openvm-algebra-transpiler.workspace = true
-openvm-ecc-circuit.workspace = true
-openvm-ecc-transpiler.workspace = true
-openvm-pairing-circuit.workspace = true
-openvm-pairing-guest.workspace = true
 openvm-native-circuit.workspace = true
 openvm-native-compiler.workspace = true
 openvm-native-recursion = { workspace = true, features = ["test-utils"] }
@@ -34,19 +25,20 @@ tokio = { version = "1.43.1", features = ["rt", "rt-multi-thread", "macros"] }
 rand_chacha = { version = "0.3", default-features = false }
 k256 = { workspace = true, features = ["ecdsa"] }
 tiny-keccak.workspace = true
-derive-new.workspace = true
 derive_more = { workspace = true, features = ["from"] }
-num-bigint = { workspace = true, features = ["std", "serde"] }
-serde.workspace = true
+rand.workspace = true
 tracing.workspace = true
+metrics.workspace = true
 
 [dev-dependencies]
 
 [features]
-default = ["parallel", "jemalloc", "bench-metrics"]
-bench-metrics = ["openvm-sdk/bench-metrics"]
-profiling = ["openvm-sdk/profiling"]
-aggregation = []                                    # runs leaf aggregation benchmarks
+default = ["parallel", "jemalloc", "metrics", "evm"]
+metrics = ["openvm-sdk/metrics"]
+perf-metrics = ["openvm-sdk/perf-metrics", "metrics"]
+stark-debug = ["openvm-sdk/stark-debug"]
+# runs leaf aggregation benchmarks:
+aggregation = []
 evm = ["openvm-sdk/evm-verify"]
 parallel = ["openvm-sdk/parallel"]
 mimalloc = ["openvm-sdk/mimalloc"]
@@ -55,7 +47,7 @@ jemalloc-prof = ["openvm-sdk/jemalloc-prof"]
 nightly-features = ["openvm-sdk/nightly-features"]
 
 [package.metadata.cargo-shear]
-ignored = ["derive_more"]
+ignored = ["derive_more", "rand"]
 
 [[bin]]
 name = "fib_e2e"
diff --git a/benchmarks/prove/src/bin/base64_json.rs b/benchmarks/prove/src/bin/base64_json.rs
index ed366e51ca..dc92fb6b85 100644
--- a/benchmarks/prove/src/bin/base64_json.rs
+++ b/benchmarks/prove/src/bin/base64_json.rs
@@ -1,34 +1,28 @@
 use clap::Parser;
 use eyre::Result;
 use openvm_benchmarks_prove::util::BenchmarkCli;
-use openvm_circuit::arch::instructions::exe::VmExe;
-use openvm_keccak256_circuit::Keccak256Rv32Config;
-use openvm_keccak256_transpiler::Keccak256TranspilerExtension;
-use openvm_rv32im_transpiler::{
-    Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
+use openvm_sdk::{
+    config::{SdkVmConfig, SdkVmCpuBuilder},
+    StdIn,
 };
-use openvm_sdk::StdIn;
-use openvm_stark_sdk::{bench::run_with_metric_collection, p3_baby_bear::BabyBear};
-use openvm_transpiler::{transpiler::Transpiler, FromElf};
+use openvm_stark_sdk::bench::run_with_metric_collection;
 
 fn main() -> Result<()> {
     let args = BenchmarkCli::parse();
 
-    let config = Keccak256Rv32Config::default();
+    let config = SdkVmConfig::from_toml(include_str!("../../../guest/base64_json/openvm.toml"))?
+        .app_vm_config;
     let elf = args.build_bench_program("base64_json", &config, None)?;
-    let exe = VmExe::from_elf(
-        elf,
-        Transpiler::<BabyBear>::default()
-            .with_extension(Rv32ITranspilerExtension)
-            .with_extension(Rv32MTranspilerExtension)
-            .with_extension(Rv32IoTranspilerExtension)
-            .with_extension(Keccak256TranspilerExtension),
-    )?;
 
     run_with_metric_collection("OUTPUT_PATH", || -> Result<()> {
         let data = include_str!("../../../guest/base64_json/json_payload_encoded.txt");
 
         let fe_bytes = data.to_owned().into_bytes();
-        args.bench_from_exe("base64_json", config, exe, StdIn::from_bytes(&fe_bytes))
+        args.bench_from_exe::<SdkVmCpuBuilder, _>(
+            "base64_json",
+            config,
+            elf,
+            StdIn::from_bytes(&fe_bytes),
+        )
     })
 }
diff --git a/benchmarks/prove/src/bin/bincode.rs b/benchmarks/prove/src/bin/bincode.rs
index 3cc419c1e1..d4e2606af3 100644
--- a/benchmarks/prove/src/bin/bincode.rs
+++ b/benchmarks/prove/src/bin/bincode.rs
@@ -1,30 +1,21 @@
 use clap::Parser;
 use eyre::Result;
 use openvm_benchmarks_prove::util::BenchmarkCli;
-use openvm_circuit::arch::instructions::exe::VmExe;
-use openvm_rv32im_circuit::Rv32ImConfig;
-use openvm_rv32im_transpiler::{
-    Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
+use openvm_sdk::{
+    config::{SdkVmConfig, SdkVmCpuBuilder},
+    StdIn,
 };
-use openvm_sdk::StdIn;
-use openvm_stark_sdk::{bench::run_with_metric_collection, p3_baby_bear::BabyBear};
-use openvm_transpiler::{transpiler::Transpiler, FromElf};
+use openvm_stark_sdk::bench::run_with_metric_collection;
 
 fn main() -> Result<()> {
     let args = BenchmarkCli::parse();
 
-    let config = Rv32ImConfig::default();
+    let config =
+        SdkVmConfig::from_toml(include_str!("../../../guest/bincode/openvm.toml"))?.app_vm_config;
     let elf = args.build_bench_program("bincode", &config, None)?;
-    let exe = VmExe::from_elf(
-        elf,
-        Transpiler::<BabyBear>::default()
-            .with_extension(Rv32ITranspilerExtension)
-            .with_extension(Rv32MTranspilerExtension)
-            .with_extension(Rv32IoTranspilerExtension),
-    )?;
     run_with_metric_collection("OUTPUT_PATH", || -> Result<()> {
         let file_data = include_bytes!("../../../guest/bincode/minecraft_savedata.bin");
         let stdin = StdIn::from_bytes(file_data);
-        args.bench_from_exe("bincode", config, exe, stdin)
+        args.bench_from_exe::<SdkVmCpuBuilder, _>("bincode", config, elf, stdin)
     })
 }
diff --git a/benchmarks/prove/src/bin/ecrecover.rs b/benchmarks/prove/src/bin/ecrecover.rs
index 23fe2c82af..6ae7ecaf0c 100644
--- a/benchmarks/prove/src/bin/ecrecover.rs
+++ b/benchmarks/prove/src/bin/ecrecover.rs
@@ -1,35 +1,11 @@
 use clap::Parser;
 use eyre::Result;
 use k256::ecdsa::{SigningKey, VerifyingKey};
-use num_bigint::BigUint;
-use openvm_algebra_circuit::{
-    ModularExtension, ModularExtensionExecutor, ModularExtensionPeriphery,
-};
-use openvm_algebra_transpiler::ModularTranspilerExtension;
 use openvm_benchmarks_prove::util::BenchmarkCli;
-use openvm_circuit::{
-    arch::{instructions::exe::VmExe, InitFileGenerator, SystemConfig},
-    derive::VmConfig,
-};
-use openvm_ecc_circuit::{
-    CurveConfig, WeierstrassExtension, WeierstrassExtensionExecutor, WeierstrassExtensionPeriphery,
-    SECP256K1_CONFIG,
-};
-use openvm_ecc_transpiler::EccTranspilerExtension;
-use openvm_keccak256_circuit::{Keccak256, Keccak256Executor, Keccak256Periphery};
-use openvm_keccak256_transpiler::Keccak256TranspilerExtension;
-use openvm_rv32im_circuit::{
-    Rv32I, Rv32IExecutor, Rv32IPeriphery, Rv32Io, Rv32IoExecutor, Rv32IoPeriphery, Rv32M,
-    Rv32MExecutor, Rv32MPeriphery,
-};
-use openvm_rv32im_transpiler::{
-    Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
-};
-use openvm_stark_backend::p3_field::{FieldAlgebra, PrimeField32};
+use openvm_sdk::config::{SdkVmConfig, SdkVmCpuBuilder};
+use openvm_stark_backend::p3_field::FieldAlgebra;
 use openvm_stark_sdk::{bench::run_with_metric_collection, p3_baby_bear::BabyBear};
-use openvm_transpiler::{transpiler::Transpiler, FromElf};
 use rand_chacha::{rand_core::SeedableRng, ChaCha8Rng};
-use serde::{Deserialize, Serialize};
 use tiny_keccak::{Hasher, Keccak};
 
 fn make_input(signing_key: &SigningKey, msg: &[u8]) -> Vec<BabyBear> {
@@ -48,68 +24,12 @@ fn make_input(signing_key: &SigningKey, msg: &[u8]) -> Vec<BabyBear> {
     input.into_iter().map(BabyBear::from_canonical_u8).collect()
 }
 
-#[derive(Clone, Debug, VmConfig, derive_new::new, Serialize, Deserialize)]
-pub struct Rv32ImEcRecoverConfig {
-    #[system]
-    pub system: SystemConfig,
-    #[extension]
-    pub base: Rv32I,
-    #[extension]
-    pub mul: Rv32M,
-    #[extension]
-    pub io: Rv32Io,
-    #[extension]
-    pub modular: ModularExtension,
-    #[extension]
-    pub keccak: Keccak256,
-    #[extension]
-    pub weierstrass: WeierstrassExtension,
-}
-
-impl InitFileGenerator for Rv32ImEcRecoverConfig {
-    fn generate_init_file_contents(&self) -> Option<String> {
-        Some(format!(
-            "// This file is automatically generated by cargo openvm. Do not rename or edit.\n{}\n{}\n",
-            self.modular.generate_moduli_init(),
-            self.weierstrass.generate_sw_init()
-        ))
-    }
-}
-
-impl Rv32ImEcRecoverConfig {
-    pub fn for_curves(curves: Vec<CurveConfig>) -> Self {
-        let primes: Vec<BigUint> = curves
-            .iter()
-            .flat_map(|c| [c.modulus.clone(), c.scalar.clone()])
-            .collect();
-        Self {
-            system: SystemConfig::default().with_continuations(),
-            base: Default::default(),
-            mul: Default::default(),
-            io: Default::default(),
-            modular: ModularExtension::new(primes),
-            keccak: Default::default(),
-            weierstrass: WeierstrassExtension::new(curves),
-        }
-    }
-}
-
 fn main() -> Result<()> {
     let args = BenchmarkCli::parse();
 
-    let config = Rv32ImEcRecoverConfig::for_curves(vec![SECP256K1_CONFIG.clone()]);
-
+    let config =
+        SdkVmConfig::from_toml(include_str!("../../../guest/ecrecover/openvm.toml"))?.app_vm_config;
     let elf = args.build_bench_program("ecrecover", &config, None)?;
-    let exe = VmExe::from_elf(
-        elf,
-        Transpiler::<BabyBear>::default()
-            .with_extension(Rv32ITranspilerExtension)
-            .with_extension(Rv32MTranspilerExtension)
-            .with_extension(Rv32IoTranspilerExtension)
-            .with_extension(Keccak256TranspilerExtension)
-            .with_extension(ModularTranspilerExtension)
-            .with_extension(EccTranspilerExtension),
-    )?;
 
     run_with_metric_collection("OUTPUT_PATH", || -> Result<()> {
         let mut rng = ChaCha8Rng::seed_from_u64(12345);
@@ -135,6 +55,11 @@ fn main() -> Result<()> {
                 .map(|s| make_input(&signing_key, s.as_bytes()))
                 .collect::<Vec<_>>(),
         );
-        args.bench_from_exe("ecrecover_program", config, exe, input_stream.into())
+        args.bench_from_exe::<SdkVmCpuBuilder, _>(
+            "ecrecover_program",
+            config,
+            elf,
+            input_stream.into(),
+        )
     })
 }
diff --git a/benchmarks/prove/src/bin/fib_e2e.rs b/benchmarks/prove/src/bin/fib_e2e.rs
index 41611d0970..1630dccb52 100644
--- a/benchmarks/prove/src/bin/fib_e2e.rs
+++ b/benchmarks/prove/src/bin/fib_e2e.rs
@@ -1,21 +1,9 @@
-use std::{path::PathBuf, sync::Arc};
-
 use clap::Parser;
 use eyre::Result;
 use openvm_benchmarks_prove::util::BenchmarkCli;
-use openvm_circuit::arch::{instructions::exe::VmExe, DEFAULT_MAX_NUM_PUBLIC_VALUES};
-use openvm_native_recursion::halo2::utils::{CacheHalo2ParamsReader, DEFAULT_PARAMS_DIR};
-use openvm_rv32im_circuit::Rv32ImConfig;
-use openvm_rv32im_transpiler::{
-    Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
-};
-use openvm_sdk::{
-    commit::commit_app_exe, prover::EvmHalo2Prover, DefaultStaticVerifierPvHandler, Sdk, StdIn,
-};
-use openvm_stark_sdk::{
-    bench::run_with_metric_collection, config::baby_bear_poseidon2::BabyBearPoseidon2Engine,
-};
-use openvm_transpiler::{transpiler::Transpiler, FromElf};
+use openvm_circuit::arch::DEFAULT_MAX_NUM_PUBLIC_VALUES;
+use openvm_sdk::{config::SdkVmConfig, Sdk, StdIn};
+use openvm_stark_sdk::bench::run_with_metric_collection;
 
 const NUM_PUBLIC_VALUES: usize = DEFAULT_MAX_NUM_PUBLIC_VALUES;
 
@@ -26,49 +14,24 @@ async fn main() -> Result<()> {
     // Must be larger than RangeTupleCheckerAir.height == 524288
     let max_segment_length = args.max_segment_length.unwrap_or(1_000_000);
 
-    let app_config = args.app_config(Rv32ImConfig::with_public_values_and_segment_len(
-        NUM_PUBLIC_VALUES,
-        max_segment_length,
-    ));
-    let elf = args.build_bench_program("fibonacci", &app_config.app_vm_config, None)?;
+    let mut config =
+        SdkVmConfig::from_toml(include_str!("../../../guest/fibonacci/openvm.toml"))?.app_vm_config;
+    config.as_mut().segmentation_limits.max_trace_height = max_segment_length;
+    config.as_mut().num_public_values = NUM_PUBLIC_VALUES;
 
-    let agg_config = args.agg_config();
+    let elf = args.build_bench_program("fibonacci", &config, None)?;
+    let app_config = args.app_config(config);
 
-    let sdk = Sdk::new();
-    let halo2_params_reader = CacheHalo2ParamsReader::new(
-        args.kzg_params_dir
-            .clone()
-            .unwrap_or(PathBuf::from(DEFAULT_PARAMS_DIR)),
-    );
-    let app_pk = Arc::new(sdk.app_keygen(app_config)?);
-    let full_agg_pk = sdk.agg_keygen(
-        agg_config,
-        &halo2_params_reader,
-        &DefaultStaticVerifierPvHandler,
-    )?;
-    let exe = VmExe::from_elf(
-        elf,
-        Transpiler::default()
-            .with_extension(Rv32ITranspilerExtension)
-            .with_extension(Rv32MTranspilerExtension)
-            .with_extension(Rv32IoTranspilerExtension),
-    )?;
-    let app_committed_exe = commit_app_exe(app_pk.app_fri_params(), exe);
+    let sdk = Sdk::new(app_config)?;
 
     let n = 800_000u64;
     let mut stdin = StdIn::default();
     stdin.write(&n);
-    run_with_metric_collection("OUTPUT_PATH", || {
-        let mut e2e_prover = EvmHalo2Prover::<_, BabyBearPoseidon2Engine>::new(
-            &halo2_params_reader,
-            app_pk,
-            app_committed_exe,
-            full_agg_pk,
-            args.agg_tree_config,
-        );
-        e2e_prover.set_program_name("fib_e2e");
-        let _proof = e2e_prover.generate_proof_for_evm(stdin);
-    });
-
-    Ok(())
+    run_with_metric_collection("OUTPUT_PATH", || -> eyre::Result<_> {
+        let _proof = sdk
+            .evm_prover(elf)?
+            .with_program_name("fib_e2e")
+            .prove_evm(stdin)?;
+        Ok(())
+    })
 }
diff --git a/benchmarks/prove/src/bin/fibonacci.rs b/benchmarks/prove/src/bin/fibonacci.rs
index 1c886d8130..bfed141ff1 100644
--- a/benchmarks/prove/src/bin/fibonacci.rs
+++ b/benchmarks/prove/src/bin/fibonacci.rs
@@ -1,32 +1,23 @@
 use clap::Parser;
 use eyre::Result;
 use openvm_benchmarks_prove::util::BenchmarkCli;
-use openvm_circuit::arch::instructions::exe::VmExe;
-use openvm_rv32im_circuit::Rv32ImConfig;
-use openvm_rv32im_transpiler::{
-    Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
+use openvm_sdk::{
+    config::{SdkVmConfig, SdkVmCpuBuilder},
+    StdIn,
 };
-use openvm_sdk::StdIn;
-use openvm_stark_sdk::{bench::run_with_metric_collection, p3_baby_bear::BabyBear};
-use openvm_transpiler::{transpiler::Transpiler, FromElf};
+use openvm_stark_sdk::bench::run_with_metric_collection;
 
 fn main() -> Result<()> {
     let args = BenchmarkCli::parse();
 
-    let config = Rv32ImConfig::default();
+    let config =
+        SdkVmConfig::from_toml(include_str!("../../../guest/fibonacci/openvm.toml"))?.app_vm_config;
     let elf = args.build_bench_program("fibonacci", &config, None)?;
-    let exe = VmExe::from_elf(
-        elf,
-        Transpiler::<BabyBear>::default()
-            .with_extension(Rv32ITranspilerExtension)
-            .with_extension(Rv32MTranspilerExtension)
-            .with_extension(Rv32IoTranspilerExtension),
-    )?;
 
     run_with_metric_collection("OUTPUT_PATH", || -> Result<()> {
         let n = 100_000u64;
         let mut stdin = StdIn::default();
         stdin.write(&n);
-        args.bench_from_exe("fibonacci_program", config, exe, stdin)
+        args.bench_from_exe::<SdkVmCpuBuilder, _>("fibonacci_program", config, elf, stdin)
     })
 }
diff --git a/benchmarks/prove/src/bin/kitchen_sink.rs b/benchmarks/prove/src/bin/kitchen_sink.rs
index 3102c9e3fe..1bb1d30a98 100644
--- a/benchmarks/prove/src/bin/kitchen_sink.rs
+++ b/benchmarks/prove/src/bin/kitchen_sink.rs
@@ -1,104 +1,98 @@
-use std::{path::PathBuf, str::FromStr, sync::Arc};
+use std::sync::Arc;
 
 use clap::Parser;
 use eyre::Result;
-use num_bigint::BigUint;
-use openvm_algebra_circuit::{Fp2Extension, ModularExtension};
 use openvm_benchmarks_prove::util::BenchmarkCli;
-use openvm_circuit::arch::{instructions::exe::VmExe, SystemConfig};
-use openvm_ecc_circuit::{WeierstrassExtension, P256_CONFIG, SECP256K1_CONFIG};
-use openvm_native_recursion::halo2::utils::{CacheHalo2ParamsReader, DEFAULT_PARAMS_DIR};
-use openvm_pairing_circuit::{PairingCurve, PairingExtension};
-use openvm_pairing_guest::{
-    bls12_381::BLS12_381_COMPLEX_STRUCT_NAME, bn254::BN254_COMPLEX_STRUCT_NAME,
-};
+use openvm_circuit::arch::instructions::exe::VmExe;
+use openvm_continuations::verifier::leaf::types::LeafVmVerifierInput;
+use openvm_native_circuit::{NativeConfig, NativeCpuBuilder, NATIVE_MAX_TRACE_HEIGHTS};
 use openvm_sdk::{
-    commit::commit_app_exe, config::SdkVmConfig, prover::EvmHalo2Prover,
-    DefaultStaticVerifierPvHandler, Sdk, StdIn,
+    config::SdkVmConfig,
+    prover::vm::{new_local_prover, types::VmProvingKey},
+    Sdk, StdIn, F, SC,
 };
 use openvm_stark_sdk::{
     bench::run_with_metric_collection, config::baby_bear_poseidon2::BabyBearPoseidon2Engine,
 };
-use openvm_transpiler::FromElf;
+
+/// Checks that the leaf verifier's main trace heights stay within
+/// `NATIVE_MAX_TRACE_HEIGHTS`. Proves `app_exe` on default stdin, chunks the proof
+/// with `num_children_leaf`, then runs metered execution and tracegen per chunk.
+/// Panics if a chunk does not yield exactly one segment or a height exceeds its limit.
+fn verify_native_max_trace_heights(
+    sdk: &Sdk,
+    app_exe: Arc<VmExe<F>>,
+    leaf_vm_pk: Arc<VmProvingKey<SC, NativeConfig>>,
+    num_children_leaf: usize,
+) -> Result<()> {
+    let proof = sdk.app_prover(app_exe)?.prove(StdIn::default())?;
+    let chunks = LeafVmVerifierInput::chunk_continuation_vm_proof(&proof, num_children_leaf);
+    let leaf_exe = sdk.app_pk().leaf_committed_exe.exe.clone();
+    let mut leaf_prover =
+        new_local_prover::<BabyBearPoseidon2Engine, _>(NativeCpuBuilder, &leaf_vm_pk, leaf_exe)?;
+
+    for chunk in chunks {
+        let exe = leaf_prover.exe().clone();
+        let vm = &mut leaf_prover.vm;
+        // Metered execution; the chunk is expected to fit in a single segment.
+        let metered_ctx = vm.build_metered_ctx();
+        let (segments, _) = vm
+            .metered_interpreter(&exe)?
+            .execute_metered(chunk.write_to_stream(), metered_ctx)?;
+        assert_eq!(segments.len(), 1);
+        let estimated_heights = &segments[0].trace_heights;
+        println!("estimated_trace_heights: {:?}", estimated_heights);
+
+        // Tracegen without proving since leaf proofs take a while
+        let state = vm.create_initial_state(&exe, chunk.write_to_stream());
+        vm.transport_init_memory_to_device(&state.memory);
+        let mut interpreter = vm.preflight_interpreter(&exe)?;
+        let out = vm.execute_preflight(&mut interpreter, state, None, estimated_heights)?;
+        let proving_ctx = vm.generate_proving_ctx(out.system_records, out.record_arenas)?;
+        let actual_heights = proving_ctx
+            .per_air
+            .into_iter()
+            .map(|(_, air_ctx)| air_ctx.main_trace_height())
+            .collect::<Vec<usize>>();
+        println!("actual_trace_heights: {:?}", actual_heights);
+
+        // Pairwise check against the limits; `zip` stops at the shorter sequence.
+        for (&height, &limit) in actual_heights.iter().zip(NATIVE_MAX_TRACE_HEIGHTS) {
+            assert!(
+                height <= (limit as usize),
+                "Actual trace height {} exceeds expected height {}",
+                height,
+                limit
+            );
+        }
+    }
+    Ok(())
+}
 
 fn main() -> Result<()> {
     let args = BenchmarkCli::parse();
 
-    let bn_config = PairingCurve::Bn254.curve_config();
-    let bls_config = PairingCurve::Bls12_381.curve_config();
-    let vm_config = SdkVmConfig::builder()
-        .system(SystemConfig::default().with_continuations().into())
-        .rv32i(Default::default())
-        .rv32m(Default::default())
-        .io(Default::default())
-        .keccak(Default::default())
-        .sha256(Default::default())
-        .bigint(Default::default())
-        .modular(ModularExtension::new(vec![
-            BigUint::from_str("1000000000000000003").unwrap(),
-            SECP256K1_CONFIG.modulus.clone(),
-            SECP256K1_CONFIG.scalar.clone(),
-            P256_CONFIG.modulus.clone(),
-            P256_CONFIG.scalar.clone(),
-            bn_config.modulus.clone(),
-            bn_config.scalar.clone(),
-            bls_config.modulus.clone(),
-            bls_config.scalar.clone(),
-            BigUint::from(2u32).pow(61) - BigUint::from(1u32),
-            BigUint::from(7u32),
-        ]))
-        .fp2(Fp2Extension::new(vec![
-            (
-                BN254_COMPLEX_STRUCT_NAME.to_string(),
-                bn_config.modulus.clone(),
-            ),
-            (
-                BLS12_381_COMPLEX_STRUCT_NAME.to_string(),
-                bls_config.modulus.clone(),
-            ),
-        ]))
-        .ecc(WeierstrassExtension::new(vec![
-            SECP256K1_CONFIG.clone(),
-            P256_CONFIG.clone(),
-            bn_config.clone(),
-            bls_config.clone(),
-        ]))
-        .pairing(PairingExtension::new(vec![
-            PairingCurve::Bn254,
-            PairingCurve::Bls12_381,
-        ]))
-        .build();
-    let elf = args.build_bench_program("kitchen-sink", &vm_config, None)?;
-    let exe = VmExe::from_elf(elf, vm_config.transpiler())?;
-
-    let sdk = Sdk::new();
+    let vm_config =
+        SdkVmConfig::from_toml(include_str!("../../../guest/kitchen-sink/openvm.toml"))?
+            .app_vm_config;
     let app_config = args.app_config(vm_config.clone());
-    let app_pk = Arc::new(sdk.app_keygen(app_config)?);
-    let app_committed_exe = commit_app_exe(app_pk.app_fri_params(), exe);
+    let elf = args.build_bench_program("kitchen-sink", &vm_config, None)?;
+    let sdk = Sdk::new(app_config)?;
+    let exe = sdk.convert_to_exe(elf)?;
 
-    let agg_config = args.agg_config();
-    let halo2_params_reader = CacheHalo2ParamsReader::new(
-        args.kzg_params_dir
-            .clone()
-            .unwrap_or(PathBuf::from(DEFAULT_PARAMS_DIR)),
-    );
-    let full_agg_pk = sdk.agg_keygen(
-        agg_config,
-        &halo2_params_reader,
-        &DefaultStaticVerifierPvHandler,
+    let agg_pk = sdk.agg_pk();
+    // Verify that NATIVE_MAX_TRACE_HEIGHTS remains valid
+    verify_native_max_trace_heights(
+        &sdk,
+        exe.clone(),
+        agg_pk.leaf_vm_pk.clone(),
+        args.agg_tree_config.num_children_leaf,
     )?;
 
-    run_with_metric_collection("OUTPUT_PATH", || -> Result<()> {
-        let mut prover = EvmHalo2Prover::<_, BabyBearPoseidon2Engine>::new(
-            &halo2_params_reader,
-            app_pk,
-            app_committed_exe,
-            full_agg_pk,
-            args.agg_tree_config,
-        );
-        prover.set_program_name("kitchen_sink");
+    run_with_metric_collection("OUTPUT_PATH", || -> eyre::Result<()> {
+        let mut prover = sdk.evm_prover(exe)?.with_program_name("kitchen_sink");
         let stdin = StdIn::default();
-        let _proof = prover.generate_proof_for_evm(stdin);
+        let _proof = prover.prove_evm(stdin)?;
         Ok(())
     })
 }
diff --git a/benchmarks/prove/src/bin/pairing.rs b/benchmarks/prove/src/bin/pairing.rs
index 1db6d1b491..46302003ec 100644
--- a/benchmarks/prove/src/bin/pairing.rs
+++ b/benchmarks/prove/src/bin/pairing.rs
@@ -1,41 +1,20 @@
 use clap::Parser;
 use eyre::Result;
-use openvm_algebra_circuit::{Fp2Extension, ModularExtension};
 use openvm_benchmarks_prove::util::BenchmarkCli;
-use openvm_circuit::arch::SystemConfig;
-use openvm_ecc_circuit::WeierstrassExtension;
-use openvm_pairing_circuit::{PairingCurve, PairingExtension};
-use openvm_pairing_guest::bn254::{BN254_COMPLEX_STRUCT_NAME, BN254_MODULUS, BN254_ORDER};
-use openvm_sdk::{config::SdkVmConfig, Sdk, StdIn};
+use openvm_sdk::{
+    config::{SdkVmConfig, SdkVmCpuBuilder},
+    StdIn,
+};
 use openvm_stark_sdk::bench::run_with_metric_collection;
 
 fn main() -> Result<()> {
     let args = BenchmarkCli::parse();
 
-    let vm_config = SdkVmConfig::builder()
-        .system(SystemConfig::default().with_continuations().into())
-        .rv32i(Default::default())
-        .rv32m(Default::default())
-        .io(Default::default())
-        .keccak(Default::default())
-        .modular(ModularExtension::new(vec![
-            BN254_MODULUS.clone(),
-            BN254_ORDER.clone(),
-        ]))
-        .fp2(Fp2Extension::new(vec![(
-            BN254_COMPLEX_STRUCT_NAME.to_string(),
-            BN254_MODULUS.clone(),
-        )]))
-        .ecc(WeierstrassExtension::new(vec![
-            PairingCurve::Bn254.curve_config()
-        ]))
-        .pairing(PairingExtension::new(vec![PairingCurve::Bn254]))
-        .build();
+    let vm_config =
+        SdkVmConfig::from_toml(include_str!("../../../guest/pairing/openvm.toml"))?.app_vm_config;
     let elf = args.build_bench_program("pairing", &vm_config, None)?;
-    let sdk = Sdk::new();
-    let exe = sdk.transpile(elf, vm_config.transpiler()).unwrap();
 
     run_with_metric_collection("OUTPUT_PATH", || -> Result<()> {
-        args.bench_from_exe("pairing", vm_config, exe, StdIn::default())
+        args.bench_from_exe::<SdkVmCpuBuilder, _>("pairing", vm_config, elf, StdIn::default())
     })
 }
diff --git a/benchmarks/prove/src/bin/regex.rs b/benchmarks/prove/src/bin/regex.rs
index d1de43dad5..c988450f58 100644
--- a/benchmarks/prove/src/bin/regex.rs
+++ b/benchmarks/prove/src/bin/regex.rs
@@ -1,33 +1,27 @@
 use clap::Parser;
 use eyre::Result;
 use openvm_benchmarks_prove::util::BenchmarkCli;
-use openvm_circuit::arch::instructions::exe::VmExe;
-use openvm_keccak256_circuit::Keccak256Rv32Config;
-use openvm_keccak256_transpiler::Keccak256TranspilerExtension;
-use openvm_rv32im_transpiler::{
-    Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
+use openvm_sdk::{
+    config::{SdkVmConfig, SdkVmCpuBuilder},
+    StdIn,
 };
-use openvm_sdk::StdIn;
-use openvm_stark_sdk::{bench::run_with_metric_collection, p3_baby_bear::BabyBear};
-use openvm_transpiler::{transpiler::Transpiler, FromElf};
+use openvm_stark_sdk::bench::run_with_metric_collection;
 
 fn main() -> Result<()> {
     let args = BenchmarkCli::parse();
 
-    let config = Keccak256Rv32Config::default();
+    let config =
+        SdkVmConfig::from_toml(include_str!("../../../guest/regex/openvm.toml"))?.app_vm_config;
     let elf = args.build_bench_program("regex", &config, None)?;
-    let exe = VmExe::from_elf(
-        elf.clone(),
-        Transpiler::<BabyBear>::default()
-            .with_extension(Rv32ITranspilerExtension)
-            .with_extension(Rv32MTranspilerExtension)
-            .with_extension(Rv32IoTranspilerExtension)
-            .with_extension(Keccak256TranspilerExtension),
-    )?;
     run_with_metric_collection("OUTPUT_PATH", || -> Result<()> {
         let data = include_str!("../../../guest/regex/regex_email.txt");
 
         let fe_bytes = data.to_owned().into_bytes();
-        args.bench_from_exe("regex_program", config, exe, StdIn::from_bytes(&fe_bytes))
+        args.bench_from_exe::<SdkVmCpuBuilder, _>(
+            "regex_program",
+            config,
+            elf,
+            StdIn::from_bytes(&fe_bytes),
+        )
     })
 }
diff --git a/benchmarks/prove/src/bin/revm_transfer.rs b/benchmarks/prove/src/bin/revm_transfer.rs
index 1df994dc78..b59b4ce700 100644
--- a/benchmarks/prove/src/bin/revm_transfer.rs
+++ b/benchmarks/prove/src/bin/revm_transfer.rs
@@ -1,29 +1,23 @@
 use clap::Parser;
 use eyre::Result;
 use openvm_benchmarks_prove::util::BenchmarkCli;
-use openvm_circuit::arch::instructions::exe::VmExe;
-use openvm_keccak256_circuit::Keccak256Rv32Config;
-use openvm_keccak256_transpiler::Keccak256TranspilerExtension;
-use openvm_rv32im_transpiler::{
-    Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
+use openvm_sdk::{
+    config::{SdkVmConfig, SdkVmCpuBuilder},
+    StdIn,
 };
-use openvm_sdk::StdIn;
-use openvm_stark_sdk::{bench::run_with_metric_collection, p3_baby_bear::BabyBear};
-use openvm_transpiler::{transpiler::Transpiler, FromElf};
+use openvm_stark_sdk::bench::run_with_metric_collection;
 
 fn main() -> Result<()> {
     let args = BenchmarkCli::parse();
-    let config = Keccak256Rv32Config::default();
+    let config = SdkVmConfig::from_toml(include_str!("../../../guest/revm_transfer/openvm.toml"))?
+        .app_vm_config;
     let elf = args.build_bench_program("revm_transfer", &config, None)?;
-    let exe = VmExe::from_elf(
-        elf,
-        Transpiler::<BabyBear>::default()
-            .with_extension(Rv32ITranspilerExtension)
-            .with_extension(Rv32MTranspilerExtension)
-            .with_extension(Rv32IoTranspilerExtension)
-            .with_extension(Keccak256TranspilerExtension),
-    )?;
     run_with_metric_collection("OUTPUT_PATH", || -> Result<()> {
-        args.bench_from_exe("revm_100_transfers", config, exe, StdIn::default())
+        args.bench_from_exe::<SdkVmCpuBuilder, _>(
+            "revm_100_transfers",
+            config,
+            elf,
+            StdIn::default(),
+        )
     })
 }
diff --git a/benchmarks/prove/src/bin/rkyv.rs b/benchmarks/prove/src/bin/rkyv.rs
index 7bdf6ed920..d1e6fac6a3 100644
--- a/benchmarks/prove/src/bin/rkyv.rs
+++ b/benchmarks/prove/src/bin/rkyv.rs
@@ -1,31 +1,22 @@
 use clap::Parser;
 use eyre::Result;
 use openvm_benchmarks_prove::util::BenchmarkCli;
-use openvm_circuit::arch::instructions::exe::VmExe;
-use openvm_rv32im_circuit::Rv32ImConfig;
-use openvm_rv32im_transpiler::{
-    Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
+use openvm_sdk::{
+    config::{SdkVmConfig, SdkVmCpuBuilder},
+    StdIn,
 };
-use openvm_sdk::StdIn;
-use openvm_stark_sdk::{bench::run_with_metric_collection, p3_baby_bear::BabyBear};
-use openvm_transpiler::{transpiler::Transpiler, FromElf};
+use openvm_stark_sdk::bench::run_with_metric_collection;
 
 fn main() -> Result<()> {
     let args = BenchmarkCli::parse();
 
-    let config = Rv32ImConfig::default();
+    let config =
+        SdkVmConfig::from_toml(include_str!("../../../guest/rkyv/openvm.toml"))?.app_vm_config;
     let elf = args.build_bench_program("rkyv", &config, None)?;
-    let exe = VmExe::from_elf(
-        elf,
-        Transpiler::<BabyBear>::default()
-            .with_extension(Rv32ITranspilerExtension)
-            .with_extension(Rv32MTranspilerExtension)
-            .with_extension(Rv32IoTranspilerExtension),
-    )?;
 
     run_with_metric_collection("OUTPUT_PATH", || -> Result<()> {
         let file_data = include_bytes!("../../../guest/rkyv/minecraft_savedata.bin");
         let stdin = StdIn::from_bytes(file_data);
-        args.bench_from_exe("rkyv", config, exe, stdin)
+        args.bench_from_exe::<SdkVmCpuBuilder, _>("rkyv", config, elf, stdin)
     })
 }
diff --git a/benchmarks/prove/src/bin/verify_fibair.rs b/benchmarks/prove/src/bin/verify_fibair.rs
index 1d8d6072da..e60dec0d0f 100644
--- a/benchmarks/prove/src/bin/verify_fibair.rs
+++ b/benchmarks/prove/src/bin/verify_fibair.rs
@@ -1,23 +1,27 @@
+use std::sync::Arc;
+
 use clap::Parser;
 use eyre::Result;
 use openvm_benchmarks_prove::util::BenchmarkCli;
-use openvm_circuit::arch::DEFAULT_MAX_NUM_PUBLIC_VALUES;
-use openvm_native_circuit::NativeConfig;
+use openvm_circuit::arch::{
+    instructions::exe::VmExe, verify_single, SingleSegmentVmProver, DEFAULT_MAX_NUM_PUBLIC_VALUES,
+};
+use openvm_native_circuit::{NativeConfig, NativeCpuBuilder, NATIVE_MAX_TRACE_HEIGHTS};
 use openvm_native_compiler::conversion::CompilerOptions;
 use openvm_native_recursion::testing_utils::inner::build_verification_program;
 use openvm_sdk::{
     config::{AppConfig, DEFAULT_APP_LOG_BLOWUP, DEFAULT_LEAF_LOG_BLOWUP},
-    prover::AppProver,
-    Sdk,
+    keygen::AppProvingKey,
+    prover::vm::new_local_prover,
 };
 use openvm_stark_sdk::{
     bench::run_with_metric_collection,
-    collect_airs_and_inputs,
     config::{baby_bear_poseidon2::BabyBearPoseidon2Engine, FriParameters},
     dummy_airs::fib_air::chip::FibonacciChip,
     engine::StarkFriEngine,
     openvm_stark_backend::Chip,
 };
+use tracing::info_span;
 
 /// Benchmark of aggregation VM performance.
 /// Proofs:
@@ -37,8 +41,11 @@ fn main() -> Result<()> {
     run_with_metric_collection("OUTPUT_PATH", || -> Result<()> {
         // run_test tries to setup tracing, but it will be ignored since run_with_metric_collection
         // already sets it.
-        let (fib_air, fib_input) = collect_airs_and_inputs!(fib_chip);
-        let vdata = engine.run_test(fib_air, fib_input).unwrap();
+        let (fib_air, fib_ctx) = (
+            vec![fib_chip.air()],
+            vec![fib_chip.generate_proving_ctx(())],
+        );
+        let vdata = engine.run_test(fib_air, fib_ctx).unwrap();
         // Unlike other apps, this "app" does not have continuations enabled.
         let app_fri_params =
             FriParameters::standard_with_100_bits_conjectured_security(leaf_log_blowup);
@@ -47,6 +54,7 @@ fn main() -> Result<()> {
             app_fri_params.max_constraint_degree().min(7),
         );
         app_vm_config.system.profiling = args.profiling;
+        app_vm_config.system.max_constraint_degree = (1 << app_log_blowup) + 1;
 
         let compiler_options = CompilerOptions::default();
         let app_config = AppConfig {
@@ -56,14 +64,21 @@ fn main() -> Result<()> {
             compiler_options,
         };
         let (program, input_stream) = build_verification_program(vdata, compiler_options);
-        let sdk = Sdk::new();
-        let app_pk = sdk.app_keygen(app_config)?;
+        let app_pk = AppProvingKey::keygen(app_config)?;
         let app_vk = app_pk.get_app_vk();
-        let committed_exe = sdk.commit_app_exe(app_fri_params, program.into())?;
-        let prover = AppProver::<_, BabyBearPoseidon2Engine>::new(app_pk.app_vm_pk, committed_exe)
-            .with_program_name("verify_fibair");
-        let proof = prover.generate_app_proof_without_continuations(input_stream.into());
-        sdk.verify_app_proof_without_continuations(&app_vk, &proof)?;
+        let exe = Arc::new(VmExe::new(program));
+        let mut prover = new_local_prover::<BabyBearPoseidon2Engine, _>(
+            NativeCpuBuilder,
+            &app_pk.app_vm_pk,
+            exe,
+        )?;
+        let proof = info_span!("verify_fibair", group = "verify_fibair").in_scope(|| {
+            #[cfg(feature = "metrics")]
+            metrics::counter!("fri.log_blowup")
+                .absolute(prover.vm.engine.fri_params().log_blowup as u64);
+            SingleSegmentVmProver::prove(&mut prover, input_stream, NATIVE_MAX_TRACE_HEIGHTS)
+        })?;
+        verify_single(&prover.vm.engine, &app_vk.vk, &proof)?;
         Ok(())
     })?;
     Ok(())
diff --git a/benchmarks/prove/src/util.rs b/benchmarks/prove/src/util.rs
index b3c17ead85..dafe30876c 100644
--- a/benchmarks/prove/src/util.rs
+++ b/benchmarks/prove/src/util.rs
@@ -1,30 +1,31 @@
-use std::{path::PathBuf, sync::Arc};
+use std::path::PathBuf;
 
 use clap::{command, Parser};
 use eyre::Result;
 use openvm_benchmarks_utils::{build_elf, get_programs_dir};
-use openvm_circuit::arch::{instructions::exe::VmExe, DefaultSegmentationStrategy, VmConfig};
-use openvm_native_circuit::NativeConfig;
+use openvm_circuit::arch::{
+    verify_single, Executor, MatrixRecordArena, MeteredExecutor, PreflightExecutor, SystemConfig,
+    VmBuilder, VmConfig, VmExecutionConfig,
+};
+use openvm_native_circuit::{NativeConfig, NativeCpuBuilder};
 use openvm_native_compiler::conversion::CompilerOptions;
 use openvm_sdk::{
-    commit::commit_app_exe,
     config::{
-        AggConfig, AggStarkConfig, AggregationTreeConfig, AppConfig, Halo2Config,
-        DEFAULT_APP_LOG_BLOWUP, DEFAULT_INTERNAL_LOG_BLOWUP, DEFAULT_LEAF_LOG_BLOWUP,
-        DEFAULT_ROOT_LOG_BLOWUP,
+        AggregationConfig, AggregationTreeConfig, AppConfig, Halo2Config, TranspilerConfig,
+        DEFAULT_APP_LOG_BLOWUP, DEFAULT_HALO2_VERIFIER_K, DEFAULT_INTERNAL_LOG_BLOWUP,
+        DEFAULT_LEAF_LOG_BLOWUP, DEFAULT_ROOT_LOG_BLOWUP,
     },
-    keygen::{leaf_keygen, AppProvingKey},
-    prover::{vm::local::VmLocalProver, AppProver, LeafProvingController},
-    Sdk, StdIn,
+    keygen::_leaf_keygen,
+    prover::{verify_app_proof, vm::new_local_prover, LeafProvingController},
+    types::ExecutableFormat,
+    GenericSdk, StdIn,
 };
-use openvm_stark_backend::utils::metrics_span;
 use openvm_stark_sdk::{
     config::{
         baby_bear_poseidon2::{BabyBearPoseidon2Config, BabyBearPoseidon2Engine},
         FriParameters,
     },
     engine::StarkFriEngine,
-    openvm_stark_backend::Chip,
     p3_baby_bear::BabyBear,
 };
 use openvm_transpiler::elf::Elf;
@@ -61,9 +62,13 @@ pub struct BenchmarkCli {
     #[arg(long)]
     pub kzg_params_dir: Option<PathBuf>,
 
-    /// Max segment length for continuations
-    #[arg(short, long, alias = "max_segment_length")]
-    pub max_segment_length: Option<usize>,
+    /// Max trace height per chip in segment for continuations
+    #[arg(long, alias = "max_segment_length")]
+    pub max_segment_length: Option<u32>,
+
+    /// Max total cells across all chips in segment for continuations
+    #[arg(long)]
+    pub segment_max_cells: Option<usize>,
 
     /// Controls the arity (num_children) of the aggregation tree
     #[command(flatten)]
@@ -75,17 +80,20 @@ pub struct BenchmarkCli {
 }
 
 impl BenchmarkCli {
-    pub fn app_config<VC: VmConfig<BabyBear>>(&self, mut app_vm_config: VC) -> AppConfig<VC> {
+    pub fn app_config<VC>(&self, mut app_vm_config: VC) -> AppConfig<VC>
+    where
+        VC: AsMut<SystemConfig>,
+    {
         let app_log_blowup = self.app_log_blowup.unwrap_or(DEFAULT_APP_LOG_BLOWUP);
         let leaf_log_blowup = self.leaf_log_blowup.unwrap_or(DEFAULT_LEAF_LOG_BLOWUP);
 
-        app_vm_config.system_mut().profiling = self.profiling;
-        if let Some(max_segment_length) = self.max_segment_length {
-            app_vm_config
-                .system_mut()
-                .set_segmentation_strategy(Arc::new(
-                    DefaultSegmentationStrategy::new_with_max_segment_len(max_segment_length),
-                ));
+        app_vm_config.as_mut().profiling = self.profiling;
+        app_vm_config.as_mut().max_constraint_degree = (1 << app_log_blowup) + 1;
+        if let Some(max_height) = self.max_segment_length {
+            app_vm_config.as_mut().segmentation_limits.max_trace_height = max_height;
+        }
+        if let Some(max_cells) = self.segment_max_cells {
+            app_vm_config.as_mut().segmentation_limits.max_cells = max_cells;
         }
         AppConfig {
             app_fri_params: FriParameters::standard_with_100_bits_conjectured_security(
@@ -104,7 +112,7 @@ impl BenchmarkCli {
         }
     }
 
-    pub fn agg_config(&self) -> AggConfig {
+    pub fn agg_config(&self) -> AggregationConfig {
         let leaf_log_blowup = self.leaf_log_blowup.unwrap_or(DEFAULT_LEAF_LOG_BLOWUP);
         let internal_log_blowup = self
             .internal_log_blowup
@@ -115,24 +123,25 @@ impl BenchmarkCli {
             [leaf_log_blowup, internal_log_blowup, root_log_blowup]
                 .map(FriParameters::standard_with_100_bits_conjectured_security);
 
-        AggConfig {
-            agg_stark_config: AggStarkConfig {
-                leaf_fri_params,
-                internal_fri_params,
-                root_fri_params,
-                profiling: self.profiling,
-                compiler_options: CompilerOptions {
-                    enable_cycle_tracker: self.profiling,
-                    ..Default::default()
-                },
-                root_max_constraint_degree: root_fri_params.max_constraint_degree(),
+        AggregationConfig {
+            leaf_fri_params,
+            internal_fri_params,
+            root_fri_params,
+            profiling: self.profiling,
+            compiler_options: CompilerOptions {
+                enable_cycle_tracker: self.profiling,
                 ..Default::default()
             },
-            halo2_config: Halo2Config {
-                verifier_k: self.halo2_outer_k.unwrap_or(23),
-                wrapper_k: self.halo2_wrapper_k,
-                profiling: self.profiling,
-            },
+            root_max_constraint_degree: root_fri_params.max_constraint_degree(),
+            ..Default::default()
+        }
+    }
+
+    pub fn halo2_config(&self) -> Halo2Config {
+        Halo2Config {
+            verifier_k: self.halo2_outer_k.unwrap_or(DEFAULT_HALO2_VERIFIER_K),
+            wrapper_k: self.halo2_wrapper_k,
+            profiling: self.profiling,
         }
     }
 
@@ -143,7 +152,7 @@ impl BenchmarkCli {
         init_file_name: Option<&str>,
     ) -> Result<Elf>
     where
-        VC: VmConfig<F>,
+        VC: VmConfig<SC>,
     {
         let profile = if self.profiling {
             "profiling"
@@ -156,20 +165,23 @@ impl BenchmarkCli {
         build_elf(&manifest_dir, profile)
     }
 
-    pub fn bench_from_exe<VC>(
+    pub fn bench_from_exe<VB, VC>(
         &self,
         bench_name: impl ToString,
         vm_config: VC,
-        exe: impl Into<VmExe<F>>,
+        exe: impl Into<ExecutableFormat>,
         input_stream: StdIn,
     ) -> Result<()>
     where
-        VC: VmConfig<F>,
-        VC::Executor: Chip<SC>,
-        VC::Periphery: Chip<SC>,
+        VB: VmBuilder<BabyBearPoseidon2Engine, VmConfig = VC, RecordArena = MatrixRecordArena<F>>
+            + Clone
+            + Default,
+        VC: VmExecutionConfig<F> + VmConfig<SC> + TranspilerConfig<F>,
+        <VC as VmExecutionConfig<F>>::Executor:
+            Executor<F> + MeteredExecutor<F> + PreflightExecutor<F>,
     {
         let app_config = self.app_config(vm_config);
-        bench_from_exe::<VC, BabyBearPoseidon2Engine>(
+        bench_from_exe::<BabyBearPoseidon2Engine, VB, NativeCpuBuilder>(
             bench_name,
             app_config,
             exe,
@@ -177,7 +189,7 @@ impl BenchmarkCli {
             #[cfg(not(feature = "aggregation"))]
             None,
             #[cfg(feature = "aggregation")]
-            Some(self.agg_config().agg_stark_config.leaf_vm_config()),
+            Some(self.agg_config().leaf_vm_config()),
         )
     }
 }
@@ -190,51 +202,50 @@ impl BenchmarkCli {
 /// 6. Verify STARK proofs.
 ///
 /// Returns the data necessary for proof aggregation.
-pub fn bench_from_exe<VC, E: StarkFriEngine<SC>>(
+pub fn bench_from_exe<E, VB, NativeBuilder>(
     bench_name: impl ToString,
-    app_config: AppConfig<VC>,
-    exe: impl Into<VmExe<F>>,
+    app_config: AppConfig<VB::VmConfig>,
+    exe: impl Into<ExecutableFormat>,
     input_stream: StdIn,
     leaf_vm_config: Option<NativeConfig>,
 ) -> Result<()>
 where
-    VC: VmConfig<F>,
-    VC::Executor: Chip<SC>,
-    VC::Periphery: Chip<SC>,
+    E: StarkFriEngine<SC = SC>,
+    VB: VmBuilder<E> + Clone + Default,
+    VB::VmConfig: TranspilerConfig<F>,
+    <VB::VmConfig as VmExecutionConfig<F>>::Executor:
+        Executor<F> + MeteredExecutor<F> + PreflightExecutor<F, VB::RecordArena>,
+    NativeBuilder: VmBuilder<E, VmConfig = NativeConfig> + Clone + Default,
+    <NativeConfig as VmExecutionConfig<F>>::Executor:
+        PreflightExecutor<F, <NativeBuilder as VmBuilder<E>>::RecordArena>,
 {
     let bench_name = bench_name.to_string();
+    let sdk = GenericSdk::<E, VB, NativeBuilder>::new(app_config.clone())?;
     // 1. Generate proving key from config.
-    let app_pk = info_span!("keygen", group = &bench_name).in_scope(|| {
-        metrics_span("keygen_time_ms", || {
-            AppProvingKey::keygen(app_config.clone())
-        })
-    });
-    // 2. Commit to the exe by generating cached trace for program.
-    let committed_exe = info_span!("commit_exe", group = &bench_name).in_scope(|| {
-        metrics_span("commit_exe_time_ms", || {
-            commit_app_exe(app_config.app_fri_params.fri_params, exe)
-        })
-    });
+    let (app_pk, app_vk) = info_span!("keygen", group = &bench_name).in_scope(|| sdk.app_keygen());
     // 3. Executes runtime
     // 4. Generate trace
     // 5. Generate STARK proofs for each segment (segmentation is determined by `config`), with
     //    timer.
-    let app_vk = app_pk.get_app_vk();
-    let prover =
-        AppProver::<VC, E>::new(app_pk.app_vm_pk, committed_exe).with_program_name(bench_name);
-    let app_proof = prover.generate_app_proof(input_stream);
+    let mut prover = sdk.app_prover(exe)?.with_program_name(bench_name);
+    let app_proof = prover.prove(input_stream)?;
     // 6. Verify STARK proofs, including boundary conditions.
-    let sdk = Sdk::new();
-    sdk.verify_app_proof(&app_vk, &app_proof)
-        .expect("Verification failed");
+    verify_app_proof(&app_vk, &app_proof)?;
     if let Some(leaf_vm_config) = leaf_vm_config {
-        let leaf_vm_pk = leaf_keygen(app_config.leaf_fri_params.fri_params, leaf_vm_config);
-        let leaf_prover =
-            VmLocalProver::<SC, NativeConfig, E>::new(leaf_vm_pk, app_pk.leaf_committed_exe);
+        let leaf_vm_pk = _leaf_keygen(app_config.leaf_fri_params.fri_params, leaf_vm_config)?;
+        let vk = leaf_vm_pk.vm_pk.get_vk();
+        let mut leaf_prover = new_local_prover(
+            sdk.native_builder().clone(),
+            &leaf_vm_pk,
+            app_pk.leaf_committed_exe.exe.clone(),
+        )?;
         let leaf_controller = LeafProvingController {
             num_children: AggregationTreeConfig::default().num_children_leaf,
         };
-        leaf_controller.generate_proof(&leaf_prover, &app_proof);
+        let leaf_proofs = leaf_controller.generate_proof(&mut leaf_prover, &app_proof)?;
+        for proof in leaf_proofs {
+            verify_single(&leaf_prover.vm.engine, &vk, &proof)?;
+        }
     }
     Ok(())
 }
diff --git a/benchmarks/utils/Cargo.toml b/benchmarks/utils/Cargo.toml
index 1b1d600a82..da1bf1866e 100644
--- a/benchmarks/utils/Cargo.toml
+++ b/benchmarks/utils/Cargo.toml
@@ -16,15 +16,35 @@ clap = { version = "4.5.9", features = ["derive", "env"] }
 eyre.workspace = true
 tempfile.workspace = true
 tracing.workspace = true
-tracing-subscriber = { version = "0.3.17", features = ["std", "env-filter"] }
+tracing-subscriber.workspace = true
+
+bitcode = { workspace = true, optional = true }
+openvm-circuit = { workspace = true, optional = true }
+openvm-continuations = { workspace = true, optional = true }
+openvm-native-circuit = { workspace = true, optional = true }
+openvm-sdk = { workspace = true, optional = true }
+openvm-stark-sdk = { workspace = true, optional = true }
 
 [dev-dependencies]
 
 [features]
 default = []
-build-binaries = []
+build-elfs = []
+generate-fixtures = [
+    "dep:bitcode",
+    "dep:openvm-circuit",
+    "dep:openvm-continuations",
+    "dep:openvm-native-circuit",
+    "dep:openvm-sdk",
+    "dep:openvm-stark-sdk",
+]
 
 [[bin]]
 name = "build-elfs"
 path = "src/build-elfs.rs"
-required-features = ["build-binaries"]
+required-features = ["build-elfs"]
+
+[[bin]]
+name = "generate-fixtures"
+path = "src/generate-fixtures.rs"
+required-features = ["generate-fixtures"]
diff --git a/benchmarks/utils/src/build-elfs.rs b/benchmarks/utils/src/build-elfs.rs
index 3bed7cf6fd..3ce24c7c5c 100644
--- a/benchmarks/utils/src/build-elfs.rs
+++ b/benchmarks/utils/src/build-elfs.rs
@@ -63,6 +63,12 @@ fn main() -> Result<()> {
     let programs_to_build = if cli.programs.is_empty() {
         available_programs
     } else {
+        for prog in &cli.programs {
+            if !available_programs.iter().any(|(name, _)| name == prog) {
+                tracing::warn!("Program '{}' not found in available programs", prog);
+            }
+        }
+
         available_programs
             .into_iter()
             .filter(|(name, _)| cli.programs.contains(name))
@@ -70,6 +76,12 @@ fn main() -> Result<()> {
     };
 
     // Filter out skipped programs
+    for prog in &cli.skip {
+        if !programs_to_build.iter().any(|(name, _)| name == prog) {
+            tracing::warn!("Program '{}' not found in programs to skip", prog);
+        }
+    }
+
     let programs_to_build = programs_to_build
         .into_iter()
         .filter(|(name, _)| !cli.skip.contains(name))
diff --git a/benchmarks/utils/src/generate-fixtures.rs b/benchmarks/utils/src/generate-fixtures.rs
new file mode 100644
index 0000000000..85c0ba0191
--- /dev/null
+++ b/benchmarks/utils/src/generate-fixtures.rs
@@ -0,0 +1,125 @@
+//! Generates benchmark fixtures for the guest program named by `PROGRAM`.
+//!
+//! Runs app keygen and proving once, generates a leaf proving key, and writes the leaf exe,
+//! the leaf proving key, and the app proof into the fixtures directory with `bitcode`.
+
+use std::{fs, sync::Arc};
+
+use eyre::Result;
+use openvm_benchmarks_utils::{get_elf_path, get_fixtures_dir, get_programs_dir, read_elf_file};
+use openvm_circuit::arch::{instructions::exe::VmExe, VmCircuitConfig};
+use openvm_continuations::verifier::common::types::VmVerifierPvs;
+use openvm_native_circuit::NativeConfig;
+use openvm_sdk::{
+    commit::commit_app_exe,
+    config::{
+        AppConfig, AppFriParams, LeafFriParams, SdkVmConfig, SdkVmCpuBuilder,
+        DEFAULT_APP_LOG_BLOWUP, DEFAULT_LEAF_LOG_BLOWUP, SBOX_SIZE,
+    },
+    Sdk, StdIn,
+};
+use openvm_stark_sdk::{
+    config::{baby_bear_poseidon2::BabyBearPoseidon2Engine, FriParameters},
+    engine::StarkFriEngine,
+};
+use openvm_transpiler::FromElf;
+use tracing_subscriber::{fmt, EnvFilter};
+
+/// Name of the guest program (a directory under the programs dir) to generate fixtures for.
+const PROGRAM: &str = "kitchen-sink";
+
+/// Builds the fixtures for `PROGRAM` and writes them to the fixtures directory.
+///
+/// Writes three `bitcode`-serialized files: `<PROGRAM>.leaf.exe`, `<PROGRAM>.leaf.pk` and
+/// `<PROGRAM>.app.proof`. Fallible steps propagate their error to the caller via `?`.
+fn main() -> Result<()> {
+    // Set up logging
+    fmt::fmt().with_env_filter(EnvFilter::new("info")).init();
+
+    let program_dir = get_programs_dir().join(PROGRAM);
+
+    tracing::info!("Loading VM config");
+    let config_path = program_dir.join("openvm.toml");
+    let config_content = fs::read_to_string(&config_path)?;
+    let vm_config = SdkVmConfig::from_toml(&config_content)?.app_vm_config;
+
+    tracing::info!("Preparing ELF");
+    let elf_path = get_elf_path(&program_dir);
+    let elf = read_elf_file(&elf_path)?;
+
+    let exe = VmExe::from_elf(elf, vm_config.transpiler())?;
+
+    let sdk = Sdk::new();
+
+    // Create app config with default parameters
+    let app_config = AppConfig {
+        app_fri_params: AppFriParams {
+            fri_params: FriParameters::standard_with_100_bits_conjectured_security(
+                DEFAULT_APP_LOG_BLOWUP,
+            ),
+        },
+        leaf_fri_params: LeafFriParams {
+            fri_params: FriParameters::standard_with_100_bits_conjectured_security(
+                DEFAULT_LEAF_LOG_BLOWUP,
+            ),
+        },
+        app_vm_config: vm_config,
+        compiler_options: Default::default(),
+    };
+
+    tracing::info!("Generating app proving key");
+    let app_pk = Arc::new(sdk.app_keygen(app_config.clone())?);
+    let app_committed_exe = commit_app_exe(app_pk.app_fri_params(), exe);
+
+    tracing::info!("Generating app proof");
+    let app_proof = sdk.generate_app_proof(
+        SdkVmCpuBuilder,
+        app_pk.clone(),
+        app_committed_exe,
+        StdIn::default(),
+    )?;
+
+    tracing::info!("Generating leaf proving key");
+    // Generate leaf VM proving key using the circuit keygen approach
+    let leaf_vm_config = NativeConfig::aggregation(
+        VmVerifierPvs::<u8>::width(),
+        SBOX_SIZE.min(
+            app_config
+                .leaf_fri_params
+                .fri_params
+                .max_constraint_degree(),
+        ),
+    );
+    let circuit = leaf_vm_config.create_airs()?;
+    let engine = BabyBearPoseidon2Engine::new(app_config.leaf_fri_params.fri_params);
+    let pk = circuit.keygen(&engine);
+
+    tracing::info!("Saving keys and proof to files");
+    // Create fixtures directory if it doesn't exist
+    let fixtures_dir = get_fixtures_dir();
+    fs::create_dir_all(&fixtures_dir)?;
+
+    // Name each fixture once so the files written and the summary log cannot drift apart.
+    let leaf_exe_name = format!("{}.leaf.exe", PROGRAM);
+    let leaf_pk_name = format!("{}.leaf.pk", PROGRAM);
+    let app_proof_name = format!("{}.app.proof", PROGRAM);
+
+    // Serialize and write to files in fixtures directory
+    let leaf_exe_bytes = bitcode::serialize(&app_pk.leaf_committed_exe.exe)?;
+    fs::write(fixtures_dir.join(&leaf_exe_name), leaf_exe_bytes)?;
+
+    let leaf_pk_bytes = bitcode::serialize(&pk)?;
+    fs::write(fixtures_dir.join(&leaf_pk_name), leaf_pk_bytes)?;
+
+    let app_proof_bytes = bitcode::serialize(&app_proof)?;
+    fs::write(fixtures_dir.join(&app_proof_name), app_proof_bytes)?;
+
+    tracing::info!(
+        "Generated and saved {}, {}, and {}",
+        leaf_exe_name,
+        leaf_pk_name,
+        app_proof_name
+    );
+
+    Ok(())
+}
diff --git a/benchmarks/utils/src/lib.rs b/benchmarks/utils/src/lib.rs
index 99e5ce917b..ad11ab2f99 100644
--- a/benchmarks/utils/src/lib.rs
+++ b/benchmarks/utils/src/lib.rs
@@ -9,6 +9,15 @@ use openvm_build::{build_guest_package, get_package, guest_methods, GuestOptions
 use openvm_transpiler::{elf::Elf, openvm_platform::memory::MEM_SIZE};
 use tempfile::tempdir;
 
+/// Returns the fixtures directory, `../fixtures` relative to this crate's manifest directory.
+///
+/// `CARGO_MANIFEST_DIR` is expanded by `env!` at compile time; the path is not canonicalized.
+pub fn get_fixtures_dir() -> PathBuf {
+    let mut fixtures_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+    fixtures_dir.push("../fixtures");
+    fixtures_dir
+}
+
 pub fn get_programs_dir() -> PathBuf {
     PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../guest")
 }
diff --git a/book/.gitignore b/book/.gitignore
deleted file mode 100644
index 402873062b..0000000000
--- a/book/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-node_modules/**
-book/**
diff --git a/book/.markdownlint.json b/book/.markdownlint.json
deleted file mode 100644
index 385ff1b06f..0000000000
--- a/book/.markdownlint.json
+++ /dev/null
@@ -1,20 +0,0 @@
-{
-    "line_length": {
-      "line_length": 120,
-      "strict": false,
-      "stern": false,
-      "code_blocks": false,
-      "tables": false
-    },
-    "no-duplicate-heading": {
-      "siblings_only": true
-    },
-    "no-blanks-blockquote": false,
-    "no-empty-links": false,
-    "single-title": false,
-    "no-emphasis-as-heading": false,
-    "no-duplicate-header": false,
-    "no-inline-html": false,
-    "MD013": false,
-    "MD029": false
-  }
diff --git a/book/Justfile b/book/Justfile
deleted file mode 100644
index 313c8c929d..0000000000
--- a/book/Justfile
+++ /dev/null
@@ -1,50 +0,0 @@
-
-set positional-arguments
-
-# default recipe to display help information
-default:
-  @just --list
-
-# Install required dependencies
-deps:
-    cargo install mdbook mdbook-katex mdbook-linkcheck mdbook-mermaid
-    pnpm i --frozen-lockfile
-
-# Lint the workspace for all available targets
-lint: lint-book-md-check lint-filenames lint-book-spelling
-
-# Updates all files to fix linting issues
-lint-fix: lint-book-md-fix
-
-# Validates markdown file formatting
-lint-book-md-check:
-    npx markdownlint-cli2 "./src/**/*.md"
-
-# Updates markdown files formatting to satisfy lints
-lint-book-md-fix:
-    npx markdownlint-cli2 --fix "./src/**/*.md"
-
-# Validates spelling using cspell
-lint-book-spelling:
-    npx cspell "./**/*.md"
-
-# Updates cspell words file with new words
-lint-book-spelling-fix:
-    npx cspell --words-only --unique "./**/*.md" | sort --ignore-case | uniq > words.txt
-
-lint-filenames:
-    #!/usr/bin/env bash
-    for file in $(find ./specs -type f); do
-      if [[ "$file" == *_* ]]; then
-        echo "File with underscore found: $file"
-        exit 1
-      fi
-    done
-    echo "Filename linting complete"
-
-build:
-    mdbook build
-
-# Serves the mdbook locally
-serve *args='':
-    mdbook serve $@
diff --git a/book/book.toml b/book/book.toml
deleted file mode 100644
index c1c5a8953f..0000000000
--- a/book/book.toml
+++ /dev/null
@@ -1,10 +0,0 @@
-[book]
-authors = ["OpenVM Contributors"]
-language = "en"
-multilingual = false
-src = "src"
-title = "OpenVM Book"
-
-[output.html]
-site-url = "https://book.openvm.dev/"
-mathjax-support = true
\ No newline at end of file
diff --git a/book/cspell.json b/book/cspell.json
deleted file mode 100644
index 51fc1cd64d..0000000000
--- a/book/cspell.json
+++ /dev/null
@@ -1,24 +0,0 @@
-{
-    "$schema": "https://raw.githubusercontent.com/streetsidesoftware/cspell/main/packages/cspell-types/cspell.schema.json",
-    "version": "0.2",
-    "dictionaryDefinitions": [
-      {
-        "name": "words",
-        "path": "./words.txt",
-        "addWords": true
-      }
-    ],
-    "dictionaries": [
-      "words"
-    ],
-    "ignorePaths": [
-      "node_modules",
-      "./words.txt"
-    ],
-    "overrides": [
-      {
-        "filename": "**/*.md",
-        "ignoreRegExpList": []
-      }
-    ]
-}
diff --git a/book/package.json b/book/package.json
deleted file mode 100644
index 5dd7388530..0000000000
--- a/book/package.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
-  "name": "specs",
-  "version": "1.0.0",
-  "private": true,
-  "engines": {
-    "node": ">=16",
-    "pnpm": ">=9"
-  },
-  "dependencies": {
-    "cspell": "^8.18.0",
-    "markdownlint-cli2": "0.17.2"
-  }
-}
diff --git a/book/pnpm-lock.yaml b/book/pnpm-lock.yaml
deleted file mode 100644
index 43baeacfc0..0000000000
--- a/book/pnpm-lock.yaml
+++ /dev/null
@@ -1,1481 +0,0 @@
-lockfileVersion: '9.0'
-
-settings:
-  autoInstallPeers: true
-  excludeLinksFromLockfile: false
-
-importers:
-
-  .:
-    dependencies:
-      cspell:
-        specifier: ^8.18.0
-        version: 8.18.0
-      markdownlint-cli2:
-        specifier: 0.17.2
-        version: 0.17.2
-
-packages:
-
-  '@cspell/cspell-bundled-dicts@8.18.0':
-    resolution: {integrity: sha512-c7OkDgtYYg0rvj49YS/QzjKeGg/l/d+DjMDqP8BProhKDhAghCsvc6l3SHCGnvyw42+YSTVdL5uLBIaA1OKBlQ==}
-    engines: {node: '>=18'}
-
-  '@cspell/cspell-json-reporter@8.18.0':
-    resolution: {integrity: sha512-glUYtRJ/xIgvCvFqgcF04RJiTFhL95wl1uirP+Qa+oqxvg/zP9zxsZupLD3aeMFhyDpgYwtBiebUmhSqrLDyaw==}
-    engines: {node: '>=18'}
-
-  '@cspell/cspell-pipe@8.18.0':
-    resolution: {integrity: sha512-bSFncsV16B9nhHJdSMTCzdC0Su/TKs+JGTuCq2UiNi6vWxvsL/S1ueiRb+HT0WYKROwtAeHEKnjlw0G3OBBuFw==}
-    engines: {node: '>=18'}
-
-  '@cspell/cspell-resolver@8.18.0':
-    resolution: {integrity: sha512-kNQJtYYJg6WpLoDUZW8VodovOtKLiDrb+GXmcee01qQmrEPCuub9gwoiRuka0sHI2logz0S8l9eAittClpxReg==}
-    engines: {node: '>=18'}
-
-  '@cspell/cspell-service-bus@8.18.0':
-    resolution: {integrity: sha512-yGnb59tUadd1q9dSIvg+Q8yZc7N2ZloZ8Sc5lAfxKOJWUh91ugu2UizmL4lm82vDrOevL3kryiauVTsjoS+UOg==}
-    engines: {node: '>=18'}
-
-  '@cspell/cspell-types@8.18.0':
-    resolution: {integrity: sha512-z7ETwulTCAHpSNBqwD5d3Uoui4ClD+tfREoD0cKd9uMbLtN9W/WMVQAGDWJpOCUFrWsGYYM3/3/ob2oZhwdQag==}
-    engines: {node: '>=18'}
-
-  '@cspell/dict-ada@4.1.0':
-    resolution: {integrity: sha512-7SvmhmX170gyPd+uHXrfmqJBY5qLcCX8kTGURPVeGxmt8XNXT75uu9rnZO+jwrfuU2EimNoArdVy5GZRGljGNg==}
-
-  '@cspell/dict-al@1.1.0':
-    resolution: {integrity: sha512-PtNI1KLmYkELYltbzuoztBxfi11jcE9HXBHCpID2lou/J4VMYKJPNqe4ZjVzSI9NYbMnMnyG3gkbhIdx66VSXg==}
-
-  '@cspell/dict-aws@4.0.9':
-    resolution: {integrity: sha512-bDYdnnJGwSkIZ4gzrauu7qzOs/ZAY/FnU4k11LgdMI8BhwMfsbsy2EI1iS+sD/BI5ZnNT9kU5YR3WADeNOmhRg==}
-
-  '@cspell/dict-bash@4.2.0':
-    resolution: {integrity: sha512-HOyOS+4AbCArZHs/wMxX/apRkjxg6NDWdt0jF9i9XkvJQUltMwEhyA2TWYjQ0kssBsnof+9amax2lhiZnh3kCg==}
-
-  '@cspell/dict-companies@3.1.14':
-    resolution: {integrity: sha512-iqo1Ce4L7h0l0GFSicm2wCLtfuymwkvgFGhmu9UHyuIcTbdFkDErH+m6lH3Ed+QuskJlpQ9dM7puMIGqUlVERw==}
-
-  '@cspell/dict-cpp@6.0.6':
-    resolution: {integrity: sha512-HMV1chsExuZt5IL9rYBW7GmhNZDVdQJEd1WtFgOO6jqiNxbpTG3Is3Pkldl7FpusBQQZr4BdjMit5bnPpVRy3A==}
-
-  '@cspell/dict-cryptocurrencies@5.0.4':
-    resolution: {integrity: sha512-6iFu7Abu+4Mgqq08YhTKHfH59mpMpGTwdzDB2Y8bbgiwnGFCeoiSkVkgLn1Kel2++hYcZ8vsAW/MJS9oXxuMag==}
-
-  '@cspell/dict-csharp@4.0.6':
-    resolution: {integrity: sha512-w/+YsqOknjQXmIlWDRmkW+BHBPJZ/XDrfJhZRQnp0wzpPOGml7W0q1iae65P2AFRtTdPKYmvSz7AL5ZRkCnSIw==}
-
-  '@cspell/dict-css@4.0.17':
-    resolution: {integrity: sha512-2EisRLHk6X/PdicybwlajLGKF5aJf4xnX2uuG5lexuYKt05xV/J/OiBADmi8q9obhxf1nesrMQbqAt+6CsHo/w==}
-
-  '@cspell/dict-dart@2.3.0':
-    resolution: {integrity: sha512-1aY90lAicek8vYczGPDKr70pQSTQHwMFLbmWKTAI6iavmb1fisJBS1oTmMOKE4ximDf86MvVN6Ucwx3u/8HqLg==}
-
-  '@cspell/dict-data-science@2.0.7':
-    resolution: {integrity: sha512-XhAkK+nSW6zmrnWzusmZ1BpYLc62AWYHZc2p17u4nE2Z9XG5DleG55PCZxXQTKz90pmwlhFM9AfpkJsYaBWATA==}
-
-  '@cspell/dict-django@4.1.4':
-    resolution: {integrity: sha512-fX38eUoPvytZ/2GA+g4bbdUtCMGNFSLbdJJPKX2vbewIQGfgSFJKY56vvcHJKAvw7FopjvgyS/98Ta9WN1gckg==}
-
-  '@cspell/dict-docker@1.1.12':
-    resolution: {integrity: sha512-6d25ZPBnYZaT9D9An/x6g/4mk542R8bR3ipnby3QFCxnfdd6xaWiTcwDPsCgwN2aQZIQ1jX/fil9KmBEqIK/qA==}
-
-  '@cspell/dict-dotnet@5.0.9':
-    resolution: {integrity: sha512-JGD6RJW5sHtO5lfiJl11a5DpPN6eKSz5M1YBa1I76j4dDOIqgZB6rQexlDlK1DH9B06X4GdDQwdBfnpAB0r2uQ==}
-
-  '@cspell/dict-elixir@4.0.7':
-    resolution: {integrity: sha512-MAUqlMw73mgtSdxvbAvyRlvc3bYnrDqXQrx5K9SwW8F7fRYf9V4vWYFULh+UWwwkqkhX9w03ZqFYRTdkFku6uA==}
-
-  '@cspell/dict-en-common-misspellings@2.0.10':
-    resolution: {integrity: sha512-80mXJLtr0tVEtzowrI7ycVae/ULAYImZUlr0kUTpa8i57AUk7Zy3pYBs44EYIKW7ZC9AHu4Qjjfq4vriAtyTDQ==}
-
-  '@cspell/dict-en-gb@1.1.33':
-    resolution: {integrity: sha512-tKSSUf9BJEV+GJQAYGw5e+ouhEe2ZXE620S7BLKe3ZmpnjlNG9JqlnaBhkIMxKnNFkLY2BP/EARzw31AZnOv4g==}
-
-  '@cspell/dict-en_us@4.3.35':
-    resolution: {integrity: sha512-HF6QNyPHkxeo/SosaZXRQlnKDUTjIzrGKyqfbw/fPPlPYrXefAZZ40ofheb5HnbUicR7xqV/lsc/HQfqYshGIw==}
-
-  '@cspell/dict-filetypes@3.0.11':
-    resolution: {integrity: sha512-bBtCHZLo7MiSRUqx5KEiPdGOmXIlDGY+L7SJEtRWZENpAKE+96rT7hj+TUUYWBbCzheqHr0OXZJFEKDgsG/uZg==}
-
-  '@cspell/dict-flutter@1.1.0':
-    resolution: {integrity: sha512-3zDeS7zc2p8tr9YH9tfbOEYfopKY/srNsAa+kE3rfBTtQERAZeOhe5yxrnTPoufctXLyuUtcGMUTpxr3dO0iaA==}
-
-  '@cspell/dict-fonts@4.0.4':
-    resolution: {integrity: sha512-cHFho4hjojBcHl6qxidl9CvUb492IuSk7xIf2G2wJzcHwGaCFa2o3gRcxmIg1j62guetAeDDFELizDaJlVRIOg==}
-
-  '@cspell/dict-fsharp@1.1.0':
-    resolution: {integrity: sha512-oguWmHhGzgbgbEIBKtgKPrFSVAFtvGHaQS0oj+vacZqMObwkapcTGu7iwf4V3Bc2T3caf0QE6f6rQfIJFIAVsw==}
-
-  '@cspell/dict-fullstack@3.2.6':
-    resolution: {integrity: sha512-cSaq9rz5RIU9j+0jcF2vnKPTQjxGXclntmoNp4XB7yFX2621PxJcekGjwf/lN5heJwVxGLL9toR0CBlGKwQBgA==}
-
-  '@cspell/dict-gaming-terms@1.1.0':
-    resolution: {integrity: sha512-46AnDs9XkgJ2f1Sqol1WgfJ8gOqp60fojpc9Wxch7x+BA63g4JfMV5/M5x0sI0TLlLY8EBSglcr8wQF/7C80AQ==}
-
-  '@cspell/dict-git@3.0.4':
-    resolution: {integrity: sha512-C44M+m56rYn6QCsLbiKiedyPTMZxlDdEYAsPwwlL5bhMDDzXZ3Ic8OCQIhMbiunhCOJJT+er4URmOmM+sllnjg==}
-
-  '@cspell/dict-golang@6.0.19':
-    resolution: {integrity: sha512-VS+oinB2/CbgmHE06kMJlj52OVMZM0S2EEXph3oaroNTgTuclSwdFylQmOEjquZi55kW+n3FM9MyWXiitB7Dtg==}
-
-  '@cspell/dict-google@1.0.8':
-    resolution: {integrity: sha512-BnMHgcEeaLyloPmBs8phCqprI+4r2Jb8rni011A8hE+7FNk7FmLE3kiwxLFrcZnnb7eqM0agW4zUaNoB0P+z8A==}
-
-  '@cspell/dict-haskell@4.0.5':
-    resolution: {integrity: sha512-s4BG/4tlj2pPM9Ha7IZYMhUujXDnI0Eq1+38UTTCpatYLbQqDwRFf2KNPLRqkroU+a44yTUAe0rkkKbwy4yRtQ==}
-
-  '@cspell/dict-html-symbol-entities@4.0.3':
-    resolution: {integrity: sha512-aABXX7dMLNFdSE8aY844X4+hvfK7977sOWgZXo4MTGAmOzR8524fjbJPswIBK7GaD3+SgFZ2yP2o0CFvXDGF+A==}
-
-  '@cspell/dict-html@4.0.11':
-    resolution: {integrity: sha512-QR3b/PB972SRQ2xICR1Nw/M44IJ6rjypwzA4jn+GH8ydjAX9acFNfc+hLZVyNe0FqsE90Gw3evLCOIF0vy1vQw==}
-
-  '@cspell/dict-java@5.0.11':
-    resolution: {integrity: sha512-T4t/1JqeH33Raa/QK/eQe26FE17eUCtWu+JsYcTLkQTci2dk1DfcIKo8YVHvZXBnuM43ATns9Xs0s+AlqDeH7w==}
-
-  '@cspell/dict-julia@1.1.0':
-    resolution: {integrity: sha512-CPUiesiXwy3HRoBR3joUseTZ9giFPCydSKu2rkh6I2nVjXnl5vFHzOMLXpbF4HQ1tH2CNfnDbUndxD+I+7eL9w==}
-
-  '@cspell/dict-k8s@1.0.10':
-    resolution: {integrity: sha512-313haTrX9prep1yWO7N6Xw4D6tvUJ0Xsx+YhCP+5YrrcIKoEw5Rtlg8R4PPzLqe6zibw6aJ+Eqq+y76Vx5BZkw==}
-
-  '@cspell/dict-kotlin@1.1.0':
-    resolution: {integrity: sha512-vySaVw6atY7LdwvstQowSbdxjXG6jDhjkWVWSjg1XsUckyzH1JRHXe9VahZz1i7dpoFEUOWQrhIe5B9482UyJQ==}
-
-  '@cspell/dict-latex@4.0.3':
-    resolution: {integrity: sha512-2KXBt9fSpymYHxHfvhUpjUFyzrmN4c4P8mwIzweLyvqntBT3k0YGZJSriOdjfUjwSygrfEwiuPI1EMrvgrOMJw==}
-
-  '@cspell/dict-lorem-ipsum@4.0.4':
-    resolution: {integrity: sha512-+4f7vtY4dp2b9N5fn0za/UR0kwFq2zDtA62JCbWHbpjvO9wukkbl4rZg4YudHbBgkl73HRnXFgCiwNhdIA1JPw==}
-
-  '@cspell/dict-lua@4.0.7':
-    resolution: {integrity: sha512-Wbr7YSQw+cLHhTYTKV6cAljgMgcY+EUAxVIZW3ljKswEe4OLxnVJ7lPqZF5JKjlXdgCjbPSimsHqyAbC5pQN/Q==}
-
-  '@cspell/dict-makefile@1.0.4':
-    resolution: {integrity: sha512-E4hG/c0ekPqUBvlkrVvzSoAA+SsDA9bLi4xSV3AXHTVru7Y2bVVGMPtpfF+fI3zTkww/jwinprcU1LSohI3ylw==}
-
-  '@cspell/dict-markdown@2.0.9':
-    resolution: {integrity: sha512-j2e6Eg18BlTb1mMP1DkyRFMM/FLS7qiZjltpURzDckB57zDZbUyskOFdl4VX7jItZZEeY0fe22bSPOycgS1Z5A==}
-    peerDependencies:
-      '@cspell/dict-css': ^4.0.17
-      '@cspell/dict-html': ^4.0.11
-      '@cspell/dict-html-symbol-entities': ^4.0.3
-      '@cspell/dict-typescript': ^3.2.0
-
-  '@cspell/dict-monkeyc@1.0.10':
-    resolution: {integrity: sha512-7RTGyKsTIIVqzbvOtAu6Z/lwwxjGRtY5RkKPlXKHEoEAgIXwfDxb5EkVwzGQwQr8hF/D3HrdYbRT8MFBfsueZw==}
-
-  '@cspell/dict-node@5.0.6':
-    resolution: {integrity: sha512-CEbhPCpxGvRNByGolSBTrXXW2rJA4bGqZuTx1KKO85mwR6aadeOmUE7xf/8jiCkXSy+qvr9aJeh+jlfXcsrziQ==}
-
-  '@cspell/dict-npm@5.1.31':
-    resolution: {integrity: sha512-Oh9nrhgNV4UD1hlbgO3TFQqQRKziwc7qXKoQiC4oqOYIhMs2WL9Ezozku7FY1e7o5XbCIZX9nRH0ymNx/Rwj6w==}
-
-  '@cspell/dict-php@4.0.14':
-    resolution: {integrity: sha512-7zur8pyncYZglxNmqsRycOZ6inpDoVd4yFfz1pQRe5xaRWMiK3Km4n0/X/1YMWhh3e3Sl/fQg5Axb2hlN68t1g==}
-
-  '@cspell/dict-powershell@5.0.14':
-    resolution: {integrity: sha512-ktjjvtkIUIYmj/SoGBYbr3/+CsRGNXGpvVANrY0wlm/IoGlGywhoTUDYN0IsGwI2b8Vktx3DZmQkfb3Wo38jBA==}
-
-  '@cspell/dict-public-licenses@2.0.13':
-    resolution: {integrity: sha512-1Wdp/XH1ieim7CadXYE7YLnUlW0pULEjVl9WEeziZw3EKCAw8ZI8Ih44m4bEa5VNBLnuP5TfqC4iDautAleQzQ==}
-
-  '@cspell/dict-python@4.2.16':
-    resolution: {integrity: sha512-LkQssFt1hPOWXIQiD8ScTkz/41RL7Ti0V/2ytUzEW82dc0atIEksrBg8MuOjWXktp0Dk5tDwRLgmIvhV3CFFOA==}
-
-  '@cspell/dict-r@2.1.0':
-    resolution: {integrity: sha512-k2512wgGG0lTpTYH9w5Wwco+lAMf3Vz7mhqV8+OnalIE7muA0RSuD9tWBjiqLcX8zPvEJr4LdgxVju8Gk3OKyA==}
-
-  '@cspell/dict-ruby@5.0.8':
-    resolution: {integrity: sha512-ixuTneU0aH1cPQRbWJvtvOntMFfeQR2KxT8LuAv5jBKqQWIHSxzGlp+zX3SVyoeR0kOWiu64/O5Yn836A5yMcQ==}
-
-  '@cspell/dict-rust@4.0.11':
-    resolution: {integrity: sha512-OGWDEEzm8HlkSmtD8fV3pEcO2XBpzG2XYjgMCJCRwb2gRKvR+XIm6Dlhs04N/K2kU+iH8bvrqNpM8fS/BFl0uw==}
-
-  '@cspell/dict-scala@5.0.7':
-    resolution: {integrity: sha512-yatpSDW/GwulzO3t7hB5peoWwzo+Y3qTc0pO24Jf6f88jsEeKmDeKkfgPbYuCgbE4jisGR4vs4+jfQZDIYmXPA==}
-
-  '@cspell/dict-shell@1.1.0':
-    resolution: {integrity: sha512-D/xHXX7T37BJxNRf5JJHsvziFDvh23IF/KvkZXNSh8VqcRdod3BAz9VGHZf6VDqcZXr1VRqIYR3mQ8DSvs3AVQ==}
-
-  '@cspell/dict-software-terms@5.0.2':
-    resolution: {integrity: sha512-aCzP+M0WXRLYXTriDMZygUe5s4jKyau/nCA6gBGt4EoHfXn+Ua/+DrW766oXOkkESIlqTBtRgb9gWwQvUdOXSQ==}
-
-  '@cspell/dict-sql@2.2.0':
-    resolution: {integrity: sha512-MUop+d1AHSzXpBvQgQkCiok8Ejzb+nrzyG16E8TvKL2MQeDwnIvMe3bv90eukP6E1HWb+V/MA/4pnq0pcJWKqQ==}
-
-  '@cspell/dict-svelte@1.0.6':
-    resolution: {integrity: sha512-8LAJHSBdwHCoKCSy72PXXzz7ulGROD0rP1CQ0StOqXOOlTUeSFaJJlxNYjlONgd2c62XBQiN2wgLhtPN+1Zv7Q==}
-
-  '@cspell/dict-swift@2.0.5':
-    resolution: {integrity: sha512-3lGzDCwUmnrfckv3Q4eVSW3sK3cHqqHlPprFJZD4nAqt23ot7fic5ALR7J4joHpvDz36nHX34TgcbZNNZOC/JA==}
-
-  '@cspell/dict-terraform@1.1.1':
-    resolution: {integrity: sha512-07KFDwCU7EnKl4hOZLsLKlj6Zceq/IsQ3LRWUyIjvGFfZHdoGtFdCp3ZPVgnFaAcd/DKv+WVkrOzUBSYqHopQQ==}
-
-  '@cspell/dict-typescript@3.2.0':
-    resolution: {integrity: sha512-Pk3zNePLT8qg51l0M4g1ISowYAEGxTuNfZlgkU5SvHa9Cu7x/BWoyYq9Fvc3kAyoisCjRPyvWF4uRYrPitPDFw==}
-
-  '@cspell/dict-vue@3.0.4':
-    resolution: {integrity: sha512-0dPtI0lwHcAgSiQFx8CzvqjdoXROcH+1LyqgROCpBgppommWpVhbQ0eubnKotFEXgpUCONVkeZJ6Ql8NbTEu+w==}
-
-  '@cspell/dynamic-import@8.18.0':
-    resolution: {integrity: sha512-TPdY/x9l0DAWCSI8iXDEQSl0xlB9qSbEqIv3UYVpWqbQYCY7MdA15bmai8uKt08sZ8F9L6nYHPtbOGFExHvoSw==}
-    engines: {node: '>=18.0'}
-
-  '@cspell/filetypes@8.18.0':
-    resolution: {integrity: sha512-Qd+Fc9CfkCm4Kufe/H8jCLe5px3PwiKmJgdiZ6FJ0i06MU+0XHZGmzWayrL+EoTqfbQA3jLkvnSgWwF0C6Ci6Q==}
-    engines: {node: '>=18'}
-
-  '@cspell/strong-weak-map@8.18.0':
-    resolution: {integrity: sha512-u8j+1JsnzJv10c7KaGzCdp8mJ3IL0tJ601+ySdebqVL4VNVKE1OcEV+sYxMjrXbeXkPGlpSwr+yDKMW1WHaC7A==}
-    engines: {node: '>=18'}
-
-  '@cspell/url@8.18.0':
-    resolution: {integrity: sha512-jbo66L7Y5WImty4o2s5sL6LwTSHS6XjZDKEUayqxILyNb5XHKRUinyII1/EpglFRi9n7G5w4t714/Aeg1Y90Vg==}
-    engines: {node: '>=18.0'}
-
-  '@nodelib/fs.scandir@2.1.5':
-    resolution: {integrity: sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==}
-    engines: {node: '>= 8'}
-
-  '@nodelib/fs.stat@2.0.5':
-    resolution: {integrity: sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==}
-    engines: {node: '>= 8'}
-
-  '@nodelib/fs.walk@1.2.8':
-    resolution: {integrity: sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==}
-    engines: {node: '>= 8'}
-
-  '@sindresorhus/merge-streams@2.3.0':
-    resolution: {integrity: sha512-LtoMMhxAlorcGhmFYI+LhPgbPZCkgP6ra1YL604EeF6U98pLlQ3iWIGMdWSC+vWmPBWBNgmDBAhnAobLROJmwg==}
-    engines: {node: '>=18'}
-
-  '@types/debug@4.1.12':
-    resolution: {integrity: sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==}
-
-  '@types/katex@0.16.7':
-    resolution: {integrity: sha512-HMwFiRujE5PjrgwHQ25+bsLJgowjGjm5Z8FVSf0N6PwgJrwxH0QxzHYDcKsTfV3wva0vzrpqMTJS2jXPr5BMEQ==}
-
-  '@types/ms@2.1.0':
-    resolution: {integrity: sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA==}
-
-  '@types/unist@2.0.11':
-    resolution: {integrity: sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA==}
-
-  argparse@2.0.1:
-    resolution: {integrity: sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==}
-
-  array-timsort@1.0.3:
-    resolution: {integrity: sha512-/+3GRL7dDAGEfM6TseQk/U+mi18TU2Ms9I3UlLdUMhz2hbvGNTKdj9xniwXfUqgYhHxRx0+8UnKkvlNwVU+cWQ==}
-
-  braces@3.0.3:
-    resolution: {integrity: sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==}
-    engines: {node: '>=8'}
-
-  callsites@3.1.0:
-    resolution: {integrity: sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==}
-    engines: {node: '>=6'}
-
-  chalk-template@1.1.0:
-    resolution: {integrity: sha512-T2VJbcDuZQ0Tb2EWwSotMPJjgpy1/tGee1BTpUNsGZ/qgNjV2t7Mvu+d4600U564nbLesN1x2dPL+xii174Ekg==}
-    engines: {node: '>=14.16'}
-
-  chalk@5.4.1:
-    resolution: {integrity: sha512-zgVZuo2WcZgfUEmsn6eO3kINexW8RAE4maiQ8QNs8CtpPCSyMiYsULR3HQYkm3w8FIA3SberyMJMSldGsW+U3w==}
-    engines: {node: ^12.17.0 || ^14.13 || >=16.0.0}
-
-  character-entities-legacy@3.0.0:
-    resolution: {integrity: sha512-RpPp0asT/6ufRm//AJVwpViZbGM/MkjQFxJccQRHmISF/22NBtsHqAWmL+/pmkPWoIUJdWyeVleTl1wydHATVQ==}
-
-  character-entities@2.0.2:
-    resolution: {integrity: sha512-shx7oQ0Awen/BRIdkjkvz54PnEEI/EjwXDSIZp86/KKdbafHh1Df/RYGBhn4hbe2+uKC9FnT5UCEdyPz3ai9hQ==}
-
-  character-reference-invalid@2.0.1:
-    resolution: {integrity: sha512-iBZ4F4wRbyORVsu0jPV7gXkOsGYjGHPmAyv+HiHG8gi5PtC9KI2j1+v8/tlibRvjoWX027ypmG/n0HtO5t7unw==}
-
-  clear-module@4.1.2:
-    resolution: {integrity: sha512-LWAxzHqdHsAZlPlEyJ2Poz6AIs384mPeqLVCru2p0BrP9G/kVGuhNyZYClLO6cXlnuJjzC8xtsJIuMjKqLXoAw==}
-    engines: {node: '>=8'}
-
-  commander@13.1.0:
-    resolution: {integrity: sha512-/rFeCpNJQbhSZjGVwO9RFV3xPqbnERS8MmIQzCtD/zl6gpJuV/bMLuN92oG3F7d8oDEHHRrujSXNUr8fpjntKw==}
-    engines: {node: '>=18'}
-
-  commander@8.3.0:
-    resolution: {integrity: sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww==}
-    engines: {node: '>= 12'}
-
-  comment-json@4.2.5:
-    resolution: {integrity: sha512-bKw/r35jR3HGt5PEPm1ljsQQGyCrR8sFGNiN5L+ykDHdpO8Smxkrkla9Yi6NkQyUrb8V54PGhfMs6NrIwtxtdw==}
-    engines: {node: '>= 6'}
-
-  core-util-is@1.0.3:
-    resolution: {integrity: sha512-ZQBvi1DcpJ4GDqanjucZ2Hj3wEO5pZDS89BWbkcrvdxksJorwUDDZamX9ldFkp9aw2lmBDLgkObEA4DWNJ9FYQ==}
-
-  cspell-config-lib@8.18.0:
-    resolution: {integrity: sha512-Y0hos8IS1rzmU9lTl6v1q6MBr6v9nhJy5IacZXSJhBSTHRYemsvICTnn+PtksUFgrEEqWusOdAsgBhYV0nlSuw==}
-    engines: {node: '>=18'}
-
-  cspell-dictionary@8.18.0:
-    resolution: {integrity: sha512-yf7anUDHYFPuQ53619BILYswm4E08NzyNPO1cF0GyqGe5ZTTHf/rCNYcuZHtQ7yKmQuC/K8/y2kEeLsqNa0p6A==}
-    engines: {node: '>=18'}
-
-  cspell-gitignore@8.18.0:
-    resolution: {integrity: sha512-HYWAK7282o9CkcMwqC3w1wNjgae1v4CMgzF3ptpyBonjISkj1GdFGMno4Gu2uW43aKGTmyj4Fi9J94UZvzZa4w==}
-    engines: {node: '>=18'}
-    hasBin: true
-
-  cspell-glob@8.18.0:
-    resolution: {integrity: sha512-ox3ygu5+3tXR3+XRbYJy/z+YK1zo4TFQFkvUEr+aV8Ogyvgm7qE9uTaFz6krkkMLNG6l8EZ7mJtdn0ZsXF/WKQ==}
-    engines: {node: '>=18'}
-
-  cspell-grammar@8.18.0:
-    resolution: {integrity: sha512-/h8gLULvH+P+8N/cmIx8M85sqlER6AyO/RoCVudfq7lTkFneXXKmCoHSA2YQbod9ZSjL+voivBokN2yjMR+XEA==}
-    engines: {node: '>=18'}
-    hasBin: true
-
-  cspell-io@8.18.0:
-    resolution: {integrity: sha512-W6CfXY5dlGTd6XWgHl4B2qLD/gla9TXDVdSo3ViCMJoVu82UQD8b4mir5RfHqXiMrz7ItDeUy9BxFN42VB2YcA==}
-    engines: {node: '>=18'}
-
-  cspell-lib@8.18.0:
-    resolution: {integrity: sha512-346CAY12pVk40FWnfPOwajKug61EeawW3QMtJE/f6rMYGAjGxGExhZnl6eR/KuCMt/w60kqQMSjGDw2zJjJqUw==}
-    engines: {node: '>=18'}
-
-  cspell-trie-lib@8.18.0:
-    resolution: {integrity: sha512-zhrCAHyQ2uiGpFdp6E336/L2oDTh/0fM22VpGbkBS4uYKqG9jMy4eUZdSKS8Lg3St4YdGK14J7dv/PiMLqqxlw==}
-    engines: {node: '>=18'}
-
-  cspell@8.18.0:
-    resolution: {integrity: sha512-+6lJaR4zI/250vAR3qXwRj9O80Q4dHUuJWVXCZQV2L6HdF+s5ThS7+HYmE5zdf1YpPCtYJJ/6stkKsdUCQtkTA==}
-    engines: {node: '>=18'}
-    hasBin: true
-
-  debug@4.4.0:
-    resolution: {integrity: sha512-6WTZ/IxCY/T6BALoZHaE4ctp9xm+Z5kY/pzYaCHRFeyVhojxlrm+46y68HA6hr0TcwEssoxNiDEUJQjfPZ/RYA==}
-    engines: {node: '>=6.0'}
-    peerDependencies:
-      supports-color: '*'
-    peerDependenciesMeta:
-      supports-color:
-        optional: true
-
-  decode-named-character-reference@1.1.0:
-    resolution: {integrity: sha512-Wy+JTSbFThEOXQIR2L6mxJvEs+veIzpmqD7ynWxMXGpnk3smkHQOp6forLdHsKpAMW9iJpaBBIxz285t1n1C3w==}
-
-  dequal@2.0.3:
-    resolution: {integrity: sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==}
-    engines: {node: '>=6'}
-
-  devlop@1.1.0:
-    resolution: {integrity: sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==}
-
-  entities@4.5.0:
-    resolution: {integrity: sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==}
-    engines: {node: '>=0.12'}
-
-  env-paths@3.0.0:
-    resolution: {integrity: sha512-dtJUTepzMW3Lm/NPxRf3wP4642UWhjL2sQxc+ym2YMj1m/H2zDNQOlezafzkHwn6sMstjHTwG6iQQsctDW/b1A==}
-    engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
-
-  esprima@4.0.1:
-    resolution: {integrity: sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==}
-    engines: {node: '>=4'}
-    hasBin: true
-
-  fast-equals@5.2.2:
-    resolution: {integrity: sha512-V7/RktU11J3I36Nwq2JnZEM7tNm17eBJz+u25qdxBZeCKiX6BkVSZQjwWIr+IobgnZy+ag73tTZgZi7tr0LrBw==}
-    engines: {node: '>=6.0.0'}
-
-  fast-glob@3.3.3:
-    resolution: {integrity: sha512-7MptL8U0cqcFdzIzwOTHoilX9x5BrNqye7Z/LuC7kCMRio1EMSyqRK3BEAUD7sXRq4iT4AzTVuZdhgQ2TCvYLg==}
-    engines: {node: '>=8.6.0'}
-
-  fast-json-stable-stringify@2.1.0:
-    resolution: {integrity: sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==}
-
-  fastq@1.19.1:
-    resolution: {integrity: sha512-GwLTyxkCXjXbxqIhTsMI2Nui8huMPtnxg7krajPJAjnEG/iiOS7i+zCtWGZR9G0NBKbXKh6X9m9UIsYX/N6vvQ==}
-
-  fdir@6.4.3:
-    resolution: {integrity: sha512-PMXmW2y1hDDfTSRc9gaXIuCCRpuoz3Kaz8cUelp3smouvfT632ozg2vrT6lJsHKKOF59YLbOGfAWGUcKEfRMQw==}
-    peerDependencies:
-      picomatch: ^3 || ^4
-    peerDependenciesMeta:
-      picomatch:
-        optional: true
-
-  file-entry-cache@9.1.0:
-    resolution: {integrity: sha512-/pqPFG+FdxWQj+/WSuzXSDaNzxgTLr/OrR1QuqfEZzDakpdYE70PwUxL7BPUa8hpjbvY1+qvCl8k+8Tq34xJgg==}
-    engines: {node: '>=18'}
-
-  fill-range@7.1.1:
-    resolution: {integrity: sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==}
-    engines: {node: '>=8'}
-
-  flat-cache@5.0.0:
-    resolution: {integrity: sha512-JrqFmyUl2PnPi1OvLyTVHnQvwQ0S+e6lGSwu8OkAZlSaNIZciTY2H/cOOROxsBA1m/LZNHDsqAgDZt6akWcjsQ==}
-    engines: {node: '>=18'}
-
-  flatted@3.3.3:
-    resolution: {integrity: sha512-GX+ysw4PBCz0PzosHDepZGANEuFCMLrnRTiEy9McGjmkCQYwRq4A/X786G/fjM/+OjsWSU1ZrY5qyARZmO/uwg==}
-
-  gensequence@7.0.0:
-    resolution: {integrity: sha512-47Frx13aZh01afHJTB3zTtKIlFI6vWY+MYCN9Qpew6i52rfKjnhCF/l1YlC8UmEMvvntZZ6z4PiCcmyuedR2aQ==}
-    engines: {node: '>=18'}
-
-  get-stdin@9.0.0:
-    resolution: {integrity: sha512-dVKBjfWisLAicarI2Sf+JuBE/DghV4UzNAVe9yhEJuzeREd3JhOTE9cUaJTeSa77fsbQUK3pcOpJfM59+VKZaA==}
-    engines: {node: '>=12'}
-
-  glob-parent@5.1.2:
-    resolution: {integrity: sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==}
-    engines: {node: '>= 6'}
-
-  global-directory@4.0.1:
-    resolution: {integrity: sha512-wHTUcDUoZ1H5/0iVqEudYW4/kAlN5cZ3j/bXn0Dpbizl9iaUVeWSHqiOjsgk6OW2bkLclbBjzewBz6weQ1zA2Q==}
-    engines: {node: '>=18'}
-
-  globby@14.0.2:
-    resolution: {integrity: sha512-s3Fq41ZVh7vbbe2PN3nrW7yC7U7MFVc5c98/iTl9c2GawNMKx/J648KQRW6WKkuU8GIbbh2IXfIRQjOZnXcTnw==}
-    engines: {node: '>=18'}
-
-  has-own-prop@2.0.0:
-    resolution: {integrity: sha512-Pq0h+hvsVm6dDEa8x82GnLSYHOzNDt7f0ddFa3FqcQlgzEiptPqL+XrOJNavjOzSYiYWIrgeVYYgGlLmnxwilQ==}
-    engines: {node: '>=8'}
-
-  ignore@5.3.2:
-    resolution: {integrity: sha512-hsBTNUqQTDwkWtcdYI2i06Y/nUBEsNEDJKjWdigLvegy8kDuJAS8uRlpkkcQpyEXL0Z/pjDy5HBmMjRCJ2gq+g==}
-    engines: {node: '>= 4'}
-
-  import-fresh@3.3.1:
-    resolution: {integrity: sha512-TR3KfrTZTYLPB6jUjfx6MF9WcWrHL9su5TObK4ZkYgBdWKPOFoSoQIdEuTuR82pmtxH2spWG9h6etwfr1pLBqQ==}
-    engines: {node: '>=6'}
-
-  import-meta-resolve@4.1.0:
-    resolution: {integrity: sha512-I6fiaX09Xivtk+THaMfAwnA3MVA5Big1WHF1Dfx9hFuvNIWpXnorlkzhcQf6ehrqQiiZECRt1poOAkPmer3ruw==}
-
-  ini@4.1.1:
-    resolution: {integrity: sha512-QQnnxNyfvmHFIsj7gkPcYymR8Jdw/o7mp5ZFihxn6h8Ci6fh3Dx4E1gPjpQEpIuPo9XVNY/ZUwh4BPMjGyL01g==}
-    engines: {node: ^14.17.0 || ^16.13.0 || >=18.0.0}
-
-  is-alphabetical@2.0.1:
-    resolution: {integrity: sha512-FWyyY60MeTNyeSRpkM2Iry0G9hpr7/9kD40mD/cGQEuilcZYS4okz8SN2Q6rLCJ8gbCt6fN+rC+6tMGS99LaxQ==}
-
-  is-alphanumerical@2.0.1:
-    resolution: {integrity: sha512-hmbYhX/9MUMF5uh7tOXyK/n0ZvWpad5caBA17GsC6vyuCqaWliRG5K1qS9inmUhEMaOBIW7/whAnSwveW/LtZw==}
-
-  is-decimal@2.0.1:
-    resolution: {integrity: sha512-AAB9hiomQs5DXWcRB1rqsxGUstbRroFOPPVAomNk/3XHR5JyEZChOyTWe2oayKnsSsr/kcGqF+z6yuH6HHpN0A==}
-
-  is-extglob@2.1.1:
-    resolution: {integrity: sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==}
-    engines: {node: '>=0.10.0'}
-
-  is-glob@4.0.3:
-    resolution: {integrity: sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==}
-    engines: {node: '>=0.10.0'}
-
-  is-hexadecimal@2.0.1:
-    resolution: {integrity: sha512-DgZQp241c8oO6cA1SbTEWiXeoxV42vlcJxgH+B3hi1AiqqKruZR3ZGF8In3fj4+/y/7rHvlOZLZtgJ/4ttYGZg==}
-
-  is-number@7.0.0:
-    resolution: {integrity: sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==}
-    engines: {node: '>=0.12.0'}
-
-  js-yaml@4.1.0:
-    resolution: {integrity: sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==}
-    hasBin: true
-
-  json-buffer@3.0.1:
-    resolution: {integrity: sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==}
-
-  jsonc-parser@3.3.1:
-    resolution: {integrity: sha512-HUgH65KyejrUFPvHFPbqOY0rsFip3Bo5wb4ngvdi1EpCYWUQDC5V+Y7mZws+DLkr4M//zQJoanu1SP+87Dv1oQ==}
-
-  katex@0.16.21:
-    resolution: {integrity: sha512-XvqR7FgOHtWupfMiigNzmh+MgUVmDGU2kXZm899ZkPfcuoPuFxyHmXsgATDpFZDAXCI8tvinaVcDo8PIIJSo4A==}
-    hasBin: true
-
-  keyv@4.5.4:
-    resolution: {integrity: sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw==}
-
-  linkify-it@5.0.0:
-    resolution: {integrity: sha512-5aHCbzQRADcdP+ATqnDuhhJ/MRIqDkZX5pyjFHRRysS8vZ5AbqGEoFIb6pYHPZ+L/OC2Lc+xT8uHVVR5CAK/wQ==}
-
-  markdown-it@14.1.0:
-    resolution: {integrity: sha512-a54IwgWPaeBCAAsv13YgmALOF1elABB08FxO9i+r4VFk5Vl4pKokRPeX8u5TCgSsPi6ec1otfLjdOpVcgbpshg==}
-    hasBin: true
-
-  markdownlint-cli2-formatter-default@0.0.5:
-    resolution: {integrity: sha512-4XKTwQ5m1+Txo2kuQ3Jgpo/KmnG+X90dWt4acufg6HVGadTUG5hzHF/wssp9b5MBYOMCnZ9RMPaU//uHsszF8Q==}
-    peerDependencies:
-      markdownlint-cli2: '>=0.0.4'
-
-  markdownlint-cli2@0.17.2:
-    resolution: {integrity: sha512-XH06ZOi8wCrtOSSj3p8y3yJzwgzYOSa7lglNyS3fP05JPRzRGyjauBb5UvlLUSCGysMmULS1moxdRHHudV+g/Q==}
-    engines: {node: '>=18'}
-    hasBin: true
-
-  markdownlint@0.37.4:
-    resolution: {integrity: sha512-u00joA/syf3VhWh6/ybVFkib5Zpj2e5KB/cfCei8fkSRuums6nyisTWGqjTWIOFoFwuXoTBQQiqlB4qFKp8ncQ==}
-    engines: {node: '>=18'}
-
-  mdurl@2.0.0:
-    resolution: {integrity: sha512-Lf+9+2r+Tdp5wXDXC4PcIBjTDtq4UKjCPMQhKIuzpJNW0b96kVqSwW0bT7FhRSfmAiFYgP+SCRvdrDozfh0U5w==}
-
-  merge2@1.4.1:
-    resolution: {integrity: sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==}
-    engines: {node: '>= 8'}
-
-  micromark-core-commonmark@2.0.2:
-    resolution: {integrity: sha512-FKjQKbxd1cibWMM1P9N+H8TwlgGgSkWZMmfuVucLCHaYqeSvJ0hFeHsIa65pA2nYbes0f8LDHPMrd9X7Ujxg9w==}
-
-  micromark-extension-directive@3.0.2:
-    resolution: {integrity: sha512-wjcXHgk+PPdmvR58Le9d7zQYWy+vKEU9Se44p2CrCDPiLr2FMyiT4Fyb5UFKFC66wGB3kPlgD7q3TnoqPS7SZA==}
-
-  micromark-extension-gfm-autolink-literal@2.1.0:
-    resolution: {integrity: sha512-oOg7knzhicgQ3t4QCjCWgTmfNhvQbDDnJeVu9v81r7NltNCVmhPy1fJRX27pISafdjL+SVc4d3l48Gb6pbRypw==}
-
-  micromark-extension-gfm-footnote@2.1.0:
-    resolution: {integrity: sha512-/yPhxI1ntnDNsiHtzLKYnE3vf9JZ6cAisqVDauhp4CEHxlb4uoOTxOCJ+9s51bIB8U1N1FJ1RXOKTIlD5B/gqw==}
-
-  micromark-extension-gfm-table@2.1.0:
-    resolution: {integrity: sha512-Ub2ncQv+fwD70/l4ou27b4YzfNaCJOvyX4HxXU15m7mpYY+rjuWzsLIPZHJL253Z643RpbcP1oeIJlQ/SKW67g==}
-
-  micromark-extension-math@3.1.0:
-    resolution: {integrity: sha512-lvEqd+fHjATVs+2v/8kg9i5Q0AP2k85H0WUOwpIVvUML8BapsMvh1XAogmQjOCsLpoKRCVQqEkQBB3NhVBcsOg==}
-
-  micromark-factory-destination@2.0.1:
-    resolution: {integrity: sha512-Xe6rDdJlkmbFRExpTOmRj9N3MaWmbAgdpSrBQvCFqhezUn4AHqJHbaEnfbVYYiexVSs//tqOdY/DxhjdCiJnIA==}
-
-  micromark-factory-label@2.0.1:
-    resolution: {integrity: sha512-VFMekyQExqIW7xIChcXn4ok29YE3rnuyveW3wZQWWqF4Nv9Wk5rgJ99KzPvHjkmPXF93FXIbBp6YdW3t71/7Vg==}
-
-  micromark-factory-space@2.0.1:
-    resolution: {integrity: sha512-zRkxjtBxxLd2Sc0d+fbnEunsTj46SWXgXciZmHq0kDYGnck/ZSGj9/wULTV95uoeYiK5hRXP2mJ98Uo4cq/LQg==}
-
-  micromark-factory-title@2.0.1:
-    resolution: {integrity: sha512-5bZ+3CjhAd9eChYTHsjy6TGxpOFSKgKKJPJxr293jTbfry2KDoWkhBb6TcPVB4NmzaPhMs1Frm9AZH7OD4Cjzw==}
-
-  micromark-factory-whitespace@2.0.1:
-    resolution: {integrity: sha512-Ob0nuZ3PKt/n0hORHyvoD9uZhr+Za8sFoP+OnMcnWK5lngSzALgQYKMr9RJVOWLqQYuyn6ulqGWSXdwf6F80lQ==}
-
-  micromark-util-character@2.1.1:
-    resolution: {integrity: sha512-wv8tdUTJ3thSFFFJKtpYKOYiGP2+v96Hvk4Tu8KpCAsTMs6yi+nVmGh1syvSCsaxz45J6Jbw+9DD6g97+NV67Q==}
-
-  micromark-util-chunked@2.0.1:
-    resolution: {integrity: sha512-QUNFEOPELfmvv+4xiNg2sRYeS/P84pTW0TCgP5zc9FpXetHY0ab7SxKyAQCNCc1eK0459uoLI1y5oO5Vc1dbhA==}
-
-  micromark-util-classify-character@2.0.1:
-    resolution: {integrity: sha512-K0kHzM6afW/MbeWYWLjoHQv1sgg2Q9EccHEDzSkxiP/EaagNzCm7T/WMKZ3rjMbvIpvBiZgwR3dKMygtA4mG1Q==}
-
-  micromark-util-combine-extensions@2.0.1:
-    resolution: {integrity: sha512-OnAnH8Ujmy59JcyZw8JSbK9cGpdVY44NKgSM7E9Eh7DiLS2E9RNQf0dONaGDzEG9yjEl5hcqeIsj4hfRkLH/Bg==}
-
-  micromark-util-decode-numeric-character-reference@2.0.2:
-    resolution: {integrity: sha512-ccUbYk6CwVdkmCQMyr64dXz42EfHGkPQlBj5p7YVGzq8I7CtjXZJrubAYezf7Rp+bjPseiROqe7G6foFd+lEuw==}
-
-  micromark-util-encode@2.0.1:
-    resolution: {integrity: sha512-c3cVx2y4KqUnwopcO9b/SCdo2O67LwJJ/UyqGfbigahfegL9myoEFoDYZgkT7f36T0bLrM9hZTAaAyH+PCAXjw==}
-
-  micromark-util-html-tag-name@2.0.1:
-    resolution: {integrity: sha512-2cNEiYDhCWKI+Gs9T0Tiysk136SnR13hhO8yW6BGNyhOC4qYFnwF1nKfD3HFAIXA5c45RrIG1ub11GiXeYd1xA==}
-
-  micromark-util-normalize-identifier@2.0.1:
-    resolution: {integrity: sha512-sxPqmo70LyARJs0w2UclACPUUEqltCkJ6PhKdMIDuJ3gSf/Q+/GIe3WKl0Ijb/GyH9lOpUkRAO2wp0GVkLvS9Q==}
-
-  micromark-util-resolve-all@2.0.1:
-    resolution: {integrity: sha512-VdQyxFWFT2/FGJgwQnJYbe1jjQoNTS4RjglmSjTUlpUMa95Htx9NHeYW4rGDJzbjvCsl9eLjMQwGeElsqmzcHg==}
-
-  micromark-util-sanitize-uri@2.0.1:
-    resolution: {integrity: sha512-9N9IomZ/YuGGZZmQec1MbgxtlgougxTodVwDzzEouPKo3qFWvymFHWcnDi2vzV1ff6kas9ucW+o3yzJK9YB1AQ==}
-
-  micromark-util-subtokenize@2.1.0:
-    resolution: {integrity: sha512-XQLu552iSctvnEcgXw6+Sx75GflAPNED1qx7eBJ+wydBb2KCbRZe+NwvIEEMM83uml1+2WSXpBAcp9IUCgCYWA==}
-
-  micromark-util-symbol@2.0.1:
-    resolution: {integrity: sha512-vs5t8Apaud9N28kgCrRUdEed4UJ+wWNvicHLPxCa9ENlYuAY31M0ETy5y1vA33YoNPDFTghEbnh6efaE8h4x0Q==}
-
-  micromark-util-types@2.0.1:
-    resolution: {integrity: sha512-534m2WhVTddrcKVepwmVEVnUAmtrx9bfIjNoQHRqfnvdaHQiFytEhJoTgpWJvDEXCO5gLTQh3wYC1PgOJA4NSQ==}
-
-  micromark@4.0.1:
-    resolution: {integrity: sha512-eBPdkcoCNvYcxQOAKAlceo5SNdzZWfF+FcSupREAzdAh9rRmE239CEQAiTwIgblwnoM8zzj35sZ5ZwvSEOF6Kw==}
-
-  micromatch@4.0.8:
-    resolution: {integrity: sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==}
-    engines: {node: '>=8.6'}
-
-  ms@2.1.3:
-    resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==}
-
-  parent-module@1.0.1:
-    resolution: {integrity: sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==}
-    engines: {node: '>=6'}
-
-  parent-module@2.0.0:
-    resolution: {integrity: sha512-uo0Z9JJeWzv8BG+tRcapBKNJ0dro9cLyczGzulS6EfeyAdeC9sbojtW6XwvYxJkEne9En+J2XEl4zyglVeIwFg==}
-    engines: {node: '>=8'}
-
-  parse-entities@4.0.2:
-    resolution: {integrity: sha512-GG2AQYWoLgL877gQIKeRPGO1xF9+eG1ujIb5soS5gPvLQ1y2o8FL90w2QWNdf9I361Mpp7726c+lj3U0qK1uGw==}
-
-  path-type@5.0.0:
-    resolution: {integrity: sha512-5HviZNaZcfqP95rwpv+1HDgUamezbqdSYTyzjTvwtJSnIH+3vnbmWsItli8OFEndS984VT55M3jduxZbX351gg==}
-    engines: {node: '>=12'}
-
-  picomatch@2.3.1:
-    resolution: {integrity: sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==}
-    engines: {node: '>=8.6'}
-
-  picomatch@4.0.2:
-    resolution: {integrity: sha512-M7BAV6Rlcy5u+m6oPhAPFgJTzAioX/6B0DxyvDlo9l8+T3nLKbrczg2WLUyzd45L8RqfUMyGPzekbMvX2Ldkwg==}
-    engines: {node: '>=12'}
-
-  punycode.js@2.3.1:
-    resolution: {integrity: sha512-uxFIHU0YlHYhDQtV4R9J6a52SLx28BCjT+4ieh7IGbgwVJWO+km431c4yRlREUAsAmt/uMjQUyQHNEPf0M39CA==}
-    engines: {node: '>=6'}
-
-  queue-microtask@1.2.3:
-    resolution: {integrity: sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==}
-
-  repeat-string@1.6.1:
-    resolution: {integrity: sha512-PV0dzCYDNfRi1jCDbJzpW7jNNDRuCOG/jI5ctQcGKt/clZD+YcPS3yIlWuTJMmESC8aevCFmWJy5wjAFgNqN6w==}
-    engines: {node: '>=0.10'}
-
-  resolve-from@4.0.0:
-    resolution: {integrity: sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==}
-    engines: {node: '>=4'}
-
-  resolve-from@5.0.0:
-    resolution: {integrity: sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw==}
-    engines: {node: '>=8'}
-
-  reusify@1.1.0:
-    resolution: {integrity: sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw==}
-    engines: {iojs: '>=1.0.0', node: '>=0.10.0'}
-
-  run-parallel@1.2.0:
-    resolution: {integrity: sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==}
-
-  semver@7.7.1:
-    resolution: {integrity: sha512-hlq8tAfn0m/61p4BVRcPzIGr6LKiMwo4VM6dGi6pt4qcRkmNzTcWq6eCEjEh+qXjkMDvPlOFFSGwQjoEa6gyMA==}
-    engines: {node: '>=10'}
-    hasBin: true
-
-  slash@5.1.0:
-    resolution: {integrity: sha512-ZA6oR3T/pEyuqwMgAKT0/hAv8oAXckzbkmR0UkUosQ+Mc4RxGoJkRmwHgHufaenlyAgE1Mxgpdcrf75y6XcnDg==}
-    engines: {node: '>=14.16'}
-
-  tinyglobby@0.2.12:
-    resolution: {integrity: sha512-qkf4trmKSIiMTs/E63cxH+ojC2unam7rJ0WrauAzpT3ECNTxGRMlaXxVbfxMUC/w0LaYk6jQ4y/nGR9uBO3tww==}
-    engines: {node: '>=12.0.0'}
-
-  to-regex-range@5.0.1:
-    resolution: {integrity: sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==}
-    engines: {node: '>=8.0'}
-
-  uc.micro@2.1.0:
-    resolution: {integrity: sha512-ARDJmphmdvUk6Glw7y9DQ2bFkKBHwQHLi2lsaH6PPmz/Ka9sFOBsBluozhDltWmnv9u/cF6Rt87znRTPV+yp/A==}
-
-  unicorn-magic@0.1.0:
-    resolution: {integrity: sha512-lRfVq8fE8gz6QMBuDM6a+LO3IAzTi05H6gCVaUpir2E1Rwpo4ZUog45KpNXKC/Mn3Yb9UDuHumeFTo9iV/D9FQ==}
-    engines: {node: '>=18'}
-
-  vscode-languageserver-textdocument@1.0.12:
-    resolution: {integrity: sha512-cxWNPesCnQCcMPeenjKKsOCKQZ/L6Tv19DTRIGuLWe32lyzWhihGVJ/rcckZXJxfdKCFvRLS3fpBIsV/ZGX4zA==}
-
-  vscode-uri@3.1.0:
-    resolution: {integrity: sha512-/BpdSx+yCQGnCvecbyXdxHDkuk55/G3xwnC0GqY4gmQ3j+A+g8kzzgB4Nk/SINjqn6+waqw3EgbVF2QKExkRxQ==}
-
-  xdg-basedir@5.1.0:
-    resolution: {integrity: sha512-GCPAHLvrIH13+c0SuacwvRYj2SxJXQ4kaVTT5xgL3kPrz56XxkF21IGhjSE1+W0aw7gpBWRGXLCPnPby6lSpmQ==}
-    engines: {node: '>=12'}
-
-  yaml@2.7.0:
-    resolution: {integrity: sha512-+hSoy/QHluxmC9kCIJyL/uyFmLmc+e5CFR5Wa+bpIhIj85LVb9ZH2nVnqrHoSvKogwODv0ClqZkmiSSaIH5LTA==}
-    engines: {node: '>= 14'}
-    hasBin: true
-
-snapshots:
-
-  '@cspell/cspell-bundled-dicts@8.18.0':
-    dependencies:
-      '@cspell/dict-ada': 4.1.0
-      '@cspell/dict-al': 1.1.0
-      '@cspell/dict-aws': 4.0.9
-      '@cspell/dict-bash': 4.2.0
-      '@cspell/dict-companies': 3.1.14
-      '@cspell/dict-cpp': 6.0.6
-      '@cspell/dict-cryptocurrencies': 5.0.4
-      '@cspell/dict-csharp': 4.0.6
-      '@cspell/dict-css': 4.0.17
-      '@cspell/dict-dart': 2.3.0
-      '@cspell/dict-data-science': 2.0.7
-      '@cspell/dict-django': 4.1.4
-      '@cspell/dict-docker': 1.1.12
-      '@cspell/dict-dotnet': 5.0.9
-      '@cspell/dict-elixir': 4.0.7
-      '@cspell/dict-en-common-misspellings': 2.0.10
-      '@cspell/dict-en-gb': 1.1.33
-      '@cspell/dict-en_us': 4.3.35
-      '@cspell/dict-filetypes': 3.0.11
-      '@cspell/dict-flutter': 1.1.0
-      '@cspell/dict-fonts': 4.0.4
-      '@cspell/dict-fsharp': 1.1.0
-      '@cspell/dict-fullstack': 3.2.6
-      '@cspell/dict-gaming-terms': 1.1.0
-      '@cspell/dict-git': 3.0.4
-      '@cspell/dict-golang': 6.0.19
-      '@cspell/dict-google': 1.0.8
-      '@cspell/dict-haskell': 4.0.5
-      '@cspell/dict-html': 4.0.11
-      '@cspell/dict-html-symbol-entities': 4.0.3
-      '@cspell/dict-java': 5.0.11
-      '@cspell/dict-julia': 1.1.0
-      '@cspell/dict-k8s': 1.0.10
-      '@cspell/dict-kotlin': 1.1.0
-      '@cspell/dict-latex': 4.0.3
-      '@cspell/dict-lorem-ipsum': 4.0.4
-      '@cspell/dict-lua': 4.0.7
-      '@cspell/dict-makefile': 1.0.4
-      '@cspell/dict-markdown': 2.0.9(@cspell/dict-css@4.0.17)(@cspell/dict-html-symbol-entities@4.0.3)(@cspell/dict-html@4.0.11)(@cspell/dict-typescript@3.2.0)
-      '@cspell/dict-monkeyc': 1.0.10
-      '@cspell/dict-node': 5.0.6
-      '@cspell/dict-npm': 5.1.31
-      '@cspell/dict-php': 4.0.14
-      '@cspell/dict-powershell': 5.0.14
-      '@cspell/dict-public-licenses': 2.0.13
-      '@cspell/dict-python': 4.2.16
-      '@cspell/dict-r': 2.1.0
-      '@cspell/dict-ruby': 5.0.8
-      '@cspell/dict-rust': 4.0.11
-      '@cspell/dict-scala': 5.0.7
-      '@cspell/dict-shell': 1.1.0
-      '@cspell/dict-software-terms': 5.0.2
-      '@cspell/dict-sql': 2.2.0
-      '@cspell/dict-svelte': 1.0.6
-      '@cspell/dict-swift': 2.0.5
-      '@cspell/dict-terraform': 1.1.1
-      '@cspell/dict-typescript': 3.2.0
-      '@cspell/dict-vue': 3.0.4
-
-  '@cspell/cspell-json-reporter@8.18.0':
-    dependencies:
-      '@cspell/cspell-types': 8.18.0
-
-  '@cspell/cspell-pipe@8.18.0': {}
-
-  '@cspell/cspell-resolver@8.18.0':
-    dependencies:
-      global-directory: 4.0.1
-
-  '@cspell/cspell-service-bus@8.18.0': {}
-
-  '@cspell/cspell-types@8.18.0': {}
-
-  '@cspell/dict-ada@4.1.0': {}
-
-  '@cspell/dict-al@1.1.0': {}
-
-  '@cspell/dict-aws@4.0.9': {}
-
-  '@cspell/dict-bash@4.2.0':
-    dependencies:
-      '@cspell/dict-shell': 1.1.0
-
-  '@cspell/dict-companies@3.1.14': {}
-
-  '@cspell/dict-cpp@6.0.6': {}
-
-  '@cspell/dict-cryptocurrencies@5.0.4': {}
-
-  '@cspell/dict-csharp@4.0.6': {}
-
-  '@cspell/dict-css@4.0.17': {}
-
-  '@cspell/dict-dart@2.3.0': {}
-
-  '@cspell/dict-data-science@2.0.7': {}
-
-  '@cspell/dict-django@4.1.4': {}
-
-  '@cspell/dict-docker@1.1.12': {}
-
-  '@cspell/dict-dotnet@5.0.9': {}
-
-  '@cspell/dict-elixir@4.0.7': {}
-
-  '@cspell/dict-en-common-misspellings@2.0.10': {}
-
-  '@cspell/dict-en-gb@1.1.33': {}
-
-  '@cspell/dict-en_us@4.3.35': {}
-
-  '@cspell/dict-filetypes@3.0.11': {}
-
-  '@cspell/dict-flutter@1.1.0': {}
-
-  '@cspell/dict-fonts@4.0.4': {}
-
-  '@cspell/dict-fsharp@1.1.0': {}
-
-  '@cspell/dict-fullstack@3.2.6': {}
-
-  '@cspell/dict-gaming-terms@1.1.0': {}
-
-  '@cspell/dict-git@3.0.4': {}
-
-  '@cspell/dict-golang@6.0.19': {}
-
-  '@cspell/dict-google@1.0.8': {}
-
-  '@cspell/dict-haskell@4.0.5': {}
-
-  '@cspell/dict-html-symbol-entities@4.0.3': {}
-
-  '@cspell/dict-html@4.0.11': {}
-
-  '@cspell/dict-java@5.0.11': {}
-
-  '@cspell/dict-julia@1.1.0': {}
-
-  '@cspell/dict-k8s@1.0.10': {}
-
-  '@cspell/dict-kotlin@1.1.0': {}
-
-  '@cspell/dict-latex@4.0.3': {}
-
-  '@cspell/dict-lorem-ipsum@4.0.4': {}
-
-  '@cspell/dict-lua@4.0.7': {}
-
-  '@cspell/dict-makefile@1.0.4': {}
-
-  '@cspell/dict-markdown@2.0.9(@cspell/dict-css@4.0.17)(@cspell/dict-html-symbol-entities@4.0.3)(@cspell/dict-html@4.0.11)(@cspell/dict-typescript@3.2.0)':
-    dependencies:
-      '@cspell/dict-css': 4.0.17
-      '@cspell/dict-html': 4.0.11
-      '@cspell/dict-html-symbol-entities': 4.0.3
-      '@cspell/dict-typescript': 3.2.0
-
-  '@cspell/dict-monkeyc@1.0.10': {}
-
-  '@cspell/dict-node@5.0.6': {}
-
-  '@cspell/dict-npm@5.1.31': {}
-
-  '@cspell/dict-php@4.0.14': {}
-
-  '@cspell/dict-powershell@5.0.14': {}
-
-  '@cspell/dict-public-licenses@2.0.13': {}
-
-  '@cspell/dict-python@4.2.16':
-    dependencies:
-      '@cspell/dict-data-science': 2.0.7
-
-  '@cspell/dict-r@2.1.0': {}
-
-  '@cspell/dict-ruby@5.0.8': {}
-
-  '@cspell/dict-rust@4.0.11': {}
-
-  '@cspell/dict-scala@5.0.7': {}
-
-  '@cspell/dict-shell@1.1.0': {}
-
-  '@cspell/dict-software-terms@5.0.2': {}
-
-  '@cspell/dict-sql@2.2.0': {}
-
-  '@cspell/dict-svelte@1.0.6': {}
-
-  '@cspell/dict-swift@2.0.5': {}
-
-  '@cspell/dict-terraform@1.1.1': {}
-
-  '@cspell/dict-typescript@3.2.0': {}
-
-  '@cspell/dict-vue@3.0.4': {}
-
-  '@cspell/dynamic-import@8.18.0':
-    dependencies:
-      '@cspell/url': 8.18.0
-      import-meta-resolve: 4.1.0
-
-  '@cspell/filetypes@8.18.0': {}
-
-  '@cspell/strong-weak-map@8.18.0': {}
-
-  '@cspell/url@8.18.0': {}
-
-  '@nodelib/fs.scandir@2.1.5':
-    dependencies:
-      '@nodelib/fs.stat': 2.0.5
-      run-parallel: 1.2.0
-
-  '@nodelib/fs.stat@2.0.5': {}
-
-  '@nodelib/fs.walk@1.2.8':
-    dependencies:
-      '@nodelib/fs.scandir': 2.1.5
-      fastq: 1.19.1
-
-  '@sindresorhus/merge-streams@2.3.0': {}
-
-  '@types/debug@4.1.12':
-    dependencies:
-      '@types/ms': 2.1.0
-
-  '@types/katex@0.16.7': {}
-
-  '@types/ms@2.1.0': {}
-
-  '@types/unist@2.0.11': {}
-
-  argparse@2.0.1: {}
-
-  array-timsort@1.0.3: {}
-
-  braces@3.0.3:
-    dependencies:
-      fill-range: 7.1.1
-
-  callsites@3.1.0: {}
-
-  chalk-template@1.1.0:
-    dependencies:
-      chalk: 5.4.1
-
-  chalk@5.4.1: {}
-
-  character-entities-legacy@3.0.0: {}
-
-  character-entities@2.0.2: {}
-
-  character-reference-invalid@2.0.1: {}
-
-  clear-module@4.1.2:
-    dependencies:
-      parent-module: 2.0.0
-      resolve-from: 5.0.0
-
-  commander@13.1.0: {}
-
-  commander@8.3.0: {}
-
-  comment-json@4.2.5:
-    dependencies:
-      array-timsort: 1.0.3
-      core-util-is: 1.0.3
-      esprima: 4.0.1
-      has-own-prop: 2.0.0
-      repeat-string: 1.6.1
-
-  core-util-is@1.0.3: {}
-
-  cspell-config-lib@8.18.0:
-    dependencies:
-      '@cspell/cspell-types': 8.18.0
-      comment-json: 4.2.5
-      yaml: 2.7.0
-
-  cspell-dictionary@8.18.0:
-    dependencies:
-      '@cspell/cspell-pipe': 8.18.0
-      '@cspell/cspell-types': 8.18.0
-      cspell-trie-lib: 8.18.0
-      fast-equals: 5.2.2
-
-  cspell-gitignore@8.18.0:
-    dependencies:
-      '@cspell/url': 8.18.0
-      cspell-glob: 8.18.0
-      cspell-io: 8.18.0
-
-  cspell-glob@8.18.0:
-    dependencies:
-      '@cspell/url': 8.18.0
-      micromatch: 4.0.8
-
-  cspell-grammar@8.18.0:
-    dependencies:
-      '@cspell/cspell-pipe': 8.18.0
-      '@cspell/cspell-types': 8.18.0
-
-  cspell-io@8.18.0:
-    dependencies:
-      '@cspell/cspell-service-bus': 8.18.0
-      '@cspell/url': 8.18.0
-
-  cspell-lib@8.18.0:
-    dependencies:
-      '@cspell/cspell-bundled-dicts': 8.18.0
-      '@cspell/cspell-pipe': 8.18.0
-      '@cspell/cspell-resolver': 8.18.0
-      '@cspell/cspell-types': 8.18.0
-      '@cspell/dynamic-import': 8.18.0
-      '@cspell/filetypes': 8.18.0
-      '@cspell/strong-weak-map': 8.18.0
-      '@cspell/url': 8.18.0
-      clear-module: 4.1.2
-      comment-json: 4.2.5
-      cspell-config-lib: 8.18.0
-      cspell-dictionary: 8.18.0
-      cspell-glob: 8.18.0
-      cspell-grammar: 8.18.0
-      cspell-io: 8.18.0
-      cspell-trie-lib: 8.18.0
-      env-paths: 3.0.0
-      fast-equals: 5.2.2
-      gensequence: 7.0.0
-      import-fresh: 3.3.1
-      resolve-from: 5.0.0
-      vscode-languageserver-textdocument: 1.0.12
-      vscode-uri: 3.1.0
-      xdg-basedir: 5.1.0
-
-  cspell-trie-lib@8.18.0:
-    dependencies:
-      '@cspell/cspell-pipe': 8.18.0
-      '@cspell/cspell-types': 8.18.0
-      gensequence: 7.0.0
-
-  cspell@8.18.0:
-    dependencies:
-      '@cspell/cspell-json-reporter': 8.18.0
-      '@cspell/cspell-pipe': 8.18.0
-      '@cspell/cspell-types': 8.18.0
-      '@cspell/dynamic-import': 8.18.0
-      '@cspell/url': 8.18.0
-      chalk: 5.4.1
-      chalk-template: 1.1.0
-      commander: 13.1.0
-      cspell-dictionary: 8.18.0
-      cspell-gitignore: 8.18.0
-      cspell-glob: 8.18.0
-      cspell-io: 8.18.0
-      cspell-lib: 8.18.0
-      fast-json-stable-stringify: 2.1.0
-      file-entry-cache: 9.1.0
-      get-stdin: 9.0.0
-      semver: 7.7.1
-      tinyglobby: 0.2.12
-
-  debug@4.4.0:
-    dependencies:
-      ms: 2.1.3
-
-  decode-named-character-reference@1.1.0:
-    dependencies:
-      character-entities: 2.0.2
-
-  dequal@2.0.3: {}
-
-  devlop@1.1.0:
-    dependencies:
-      dequal: 2.0.3
-
-  entities@4.5.0: {}
-
-  env-paths@3.0.0: {}
-
-  esprima@4.0.1: {}
-
-  fast-equals@5.2.2: {}
-
-  fast-glob@3.3.3:
-    dependencies:
-      '@nodelib/fs.stat': 2.0.5
-      '@nodelib/fs.walk': 1.2.8
-      glob-parent: 5.1.2
-      merge2: 1.4.1
-      micromatch: 4.0.8
-
-  fast-json-stable-stringify@2.1.0: {}
-
-  fastq@1.19.1:
-    dependencies:
-      reusify: 1.1.0
-
-  fdir@6.4.3(picomatch@4.0.2):
-    optionalDependencies:
-      picomatch: 4.0.2
-
-  file-entry-cache@9.1.0:
-    dependencies:
-      flat-cache: 5.0.0
-
-  fill-range@7.1.1:
-    dependencies:
-      to-regex-range: 5.0.1
-
-  flat-cache@5.0.0:
-    dependencies:
-      flatted: 3.3.3
-      keyv: 4.5.4
-
-  flatted@3.3.3: {}
-
-  gensequence@7.0.0: {}
-
-  get-stdin@9.0.0: {}
-
-  glob-parent@5.1.2:
-    dependencies:
-      is-glob: 4.0.3
-
-  global-directory@4.0.1:
-    dependencies:
-      ini: 4.1.1
-
-  globby@14.0.2:
-    dependencies:
-      '@sindresorhus/merge-streams': 2.3.0
-      fast-glob: 3.3.3
-      ignore: 5.3.2
-      path-type: 5.0.0
-      slash: 5.1.0
-      unicorn-magic: 0.1.0
-
-  has-own-prop@2.0.0: {}
-
-  ignore@5.3.2: {}
-
-  import-fresh@3.3.1:
-    dependencies:
-      parent-module: 1.0.1
-      resolve-from: 4.0.0
-
-  import-meta-resolve@4.1.0: {}
-
-  ini@4.1.1: {}
-
-  is-alphabetical@2.0.1: {}
-
-  is-alphanumerical@2.0.1:
-    dependencies:
-      is-alphabetical: 2.0.1
-      is-decimal: 2.0.1
-
-  is-decimal@2.0.1: {}
-
-  is-extglob@2.1.1: {}
-
-  is-glob@4.0.3:
-    dependencies:
-      is-extglob: 2.1.1
-
-  is-hexadecimal@2.0.1: {}
-
-  is-number@7.0.0: {}
-
-  js-yaml@4.1.0:
-    dependencies:
-      argparse: 2.0.1
-
-  json-buffer@3.0.1: {}
-
-  jsonc-parser@3.3.1: {}
-
-  katex@0.16.21:
-    dependencies:
-      commander: 8.3.0
-
-  keyv@4.5.4:
-    dependencies:
-      json-buffer: 3.0.1
-
-  linkify-it@5.0.0:
-    dependencies:
-      uc.micro: 2.1.0
-
-  markdown-it@14.1.0:
-    dependencies:
-      argparse: 2.0.1
-      entities: 4.5.0
-      linkify-it: 5.0.0
-      mdurl: 2.0.0
-      punycode.js: 2.3.1
-      uc.micro: 2.1.0
-
-  markdownlint-cli2-formatter-default@0.0.5(markdownlint-cli2@0.17.2):
-    dependencies:
-      markdownlint-cli2: 0.17.2
-
-  markdownlint-cli2@0.17.2:
-    dependencies:
-      globby: 14.0.2
-      js-yaml: 4.1.0
-      jsonc-parser: 3.3.1
-      markdownlint: 0.37.4
-      markdownlint-cli2-formatter-default: 0.0.5(markdownlint-cli2@0.17.2)
-      micromatch: 4.0.8
-    transitivePeerDependencies:
-      - supports-color
-
-  markdownlint@0.37.4:
-    dependencies:
-      markdown-it: 14.1.0
-      micromark: 4.0.1
-      micromark-core-commonmark: 2.0.2
-      micromark-extension-directive: 3.0.2
-      micromark-extension-gfm-autolink-literal: 2.1.0
-      micromark-extension-gfm-footnote: 2.1.0
-      micromark-extension-gfm-table: 2.1.0
-      micromark-extension-math: 3.1.0
-      micromark-util-types: 2.0.1
-    transitivePeerDependencies:
-      - supports-color
-
-  mdurl@2.0.0: {}
-
-  merge2@1.4.1: {}
-
-  micromark-core-commonmark@2.0.2:
-    dependencies:
-      decode-named-character-reference: 1.1.0
-      devlop: 1.1.0
-      micromark-factory-destination: 2.0.1
-      micromark-factory-label: 2.0.1
-      micromark-factory-space: 2.0.1
-      micromark-factory-title: 2.0.1
-      micromark-factory-whitespace: 2.0.1
-      micromark-util-character: 2.1.1
-      micromark-util-chunked: 2.0.1
-      micromark-util-classify-character: 2.0.1
-      micromark-util-html-tag-name: 2.0.1
-      micromark-util-normalize-identifier: 2.0.1
-      micromark-util-resolve-all: 2.0.1
-      micromark-util-subtokenize: 2.1.0
-      micromark-util-symbol: 2.0.1
-      micromark-util-types: 2.0.1
-
-  micromark-extension-directive@3.0.2:
-    dependencies:
-      devlop: 1.1.0
-      micromark-factory-space: 2.0.1
-      micromark-factory-whitespace: 2.0.1
-      micromark-util-character: 2.1.1
-      micromark-util-symbol: 2.0.1
-      micromark-util-types: 2.0.1
-      parse-entities: 4.0.2
-
-  micromark-extension-gfm-autolink-literal@2.1.0:
-    dependencies:
-      micromark-util-character: 2.1.1
-      micromark-util-sanitize-uri: 2.0.1
-      micromark-util-symbol: 2.0.1
-      micromark-util-types: 2.0.1
-
-  micromark-extension-gfm-footnote@2.1.0:
-    dependencies:
-      devlop: 1.1.0
-      micromark-core-commonmark: 2.0.2
-      micromark-factory-space: 2.0.1
-      micromark-util-character: 2.1.1
-      micromark-util-normalize-identifier: 2.0.1
-      micromark-util-sanitize-uri: 2.0.1
-      micromark-util-symbol: 2.0.1
-      micromark-util-types: 2.0.1
-
-  micromark-extension-gfm-table@2.1.0:
-    dependencies:
-      devlop: 1.1.0
-      micromark-factory-space: 2.0.1
-      micromark-util-character: 2.1.1
-      micromark-util-symbol: 2.0.1
-      micromark-util-types: 2.0.1
-
-  micromark-extension-math@3.1.0:
-    dependencies:
-      '@types/katex': 0.16.7
-      devlop: 1.1.0
-      katex: 0.16.21
-      micromark-factory-space: 2.0.1
-      micromark-util-character: 2.1.1
-      micromark-util-symbol: 2.0.1
-      micromark-util-types: 2.0.1
-
-  micromark-factory-destination@2.0.1:
-    dependencies:
-      micromark-util-character: 2.1.1
-      micromark-util-symbol: 2.0.1
-      micromark-util-types: 2.0.1
-
-  micromark-factory-label@2.0.1:
-    dependencies:
-      devlop: 1.1.0
-      micromark-util-character: 2.1.1
-      micromark-util-symbol: 2.0.1
-      micromark-util-types: 2.0.1
-
-  micromark-factory-space@2.0.1:
-    dependencies:
-      micromark-util-character: 2.1.1
-      micromark-util-types: 2.0.1
-
-  micromark-factory-title@2.0.1:
-    dependencies:
-      micromark-factory-space: 2.0.1
-      micromark-util-character: 2.1.1
-      micromark-util-symbol: 2.0.1
-      micromark-util-types: 2.0.1
-
-  micromark-factory-whitespace@2.0.1:
-    dependencies:
-      micromark-factory-space: 2.0.1
-      micromark-util-character: 2.1.1
-      micromark-util-symbol: 2.0.1
-      micromark-util-types: 2.0.1
-
-  micromark-util-character@2.1.1:
-    dependencies:
-      micromark-util-symbol: 2.0.1
-      micromark-util-types: 2.0.1
-
-  micromark-util-chunked@2.0.1:
-    dependencies:
-      micromark-util-symbol: 2.0.1
-
-  micromark-util-classify-character@2.0.1:
-    dependencies:
-      micromark-util-character: 2.1.1
-      micromark-util-symbol: 2.0.1
-      micromark-util-types: 2.0.1
-
-  micromark-util-combine-extensions@2.0.1:
-    dependencies:
-      micromark-util-chunked: 2.0.1
-      micromark-util-types: 2.0.1
-
-  micromark-util-decode-numeric-character-reference@2.0.2:
-    dependencies:
-      micromark-util-symbol: 2.0.1
-
-  micromark-util-encode@2.0.1: {}
-
-  micromark-util-html-tag-name@2.0.1: {}
-
-  micromark-util-normalize-identifier@2.0.1:
-    dependencies:
-      micromark-util-symbol: 2.0.1
-
-  micromark-util-resolve-all@2.0.1:
-    dependencies:
-      micromark-util-types: 2.0.1
-
-  micromark-util-sanitize-uri@2.0.1:
-    dependencies:
-      micromark-util-character: 2.1.1
-      micromark-util-encode: 2.0.1
-      micromark-util-symbol: 2.0.1
-
-  micromark-util-subtokenize@2.1.0:
-    dependencies:
-      devlop: 1.1.0
-      micromark-util-chunked: 2.0.1
-      micromark-util-symbol: 2.0.1
-      micromark-util-types: 2.0.1
-
-  micromark-util-symbol@2.0.1: {}
-
-  micromark-util-types@2.0.1: {}
-
-  micromark@4.0.1:
-    dependencies:
-      '@types/debug': 4.1.12
-      debug: 4.4.0
-      decode-named-character-reference: 1.1.0
-      devlop: 1.1.0
-      micromark-core-commonmark: 2.0.2
-      micromark-factory-space: 2.0.1
-      micromark-util-character: 2.1.1
-      micromark-util-chunked: 2.0.1
-      micromark-util-combine-extensions: 2.0.1
-      micromark-util-decode-numeric-character-reference: 2.0.2
-      micromark-util-encode: 2.0.1
-      micromark-util-normalize-identifier: 2.0.1
-      micromark-util-resolve-all: 2.0.1
-      micromark-util-sanitize-uri: 2.0.1
-      micromark-util-subtokenize: 2.1.0
-      micromark-util-symbol: 2.0.1
-      micromark-util-types: 2.0.1
-    transitivePeerDependencies:
-      - supports-color
-
-  micromatch@4.0.8:
-    dependencies:
-      braces: 3.0.3
-      picomatch: 2.3.1
-
-  ms@2.1.3: {}
-
-  parent-module@1.0.1:
-    dependencies:
-      callsites: 3.1.0
-
-  parent-module@2.0.0:
-    dependencies:
-      callsites: 3.1.0
-
-  parse-entities@4.0.2:
-    dependencies:
-      '@types/unist': 2.0.11
-      character-entities-legacy: 3.0.0
-      character-reference-invalid: 2.0.1
-      decode-named-character-reference: 1.1.0
-      is-alphanumerical: 2.0.1
-      is-decimal: 2.0.1
-      is-hexadecimal: 2.0.1
-
-  path-type@5.0.0: {}
-
-  picomatch@2.3.1: {}
-
-  picomatch@4.0.2: {}
-
-  punycode.js@2.3.1: {}
-
-  queue-microtask@1.2.3: {}
-
-  repeat-string@1.6.1: {}
-
-  resolve-from@4.0.0: {}
-
-  resolve-from@5.0.0: {}
-
-  reusify@1.1.0: {}
-
-  run-parallel@1.2.0:
-    dependencies:
-      queue-microtask: 1.2.3
-
-  semver@7.7.1: {}
-
-  slash@5.1.0: {}
-
-  tinyglobby@0.2.12:
-    dependencies:
-      fdir: 6.4.3(picomatch@4.0.2)
-      picomatch: 4.0.2
-
-  to-regex-range@5.0.1:
-    dependencies:
-      is-number: 7.0.0
-
-  uc.micro@2.1.0: {}
-
-  unicorn-magic@0.1.0: {}
-
-  vscode-languageserver-textdocument@1.0.12: {}
-
-  vscode-uri@3.1.0: {}
-
-  xdg-basedir@5.1.0: {}
-
-  yaml@2.7.0: {}
diff --git a/book/src/SUMMARY.md b/book/src/SUMMARY.md
deleted file mode 100644
index 08c9faefb1..0000000000
--- a/book/src/SUMMARY.md
+++ /dev/null
@@ -1,46 +0,0 @@
-# Summary
-
-[Introduction](./introduction.md)
-
-# Getting Started
-
-- [Install](./getting-started/install.md)
-- [Quickstart](./getting-started/quickstart.md)
-
-# Writing Apps
-
-- [Overview](./writing-apps/overview.md)
-- [Writing a Program](./writing-apps/write-program.md)
-- [Compiling](./writing-apps/build.md)
-- [Running a Program](./writing-apps/run.md)
-- [Generating Proofs](./writing-apps/prove.md)
-- [Verifying Proofs](./writing-apps/verify.md)
-- [Solidity SDK](./writing-apps/solidity.md)
-
-# Acceleration Using Extensions
-
-- [Overview](./custom-extensions/overview.md)
-- [Keccak](./custom-extensions/keccak.md)
-- [SHA-256](./custom-extensions/sha256.md)
-- [Big Integer](./custom-extensions/bigint.md)
-- [Algebra (Modular Arithmetic)](./custom-extensions/algebra.md)
-- [Elliptic Curve Cryptography](./custom-extensions/ecc.md)
-- [Elliptic Curve Pairing](./custom-extensions/pairing.md)
-
-# Guest Libraries
-
-- [Keccak256](./guest-libs/keccak256.md)
-- [SHA2](./guest-libs/sha2.md)
-- [Ruint](./guest-libs/ruint.md)
-- [K256](./guest-libs/k256.md)
-- [P256](./guest-libs/p256.md)
-- [Pairing](./guest-libs/pairing.md)
-- [Verify STARK](./guest-libs/verify-stark.md)
-
-# Advanced Usage
-
-- [SDK](./advanced-usage/sdk.md)
-- [Creating a New Extension](./advanced-usage/new-extension.md)
-- [Recursive Verification](./advanced-usage/recursion.md)
-
-
diff --git a/book/src/getting-started/install.md b/book/src/getting-started/install.md
deleted file mode 100644
index 0e1261db88..0000000000
--- a/book/src/getting-started/install.md
+++ /dev/null
@@ -1,44 +0,0 @@
-# Install
-
-To use OpenVM for generating proofs, you must install the OpenVM command line tool `cargo-openvm`.
-
-`cargo-openvm` can be installed in two different ways. You can either install via git URL or build from source.
-
-## Option 1: Install Via Git URL (Recommended)
-
-Begin the installation:
-
-```bash
-cargo +1.85 install --locked --git http://github.com/openvm-org/openvm.git --tag v1.3.0 cargo-openvm
-```
-
-This will globally install `cargo-openvm`. You can validate a successful installation with:
-
-```bash
-cargo openvm --version
-```
-
-## Option 2: Build from source
-
-To build from source, clone the repository and begin the installation.
-
-```bash
-git clone --branch v1.3.0 --single-branch https://github.com/openvm-org/openvm.git
-cd openvm
-cargo install --locked --force --path crates/cli
-```
-
-This will globally install `cargo-openvm`. You can validate a successful installation with:
-
-```bash
-cargo openvm --version
-```
-
-## Install Rust Toolchain
-
-In order for the `cargo-openvm` build command to work, you must install certain Rust nightly components:
-
-```bash
-rustup install nightly-2025-02-14
-rustup component add rust-src --toolchain nightly-2025-02-14
-```
diff --git a/book/words.txt b/book/words.txt
deleted file mode 100644
index 556a8af01a..0000000000
--- a/book/words.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-openvm
-Revm
-println
-vmexe
-zkvm
-rustup
-usize
-mathbb
-keccak
-Keccak
-transpiles
-transpiling
-Transpiles
-secp
-serde
-eprintln
-unvalidated
-xlarge
-noplayground
-Repr
-riscv
-EVMMAX
-prehash
-prehashed
-Uninit
-noverify
-repr
-insn
\ No newline at end of file
diff --git a/ci/scripts/bench.py b/ci/scripts/bench.py
index 97db5180e6..9bf87f622f 100644
--- a/ci/scripts/bench.py
+++ b/ci/scripts/bench.py
@@ -32,7 +32,7 @@ def run_cargo_command(
         command.extend(["--max_segment_length", max_segment_length])
     if kzg_params_dir is not None:
         command.extend(["--kzg-params-dir", kzg_params_dir])
-    if "profiling" in feature_flags:
+    if "perf-metrics" in feature_flags:
         # set guest build args and vm config to profiling
         command.extend(["--profiling"])
 
@@ -50,7 +50,7 @@ def run_cargo_command(
     # Prepare the environment variables
     env = os.environ.copy()  # Copy current environment variables
     env["OUTPUT_PATH"] = output_path
-    if "profiling" in feature_flags:
+    if "perf-metrics" in feature_flags:
         env["GUEST_SYMBOLS_PATH"] = os.path.splitext(output_path)[0] + ".syms"
     env["RUSTFLAGS"] = "-Ctarget-cpu=native"
 
@@ -73,7 +73,7 @@ def bench():
     parser.add_argument('--output_path', type=str, required=True, help="The path to write the metrics to")
     args = parser.parse_args()
 
-    feature_flags = ["bench-metrics", "parallel"] + (args.features.split(",") if args.features else [])
+    feature_flags = ["metrics", "parallel"] + (args.features.split(",") if args.features else [])
     assert (feature_flags.count("mimalloc") + feature_flags.count("jemalloc")) == 1
 
     run_cargo_command(
diff --git a/ci/scripts/metric_unify/flamegraph.py b/ci/scripts/metric_unify/flamegraph.py
index f5054d864c..fe5dc157c2 100644
--- a/ci/scripts/metric_unify/flamegraph.py
+++ b/ci/scripts/metric_unify/flamegraph.py
@@ -60,6 +60,9 @@ def get_stack_lines(metrics_dict, group_by_kvs, stack_keys, metric_name, sum_met
                     function_symbols = [get_function_symbol(string_table, offset) for offset in symbol_offsets]
                     stack_values.extend(function_symbols)
             else:
+                # don't make a stack frame for empty label
+                if labels[key] == '':
+                    continue
                 stack_values.append(labels[key])
         if filter:
             continue
diff --git a/crates/circuits/mod-builder/Cargo.toml b/crates/circuits/mod-builder/Cargo.toml
index d756db326b..cfd5434dde 100644
--- a/crates/circuits/mod-builder/Cargo.toml
+++ b/crates/circuits/mod-builder/Cargo.toml
@@ -23,8 +23,6 @@ num-traits.workspace = true
 tracing.workspace = true
 
 itertools.workspace = true
-serde = { workspace = true, features = ["derive"] }
-serde_with.workspace = true
 
 [dev-dependencies]
 openvm-circuit-primitives = { workspace = true }
@@ -35,4 +33,8 @@ openvm-circuit = { workspace = true, features = ["test-utils"] }
 [features]
 default = []
 parallel = ["openvm-stark-backend/parallel"]
-test-utils = ["dep:halo2curves-axiom", "dep:openvm-pairing-guest"]
+test-utils = [
+    "dep:halo2curves-axiom",
+    "dep:openvm-pairing-guest",
+    "openvm-circuit/test-utils",
+]
diff --git a/crates/circuits/mod-builder/src/builder.rs b/crates/circuits/mod-builder/src/builder.rs
index 6e1c22a009..5d337130bb 100644
--- a/crates/circuits/mod-builder/src/builder.rs
+++ b/crates/circuits/mod-builder/src/builder.rs
@@ -289,6 +289,22 @@ impl FieldExpr {
         ret.setup_values = setup_values;
         ret
     }
+
+    pub fn num_inputs(&self) -> usize {
+        self.builder.num_input
+    }
+
+    pub fn num_vars(&self) -> usize {
+        self.builder.num_variables
+    }
+
+    pub fn num_flags(&self) -> usize {
+        self.builder.num_flags
+    }
+
+    pub fn output_indices(&self) -> &[usize] {
+        &self.builder.output_indices
+    }
 }
 
 impl Deref for FieldExpr {
diff --git a/crates/circuits/mod-builder/src/core_chip.rs b/crates/circuits/mod-builder/src/core_chip.rs
index 30e9c65dbb..8cff1f1b16 100644
--- a/crates/circuits/mod-builder/src/core_chip.rs
+++ b/crates/circuits/mod-builder/src/core_chip.rs
@@ -1,28 +1,32 @@
+use std::{
+    marker::PhantomData,
+    mem::{align_of, size_of},
+    sync::Arc,
+};
+
 use itertools::Itertools;
 use num_bigint::BigUint;
 use num_traits::Zero;
-use openvm_circuit::arch::{
-    AdapterAirContext, AdapterRuntimeContext, DynAdapterInterface, DynArray, MinimalInstruction,
-    Result, VmAdapterInterface, VmCoreAir, VmCoreChip,
+use openvm_circuit::{
+    arch::*,
+    system::memory::{online::TracingMemory, MemoryAuxColsFactory},
 };
 use openvm_circuit_primitives::{
-    var_range::SharedVariableRangeCheckerChip, SubAir, TraceSubRowGenerator,
+    var_range::{SharedVariableRangeCheckerChip, VariableRangeCheckerChip},
+    SubAir, TraceSubRowGenerator,
 };
-use openvm_instructions::instruction::Instruction;
+use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP};
 use openvm_stark_backend::{
     interaction::InteractionBuilder,
     p3_air::BaseAir,
     p3_field::{Field, FieldAlgebra, PrimeField32},
-    p3_matrix::{dense::RowMajorMatrix, Matrix},
     rap::BaseAirWithPublicValues,
 };
 use openvm_stark_sdk::p3_baby_bear::BabyBear;
-use serde::{Deserialize, Serialize};
-use serde_with::{serde_as, DisplayFromStr};
 
 use crate::{
-    utils::{biguint_to_limbs_vec, limbs_to_biguint},
-    FieldExpr, FieldExprCols,
+    builder::{FieldExpr, FieldExprCols},
+    utils::biguint_to_limbs_vec,
 };
 
 #[derive(Clone)]
@@ -165,174 +169,411 @@ where
     }
 }
 
-#[serde_as]
-#[derive(Serialize, Deserialize, Clone, PartialEq, Debug)]
-pub struct FieldExpressionRecord {
-    #[serde_as(as = "Vec<DisplayFromStr>")]
-    pub inputs: Vec<BigUint>,
-    pub flags: Vec<bool>,
+pub struct FieldExpressionMetadata<F, A> {
+    pub total_input_limbs: usize, // num_inputs * limbs_per_input
+    _phantom: PhantomData<(F, A)>,
 }
 
-pub struct FieldExpressionCoreChip {
-    pub air: FieldExpressionCoreAir,
-    pub range_checker: SharedVariableRangeCheckerChip,
+impl<F, A> Clone for FieldExpressionMetadata<F, A> {
+    fn clone(&self) -> Self {
+        Self {
+            total_input_limbs: self.total_input_limbs,
+            _phantom: PhantomData,
+        }
+    }
+}
+
+impl<F, A> Default for FieldExpressionMetadata<F, A> {
+    fn default() -> Self {
+        Self {
+            total_input_limbs: 0,
+            _phantom: PhantomData,
+        }
+    }
+}
+
+impl<F, A> FieldExpressionMetadata<F, A> {
+    pub fn new(total_input_limbs: usize) -> Self {
+        Self {
+            total_input_limbs,
+            _phantom: PhantomData,
+        }
+    }
+}
+
+impl<F, A> AdapterCoreMetadata for FieldExpressionMetadata<F, A>
+where
+    A: AdapterTraceExecutor<F>,
+{
+    #[inline(always)]
+    fn get_adapter_width() -> usize {
+        A::WIDTH * size_of::<F>()
+    }
+}
+
+pub type FieldExpressionRecordLayout<F, A> = AdapterCoreLayout<FieldExpressionMetadata<F, A>>;
+
+pub struct FieldExpressionCoreRecordMut<'a> {
+    pub opcode: &'a mut u8,
+    pub input_limbs: &'a mut [u8],
+}
+
+impl<'a, F, A> CustomBorrow<'a, FieldExpressionCoreRecordMut<'a>, FieldExpressionRecordLayout<F, A>>
+    for [u8]
+{
+    fn custom_borrow(
+        &'a mut self,
+        layout: FieldExpressionRecordLayout<F, A>,
+    ) -> FieldExpressionCoreRecordMut<'a> {
+        let (opcode_buf, input_limbs_buff) = unsafe { self.split_at_mut_unchecked(1) };
+
+        FieldExpressionCoreRecordMut {
+            opcode: &mut opcode_buf[0],
+            input_limbs: &mut input_limbs_buff[..layout.metadata.total_input_limbs],
+        }
+    }
+
+    unsafe fn extract_layout(&self) -> FieldExpressionRecordLayout<F, A> {
+        panic!("Should get the Layout information from FieldExpressionExecutor");
+    }
+}
+
+impl<F, A> SizedRecord<FieldExpressionRecordLayout<F, A>> for FieldExpressionCoreRecordMut<'_> {
+    fn size(layout: &FieldExpressionRecordLayout<F, A>) -> usize {
+        layout.metadata.total_input_limbs + 1
+    }
+
+    fn alignment(_layout: &FieldExpressionRecordLayout<F, A>) -> usize {
+        align_of::<u8>()
+    }
+}
 
+impl<'a> FieldExpressionCoreRecordMut<'a> {
+    // This method is only used in testing
+    pub fn new_from_execution_data(
+        buffer: &'a mut [u8],
+        inputs: &[BigUint],
+        limbs_per_input: usize,
+    ) -> Self {
+        let record_info = FieldExpressionMetadata::<(), ()>::new(inputs.len() * limbs_per_input);
+
+        let record: Self = buffer.custom_borrow(FieldExpressionRecordLayout {
+            metadata: record_info,
+        });
+        record
+    }
+
+    #[inline(always)]
+    pub fn fill_from_execution_data(&mut self, opcode: u8, data: &[u8]) {
+        // `copy_from_slice` panics unless `data` and `self.input_limbs` have the same length,
+        // i.e. unless `data.len() == num_inputs * limbs_per_input`.
+        *self.opcode = opcode;
+        self.input_limbs.copy_from_slice(data);
+    }
+}
+
+#[derive(Clone)]
+pub struct FieldExpressionExecutor<A> {
+    adapter: A,
+    pub expr: FieldExpr,
+    pub offset: usize,
+    pub local_opcode_idx: Vec<usize>,
+    pub opcode_flag_idx: Vec<usize>,
     pub name: String,
+}
 
-    /// Whether to finalize the trace. True if all-zero rows don't satisfy the constraints (e.g.
-    /// there is int_add)
+impl<A> FieldExpressionExecutor<A> {
+    #[allow(clippy::too_many_arguments)]
+    pub fn new(
+        adapter: A,
+        expr: FieldExpr,
+        offset: usize,
+        local_opcode_idx: Vec<usize>,
+        opcode_flag_idx: Vec<usize>,
+        name: &str,
+    ) -> Self {
+        let opcode_flag_idx = if opcode_flag_idx.is_empty() && expr.needs_setup() {
+            // single op chip that needs setup, so there is only one default flag, must be 0.
+            vec![0]
+        } else {
+            // multi ops chip or no-setup chip, use as is.
+            opcode_flag_idx
+        };
+        assert_eq!(opcode_flag_idx.len(), local_opcode_idx.len() - 1);
+        tracing::debug!(
+            "FieldExpressionCoreExecutor: opcode={name}, main_width={}",
+            BaseAir::<BabyBear>::width(&expr)
+        );
+        Self {
+            adapter,
+            expr,
+            offset,
+            local_opcode_idx,
+            opcode_flag_idx,
+            name: name.to_string(),
+        }
+    }
+
+    pub fn get_record_layout<F>(&self) -> FieldExpressionRecordLayout<F, A> {
+        FieldExpressionRecordLayout {
+            metadata: FieldExpressionMetadata::new(
+                self.expr.builder.num_input * self.expr.canonical_num_limbs(),
+            ),
+        }
+    }
+}
+
+pub struct FieldExpressionFiller<A> {
+    adapter: A,
+    pub expr: FieldExpr,
+    pub local_opcode_idx: Vec<usize>,
+    pub opcode_flag_idx: Vec<usize>,
+    pub range_checker: SharedVariableRangeCheckerChip,
     pub should_finalize: bool,
 }
 
-impl FieldExpressionCoreChip {
+impl<A> FieldExpressionFiller<A> {
+    #[allow(clippy::too_many_arguments)]
     pub fn new(
+        adapter: A,
         expr: FieldExpr,
-        offset: usize,
         local_opcode_idx: Vec<usize>,
         opcode_flag_idx: Vec<usize>,
         range_checker: SharedVariableRangeCheckerChip,
-        name: &str,
         should_finalize: bool,
     ) -> Self {
-        let air = FieldExpressionCoreAir::new(expr, offset, local_opcode_idx, opcode_flag_idx);
-        tracing::info!(
-            "FieldExpressionCoreChip: opcode={name}, main_width={}",
-            BaseAir::<BabyBear>::width(&air)
-        );
+        let opcode_flag_idx = if opcode_flag_idx.is_empty() && expr.needs_setup() {
+            // single op chip that needs setup, so there is only one default flag, must be 0.
+            vec![0]
+        } else {
+            // multi ops chip or no-setup chip, use as is.
+            opcode_flag_idx
+        };
+        assert_eq!(opcode_flag_idx.len(), local_opcode_idx.len() - 1);
         Self {
-            air,
+            adapter,
+            expr,
+            local_opcode_idx,
+            opcode_flag_idx,
             range_checker,
-            name: name.to_string(),
             should_finalize,
         }
     }
+    pub fn num_inputs(&self) -> usize {
+        self.expr.builder.num_input
+    }
+
+    pub fn num_flags(&self) -> usize {
+        self.expr.builder.num_flags
+    }
 
-    pub fn expr(&self) -> &FieldExpr {
-        &self.air.expr
+    pub fn get_record_layout<F>(&self) -> FieldExpressionRecordLayout<F, A> {
+        FieldExpressionRecordLayout {
+            metadata: FieldExpressionMetadata::new(
+                self.num_inputs() * self.expr.canonical_num_limbs(),
+            ),
+        }
     }
 }
 
-impl<F: PrimeField32, I> VmCoreChip<F, I> for FieldExpressionCoreChip
+impl<F, A, RA> PreflightExecutor<F, RA> for FieldExpressionExecutor<A>
 where
-    I: VmAdapterInterface<F>,
-    I::Reads: Into<DynArray<F>>,
-    AdapterRuntimeContext<F, I>: From<AdapterRuntimeContext<F, DynAdapterInterface<F>>>,
+    F: PrimeField32,
+    A: 'static
+        + AdapterTraceExecutor<F, ReadData: Into<DynArray<u8>>, WriteData: From<DynArray<u8>>>,
+    for<'buf> RA: RecordArena<
+        'buf,
+        FieldExpressionRecordLayout<F, A>,
+        (A::RecordMut<'buf>, FieldExpressionCoreRecordMut<'buf>),
+    >,
 {
-    type Record = FieldExpressionRecord;
-    type Air = FieldExpressionCoreAir;
-
-    fn execute_instruction(
+    fn execute(
         &self,
+        state: VmStateMut<F, TracingMemory, RA>,
         instruction: &Instruction<F>,
-        _from_pc: u32,
-        reads: I::Reads,
-    ) -> Result<(AdapterRuntimeContext<F, I>, Self::Record)> {
-        let field_element_limbs = self.air.expr.canonical_num_limbs();
-        let limb_bits = self.air.expr.canonical_limb_bits();
-        let data: DynArray<_> = reads.into();
-        let data = data.0;
-        assert_eq!(data.len(), self.air.num_inputs() * field_element_limbs);
-        let data_u32: Vec<u32> = data.iter().map(|x| x.as_canonical_u32()).collect();
-
-        let mut inputs = vec![];
-        for i in 0..self.air.num_inputs() {
-            let start = i * field_element_limbs;
-            let end = start + field_element_limbs;
-            let limb_slice = &data_u32[start..end];
-            let input = limbs_to_biguint(limb_slice, limb_bits);
-            inputs.push(input);
-        }
+    ) -> Result<(), ExecutionError> {
+        let (mut adapter_record, mut core_record) = state.ctx.alloc(self.get_record_layout());
 
-        let Instruction { opcode, .. } = instruction;
-        let local_opcode_idx = opcode.local_opcode_idx(self.air.offset);
-        let mut flags = vec![];
-
-        // If the chip doesn't need setup, (right now) it must be single op chip and thus no flag is
-        // needed. Otherwise, there is a flag for each opcode and will be derived by
-        // is_valid - sum(flags).
-        if self.expr().needs_setup() {
-            flags = vec![false; self.air.num_flags()];
-            self.air
-                .opcode_flag_idx
-                .iter()
-                .enumerate()
-                .for_each(|(i, &flag_idx)| {
-                    flags[flag_idx] = local_opcode_idx == self.air.local_opcode_idx[i]
-                });
-        }
+        A::start(*state.pc, state.memory, &mut adapter_record);
 
-        let vars = self.air.expr.execute(inputs.clone(), flags.clone());
-        assert_eq!(vars.len(), self.air.num_vars());
+        let data: DynArray<_> = self
+            .adapter
+            .read(state.memory, instruction, &mut adapter_record)
+            .into();
 
-        let outputs: Vec<BigUint> = self
-            .air
-            .output_indices()
-            .iter()
-            .map(|&i| vars[i].clone())
-            .collect();
-        let writes: Vec<F> = outputs
-            .iter()
-            .map(|x| biguint_to_limbs_vec(x.clone(), limb_bits, field_element_limbs))
-            .concat()
-            .into_iter()
-            .map(|x| F::from_canonical_u32(x))
-            .collect();
+        core_record.fill_from_execution_data(
+            instruction.opcode.local_opcode_idx(self.offset) as u8,
+            &data.0,
+        );
+
+        let (writes, _, _) = run_field_expression(
+            &self.expr,
+            &self.local_opcode_idx,
+            &self.opcode_flag_idx,
+            core_record.input_limbs,
+            *core_record.opcode as usize,
+        );
+
+        self.adapter.write(
+            state.memory,
+            instruction,
+            writes.into(),
+            &mut adapter_record,
+        );
 
-        let ctx = AdapterRuntimeContext::<_, DynAdapterInterface<_>>::without_pc(writes);
-        Ok((ctx.into(), FieldExpressionRecord { inputs, flags }))
+        *state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP);
+        Ok(())
     }
 
     fn get_opcode_name(&self, _opcode: usize) -> String {
         self.name.clone()
     }
+}
+
+impl<F, A> TraceFiller<F> for FieldExpressionFiller<A>
+where
+    F: PrimeField32 + Send + Sync + Clone,
+    A: 'static + AdapterTraceFiller<F>,
+{
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, row_slice: &mut [F]) {
+        // Get the core record from the row slice
+        let (adapter_row, mut core_row) = unsafe { row_slice.split_at_mut_unchecked(A::WIDTH) };
 
-    fn generate_trace_row(&self, row_slice: &mut [F], record: Self::Record) {
-        self.air.expr.generate_subrow(
-            (self.range_checker.as_ref(), record.inputs, record.flags),
-            row_slice,
+        self.adapter.fill_trace_row(mem_helper, adapter_row);
+
+        let record: FieldExpressionCoreRecordMut =
+            unsafe { get_record_from_slice(&mut core_row, self.get_record_layout::<F>()) };
+
+        let (_, inputs, flags) = run_field_expression(
+            &self.expr,
+            &self.local_opcode_idx,
+            &self.opcode_flag_idx,
+            record.input_limbs,
+            *record.opcode as usize,
         );
-    }
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        let range_checker = self.range_checker.as_ref();
+        self.expr
+            .generate_subrow((range_checker, inputs, flags), core_row);
     }
 
-    fn finalize(&self, trace: &mut RowMajorMatrix<F>, num_records: usize) {
-        if !self.should_finalize || num_records == 0 {
+    fn fill_dummy_trace_row(&self, row_slice: &mut [F]) {
+        if !self.should_finalize {
             return;
         }
 
-        let core_width = <Self::Air as BaseAir<F>>::width(&self.air);
-        let adapter_width = trace.width() - core_width;
-        let dummy_row = self.generate_dummy_trace_row(adapter_width, core_width);
-        for row in trace.rows_mut().skip(num_records) {
-            row.copy_from_slice(&dummy_row);
+        let inputs: Vec<BigUint> = vec![BigUint::zero(); self.num_inputs()];
+        let flags: Vec<bool> = vec![false; self.num_flags()];
+        let core_row = &mut row_slice[A::WIDTH..];
+        // We **do not** want this trace row to update the range checker
+        // so we must create a temporary range checker
+        let tmp_range_checker = Arc::new(VariableRangeCheckerChip::new(self.range_checker.bus()));
+        self.expr
+            .generate_subrow((&tmp_range_checker, inputs, flags), core_row);
+        core_row[0] = F::ZERO; // is_valid = 0
+    }
+}
+
+fn run_field_expression(
+    expr: &FieldExpr,
+    local_opcode_flags: &[usize],
+    opcode_flag_idx: &[usize],
+    data: &[u8],
+    local_opcode_idx: usize,
+) -> (DynArray<u8>, Vec<BigUint>, Vec<bool>) {
+    let field_element_limbs = expr.canonical_num_limbs();
+    assert_eq!(data.len(), expr.builder.num_input * field_element_limbs);
+
+    let mut inputs = Vec::with_capacity(expr.builder.num_input);
+    for i in 0..expr.builder.num_input {
+        let start = i * field_element_limbs;
+        let end = start + field_element_limbs;
+        let limb_slice = &data[start..end];
+        let input = BigUint::from_bytes_le(limb_slice);
+        inputs.push(input);
+    }
+
+    let mut flags = vec![];
+    if expr.needs_setup() {
+        flags = vec![false; expr.builder.num_flags];
+
+        // Find the position of this opcode in `local_opcode_flags` (the chip's local opcode list)
+        if let Some(opcode_position) = local_opcode_flags
+            .iter()
+            .position(|&idx| idx == local_opcode_idx)
+        {
+            // If this is NOT the last opcode (setup), set the corresponding flag
+            if opcode_position < opcode_flag_idx.len() {
+                let flag_idx = opcode_flag_idx[opcode_position];
+                flags[flag_idx] = true;
+            }
+            // If opcode_position == opcode_flag_idx.len(), it's the setup operation
+            // and all flags should remain false (which they already are)
         }
     }
+
+    let vars = expr.execute(inputs.clone(), flags.clone());
+    assert_eq!(vars.len(), expr.builder.num_variables);
+
+    let outputs: Vec<BigUint> = expr
+        .builder
+        .output_indices
+        .iter()
+        .map(|&i| vars[i].clone())
+        .collect();
+    let writes: DynArray<_> = outputs
+        .iter()
+        .map(|x| biguint_to_limbs_vec(x, field_element_limbs))
+        .concat()
+        .into_iter()
+        .collect::<Vec<_>>()
+        .into();
+
+    (writes, inputs, flags)
 }
 
-impl FieldExpressionCoreChip {
-    // We will be setting is_valid = 0. That forces all flags be 0 (otherwise setup will be -1).
-    // We generate a dummy row with all flags set to 0, then we set is_valid = 0.
-    fn generate_dummy_trace_row<F: PrimeField32>(
-        &self,
-        adapter_width: usize,
-        core_width: usize,
-    ) -> Vec<F> {
-        let record = FieldExpressionRecord {
-            inputs: vec![BigUint::zero(); self.air.num_inputs()],
-            flags: vec![false; self.air.num_flags()],
-        };
-        let mut row = vec![F::ZERO; adapter_width + core_width];
-        let core_row = &mut row[adapter_width..];
-        // We **do not** want this trace row to update the range checker
-        // so we must create a temporary range checker
-        let tmp_range_checker = SharedVariableRangeCheckerChip::new(self.range_checker.bus());
-        self.air.expr.generate_subrow(
-            (tmp_range_checker.as_ref(), record.inputs, record.flags),
-            core_row,
-        );
-        core_row[0] = F::ZERO; // is_valid = 0
-        row
+#[inline(always)]
+pub fn run_field_expression_precomputed<const NEEDS_SETUP: bool>(
+    expr: &FieldExpr,
+    flag_idx: usize,
+    data: &[u8],
+) -> DynArray<u8> {
+    let field_element_limbs = expr.canonical_num_limbs();
+    assert_eq!(data.len(), expr.num_inputs() * field_element_limbs);
+
+    let mut inputs = Vec::with_capacity(expr.num_inputs());
+    for i in 0..expr.num_inputs() {
+        let start = i * expr.canonical_num_limbs();
+        let end = start + expr.canonical_num_limbs();
+        let limb_slice = &data[start..end];
+        let input = BigUint::from_bytes_le(limb_slice);
+        inputs.push(input);
     }
+
+    let flags = if NEEDS_SETUP {
+        let mut flags = vec![false; expr.num_flags()];
+        if flag_idx < expr.num_flags() {
+            flags[flag_idx] = true;
+        }
+        flags
+    } else {
+        vec![]
+    };
+
+    let vars = expr.execute(inputs, flags);
+    assert_eq!(vars.len(), expr.num_vars());
+
+    let outputs: Vec<BigUint> = expr
+        .output_indices()
+        .iter()
+        .map(|&i| vars[i].clone())
+        .collect();
+
+    outputs
+        .iter()
+        .map(|x| biguint_to_limbs_vec(x, field_element_limbs))
+        .concat()
+        .into_iter()
+        .collect::<Vec<_>>()
+        .into()
 }
diff --git a/crates/circuits/mod-builder/src/tests.rs b/crates/circuits/mod-builder/src/tests.rs
index d217c0c5c2..628043256d 100644
--- a/crates/circuits/mod-builder/src/tests.rs
+++ b/crates/circuits/mod-builder/src/tests.rs
@@ -11,14 +11,126 @@ use openvm_stark_sdk::{
     p3_baby_bear::BabyBear,
 };
 
-use crate::{test_utils::*, ExprBuilder, FieldExpr, FieldExprCols, FieldVariable, SymbolicExpr};
+use crate::{
+    test_utils::*, utils::biguint_to_limbs_vec, ExprBuilder, FieldExpr, FieldExprCols,
+    FieldExpressionCoreRecordMut, FieldVariable, SymbolicExpr,
+};
 
 const LIMB_BITS: usize = 8;
+use std::sync::Arc;
+
+use openvm_circuit_primitives::var_range::VariableRangeCheckerChip;
+
+fn create_field_expr_with_setup(
+    builder: ExprBuilder,
+) -> (FieldExpr, Arc<VariableRangeCheckerChip>, usize) {
+    let prime = secp256k1_coord_prime();
+    let (range_checker, _) = setup(&prime);
+    let expr = FieldExpr::new(builder, range_checker.bus(), false);
+    let width = BaseAir::<BabyBear>::width(&expr);
+    (expr, range_checker, width)
+}
+
+fn create_field_expr_with_flags_setup(
+    builder: ExprBuilder,
+) -> (FieldExpr, Arc<VariableRangeCheckerChip>, usize) {
+    let prime = secp256k1_coord_prime();
+    let (range_checker, _) = setup(&prime);
+    let expr = FieldExpr::new(builder, range_checker.bus(), true);
+    let width = BaseAir::<BabyBear>::width(&expr);
+    (expr, range_checker, width)
+}
+
+fn generate_direct_trace(
+    expr: &FieldExpr,
+    range_checker: &Arc<VariableRangeCheckerChip>,
+    inputs: Vec<BigUint>,
+    flags: Vec<bool>,
+    width: usize,
+) -> Vec<BabyBear> {
+    let mut row = BabyBear::zero_vec(width);
+    expr.generate_subrow((range_checker, inputs, flags), &mut row);
+    row
+}
+
+fn generate_recorded_trace(
+    expr: &FieldExpr,
+    range_checker: &Arc<VariableRangeCheckerChip>,
+    inputs: &[BigUint],
+    flags: Vec<bool>,
+    width: usize,
+) -> Vec<BabyBear> {
+    let mut buffer = vec![0u8; 1024];
+    let mut record = FieldExpressionCoreRecordMut::new_from_execution_data(
+        &mut buffer,
+        inputs,
+        expr.canonical_num_limbs(),
+    );
+    let data: Vec<u8> = inputs
+        .iter()
+        .flat_map(|x| biguint_to_limbs_vec(x, expr.canonical_num_limbs()))
+        .collect();
+    record.fill_from_execution_data(0, &data);
+
+    let reconstructed_inputs: Vec<BigUint> = record
+        .input_limbs
+        .chunks(expr.canonical_num_limbs())
+        .map(BigUint::from_bytes_le)
+        .collect();
+
+    let mut row = BabyBear::zero_vec(width);
+    expr.generate_subrow((range_checker, reconstructed_inputs, flags), &mut row);
+    row
+}
+
+fn verify_stark_with_traces(
+    expr: FieldExpr,
+    range_checker: Arc<VariableRangeCheckerChip>,
+    trace: Vec<BabyBear>,
+    width: usize,
+) {
+    let trace_matrix = RowMajorMatrix::new(trace, width);
+    let range_trace = range_checker.generate_trace();
+    BabyBearBlake3Engine::run_simple_test_no_pis_fast(
+        any_rap_arc_vec![expr, range_checker.air],
+        vec![trace_matrix, range_trace],
+    )
+    .expect("Verification failed");
+}
+
+fn extract_and_verify_result(
+    expr: &FieldExpr,
+    trace: &[BabyBear],
+    expected: &BigUint,
+    var_index: usize,
+) {
+    let FieldExprCols { vars, .. } = expr.load_vars(trace);
+    assert!(var_index < vars.len(), "Variable index out of bounds");
+    let generated = evaluate_biguint(&vars[var_index], LIMB_BITS);
+    assert_eq!(generated, *expected);
+}
+
+fn test_trace_equivalence(
+    expr: &FieldExpr,
+    range_checker: &Arc<VariableRangeCheckerChip>,
+    inputs: Vec<BigUint>,
+    flags: Vec<bool>,
+    width: usize,
+) {
+    let direct_trace =
+        generate_direct_trace(expr, range_checker, inputs.clone(), flags.clone(), width);
+    let recorded_trace = generate_recorded_trace(expr, range_checker, &inputs, flags, width);
+    assert_eq!(
+        direct_trace, recorded_trace,
+        "Direct and recorded traces must be identical for inputs: {:?}",
+        inputs
+    );
+}
 
 #[test]
 fn test_add() {
     let prime = secp256k1_coord_prime();
-    let (range_checker, builder) = setup(&prime);
+    let (_, builder) = setup(&prime);
 
     let x1 = ExprBuilder::new_input(builder.clone());
     let x2 = ExprBuilder::new_input(builder.clone());
@@ -26,70 +138,45 @@ fn test_add() {
     x3.save();
     let builder = builder.borrow().clone();
 
-    let expr = FieldExpr::new(builder, range_checker.bus(), false);
-    let width = BaseAir::<BabyBear>::width(&expr);
+    let (expr, range_checker, width) = create_field_expr_with_setup(builder);
 
     let x = generate_random_biguint(&prime);
     let y = generate_random_biguint(&prime);
-    let expected = (&x + &y) % prime;
+    let expected = (&x + &y) % &prime;
     let inputs = vec![x, y];
 
-    let mut row = BabyBear::zero_vec(width);
-    expr.generate_subrow((&range_checker, inputs, vec![]), &mut row);
-    let FieldExprCols { vars, .. } = expr.load_vars(&row);
-    assert_eq!(vars.len(), 1);
-    let generated = evaluate_biguint(&vars[0], LIMB_BITS);
-    assert_eq!(generated, expected);
-
-    let trace = RowMajorMatrix::new(row, width);
-    let range_trace = range_checker.generate_trace();
-
-    BabyBearBlake3Engine::run_simple_test_no_pis_fast(
-        any_rap_arc_vec![expr, range_checker.air],
-        vec![trace, range_trace],
-    )
-    .expect("Verification failed");
+    let trace = generate_direct_trace(&expr, &range_checker, inputs, vec![], width);
+    extract_and_verify_result(&expr, &trace, &expected, 0);
+    verify_stark_with_traces(expr, range_checker, trace, width);
 }
 
 #[test]
 fn test_div() {
     let prime = secp256k1_coord_prime();
-    let (range_checker, builder) = setup(&prime);
+    let (_, builder) = setup(&prime);
 
     let x1 = ExprBuilder::new_input(builder.clone());
     let x2 = ExprBuilder::new_input(builder.clone());
     let _x3 = x1 / x2; // auto save on division.
     let builder = builder.borrow().clone();
-    let expr = FieldExpr::new(builder, range_checker.bus(), false);
-    let width = BaseAir::<BabyBear>::width(&expr);
+
+    let (expr, range_checker, width) = create_field_expr_with_setup(builder);
 
     let x = generate_random_biguint(&prime);
     let y = generate_random_biguint(&prime);
     let y_inv = y.modinv(&prime).unwrap();
-    let expected = (&x * &y_inv) % prime;
+    let expected = (&x * &y_inv) % &prime;
     let inputs = vec![x, y];
 
-    let mut row = BabyBear::zero_vec(width);
-    expr.generate_subrow((&range_checker, inputs, vec![]), &mut row);
-    let FieldExprCols { vars, .. } = expr.load_vars(&row);
-    assert_eq!(vars.len(), 1);
-    let generated = evaluate_biguint(&vars[0], LIMB_BITS);
-    assert_eq!(generated, expected);
-
-    let trace = RowMajorMatrix::new(row, width);
-    let range_trace = range_checker.generate_trace();
-
-    BabyBearBlake3Engine::run_simple_test_no_pis_fast(
-        any_rap_arc_vec![expr, range_checker.air],
-        vec![trace, range_trace],
-    )
-    .expect("Verification failed");
+    let trace = generate_direct_trace(&expr, &range_checker, inputs, vec![], width);
+    extract_and_verify_result(&expr, &trace, &expected, 0);
+    verify_stark_with_traces(expr, range_checker, trace, width);
 }
 
 #[test]
 fn test_auto_carry_mul() {
     let prime = secp256k1_coord_prime();
-    let (range_checker, builder) = setup(&prime);
+    let (_, builder) = setup(&prime);
 
     let mut x1 = ExprBuilder::new_input(builder.clone());
     let mut x2 = ExprBuilder::new_input(builder.clone());
@@ -101,36 +188,25 @@ fn test_auto_carry_mul() {
     assert_eq!(x4.expr, SymbolicExpr::Var(1));
 
     let builder = builder.borrow().clone();
+    let (expr, range_checker, width) = create_field_expr_with_setup(builder);
 
-    let expr = FieldExpr::new(builder, range_checker.bus(), false);
-    let width = BaseAir::<BabyBear>::width(&expr);
     let x = generate_random_biguint(&prime);
     let y = generate_random_biguint(&prime);
-    let expected = (&x * &x * &y) % prime; // x4 = x3 * x1 = (x1 * x2) * x1
+    let expected = (&x * &x * &y) % &prime; // x4 = x3 * x1 = (x1 * x2) * x1
     let inputs = vec![x, y];
 
-    let mut row = BabyBear::zero_vec(width);
-    expr.generate_subrow((&range_checker, inputs, vec![]), &mut row);
-    let FieldExprCols { vars, .. } = expr.load_vars(&row);
+    let trace = generate_direct_trace(&expr, &range_checker, inputs, vec![], width);
+    let FieldExprCols { vars, .. } = expr.load_vars(&trace);
     assert_eq!(vars.len(), 2);
-    let generated = evaluate_biguint(&vars[1], LIMB_BITS);
-    assert_eq!(generated, expected);
-
-    let trace = RowMajorMatrix::new(row, width);
-    let range_trace = range_checker.generate_trace();
-
-    BabyBearBlake3Engine::run_simple_test_no_pis_fast(
-        any_rap_arc_vec![expr, range_checker.air],
-        vec![trace, range_trace],
-    )
-    .expect("Verification failed");
+    extract_and_verify_result(&expr, &trace, &expected, 1);
+    verify_stark_with_traces(expr, range_checker, trace, width);
 }
 
 #[test]
 fn test_auto_carry_intmul() {
     let prime = secp256k1_coord_prime();
-    let (range_checker, builder) = setup(&prime);
-    let mut x1 = ExprBuilder::new_input(builder.clone());
+    let (_, builder) = setup(&prime);
+    let mut x1: FieldVariable = ExprBuilder::new_input(builder.clone());
     let mut x2 = ExprBuilder::new_input(builder.clone());
     let mut x3 = &mut x1 * &mut x2;
     // The int_mul below will overflow:
@@ -143,35 +219,24 @@ fn test_auto_carry_intmul() {
     assert_eq!(x4.expr, SymbolicExpr::Var(1));
 
     let builder = builder.borrow().clone();
+    let (expr, range_checker, width) = create_field_expr_with_setup(builder);
 
-    let expr = FieldExpr::new(builder, range_checker.bus(), false);
-    let width = BaseAir::<BabyBear>::width(&expr);
     let x = generate_random_biguint(&prime);
     let y = generate_random_biguint(&prime);
-    let expected = (&x * &x * BigUint::from(9u32)) % prime;
+    let expected = (&x * &x * BigUint::from(9u32)) % &prime;
     let inputs = vec![x, y];
 
-    let mut row = BabyBear::zero_vec(width);
-    expr.generate_subrow((&range_checker, inputs, vec![]), &mut row);
-    let FieldExprCols { vars, .. } = expr.load_vars(&row);
+    let trace = generate_direct_trace(&expr, &range_checker, inputs, vec![], width);
+    let FieldExprCols { vars, .. } = expr.load_vars(&trace);
     assert_eq!(vars.len(), 2);
-    let generated = evaluate_biguint(&vars[1], LIMB_BITS);
-    assert_eq!(generated, expected);
-
-    let trace = RowMajorMatrix::new(row, width);
-    let range_trace = range_checker.generate_trace();
-
-    BabyBearBlake3Engine::run_simple_test_no_pis_fast(
-        any_rap_arc_vec![expr, range_checker.air],
-        vec![trace, range_trace],
-    )
-    .expect("Verification failed");
+    extract_and_verify_result(&expr, &trace, &expected, 1);
+    verify_stark_with_traces(expr, range_checker, trace, width);
 }
 
 #[test]
 fn test_auto_carry_add() {
     let prime = secp256k1_coord_prime();
-    let (range_checker, builder) = setup(&prime);
+    let (_, builder) = setup(&prime);
 
     let mut x1 = ExprBuilder::new_input(builder.clone());
     let mut x2 = ExprBuilder::new_input(builder.clone());
@@ -194,36 +259,24 @@ fn test_auto_carry_add() {
     assert_eq!(x5.expr, SymbolicExpr::Var(1));
 
     let builder = builder.borrow().clone();
-
-    let expr = FieldExpr::new(builder, range_checker.bus(), false);
-    let width = BaseAir::<BabyBear>::width(&expr);
+    let (expr, range_checker, width) = create_field_expr_with_setup(builder);
 
     let x = generate_random_biguint(&prime);
     let y = generate_random_biguint(&prime);
-    let expected = (&x * &x * BigUint::from(10u32)) % prime;
+    let expected = (&x * &x * BigUint::from(10u32)) % &prime;
     let inputs = vec![x, y];
 
-    let mut row = BabyBear::zero_vec(width);
-    expr.generate_subrow((&range_checker, inputs, vec![]), &mut row);
-    let FieldExprCols { vars, .. } = expr.load_vars(&row);
+    let trace = generate_direct_trace(&expr, &range_checker, inputs, vec![], width);
+    let FieldExprCols { vars, .. } = expr.load_vars(&trace);
     assert_eq!(vars.len(), 2);
-    let generated = evaluate_biguint(&vars[x5_id], LIMB_BITS);
-    assert_eq!(generated, expected);
-
-    let trace = RowMajorMatrix::new(row, width);
-    let range_trace = range_checker.generate_trace();
-
-    BabyBearBlake3Engine::run_simple_test_no_pis_fast(
-        any_rap_arc_vec![expr, range_checker.air],
-        vec![trace, range_trace],
-    )
-    .expect("Verification failed");
+    extract_and_verify_result(&expr, &trace, &expected, x5_id);
+    verify_stark_with_traces(expr, range_checker, trace, width);
 }
 
 #[test]
 fn test_auto_carry_div() {
     let prime = secp256k1_coord_prime();
-    let (range_checker, builder) = setup(&prime);
+    let (_, builder) = setup(&prime);
 
     let mut x1 = ExprBuilder::new_input(builder.clone());
     let x2 = ExprBuilder::new_input(builder.clone());
@@ -237,29 +290,16 @@ fn test_auto_carry_div() {
     let builder = builder.borrow().clone();
     assert_eq!(builder.num_variables, 2); // numerator autosaved, and the final division
 
-    let expr = FieldExpr::new(builder, range_checker.bus(), false);
-    let width = BaseAir::<BabyBear>::width(&expr);
+    let (expr, range_checker, width) = create_field_expr_with_setup(builder);
 
     let x = generate_random_biguint(&prime);
     let y = generate_random_biguint(&prime);
-    // let expected = (&x * &x * BigUint::from(10u32)) % prime;
     let inputs = vec![x, y];
 
-    let mut row = BabyBear::zero_vec(width);
-    expr.generate_subrow((&range_checker, inputs, vec![]), &mut row);
-    let FieldExprCols { vars, .. } = expr.load_vars(&row);
+    let trace = generate_direct_trace(&expr, &range_checker, inputs, vec![], width);
+    let FieldExprCols { vars, .. } = expr.load_vars(&trace);
     assert_eq!(vars.len(), 2);
-    // let generated = evaluate_biguint(&vars[x5_id], LIMB_BITS);
-    // assert_eq!(generated, expected);
-
-    let trace = RowMajorMatrix::new(row, width);
-    let range_trace = range_checker.generate_trace();
-
-    BabyBearBlake3Engine::run_simple_test_no_pis_fast(
-        any_rap_arc_vec![expr, range_checker.air],
-        vec![trace, range_trace],
-    )
-    .expect("Verification failed");
+    verify_stark_with_traces(expr, range_checker, trace, width);
 }
 
 fn make_addsub_chip(builder: Rc<RefCell<ExprBuilder>>) -> ExprBuilder {
@@ -283,65 +323,39 @@ fn make_addsub_chip(builder: Rc<RefCell<ExprBuilder>>) -> ExprBuilder {
 #[test]
 fn test_select() {
     let prime = secp256k1_coord_prime();
-    let (range_checker, builder) = setup(&prime);
+    let (_, builder) = setup(&prime);
     let builder = make_addsub_chip(builder);
 
-    let expr = FieldExpr::new(builder, range_checker.bus(), true);
-    let width = BaseAir::<BabyBear>::width(&expr);
+    let (expr, range_checker, width) = create_field_expr_with_flags_setup(builder);
 
     let x = generate_random_biguint(&prime);
     let y = generate_random_biguint(&prime);
-    let expected = (&x + &prime - &y) % prime;
+    let expected = (&x + &prime - &y) % &prime;
     let inputs = vec![x, y];
-    let flags = vec![false, true];
+    let flags: Vec<bool> = vec![false, true];
 
-    let mut row = BabyBear::zero_vec(width);
-    expr.generate_subrow((&range_checker, inputs, flags), &mut row);
-    let FieldExprCols { vars, .. } = expr.load_vars(&row);
-    assert_eq!(vars.len(), 1);
-    let generated = evaluate_biguint(&vars[0], LIMB_BITS);
-    assert_eq!(generated, expected);
-
-    let trace = RowMajorMatrix::new(row, width);
-    let range_trace = range_checker.generate_trace();
-
-    BabyBearBlake3Engine::run_simple_test_no_pis_fast(
-        any_rap_arc_vec![expr, range_checker.air],
-        vec![trace, range_trace],
-    )
-    .expect("Verification failed");
+    let trace = generate_direct_trace(&expr, &range_checker, inputs, flags, width);
+    extract_and_verify_result(&expr, &trace, &expected, 0);
+    verify_stark_with_traces(expr, range_checker, trace, width);
 }
 
 #[test]
 fn test_select2() {
     let prime = secp256k1_coord_prime();
-    let (range_checker, builder) = setup(&prime);
+    let (_, builder) = setup(&prime);
     let builder = make_addsub_chip(builder);
 
-    let expr = FieldExpr::new(builder, range_checker.bus(), true);
-    let width = BaseAir::<BabyBear>::width(&expr);
+    let (expr, range_checker, width) = create_field_expr_with_flags_setup(builder);
 
     let x = generate_random_biguint(&prime);
     let y = generate_random_biguint(&prime);
-    let expected = (&x + &y) % prime;
+    let expected = (&x + &y) % &prime;
     let inputs = vec![x, y];
-    let flags = vec![true, false];
-
-    let mut row = BabyBear::zero_vec(width);
-    expr.generate_subrow((&range_checker, inputs, flags), &mut row);
-    let FieldExprCols { vars, .. } = expr.load_vars(&row);
-    assert_eq!(vars.len(), 1);
-    let generated = evaluate_biguint(&vars[0], LIMB_BITS);
-    assert_eq!(generated, expected);
+    let flags: Vec<bool> = vec![true, false];
 
-    let trace = RowMajorMatrix::new(row, width);
-    let range_trace = range_checker.generate_trace();
-
-    BabyBearBlake3Engine::run_simple_test_no_pis_fast(
-        any_rap_arc_vec![expr, range_checker.air],
-        vec![trace, range_trace],
-    )
-    .expect("Verification failed");
+    let trace = generate_direct_trace(&expr, &range_checker, inputs, flags, width);
+    extract_and_verify_result(&expr, &trace, &expected, 0);
+    verify_stark_with_traces(expr, range_checker, trace, width);
 }
 
 fn test_symbolic_limbs(expr: SymbolicExpr, expected_q: usize, expected_carry: usize) {
@@ -395,3 +409,299 @@ fn test_symbolic_limbs_mul() {
     let expected_carry = 64;
     test_symbolic_limbs(expr, expected_q, expected_carry);
 }
+
+#[test]
+fn test_recorded_execution_records() {
+    let prime = secp256k1_coord_prime();
+    let (_, builder) = setup(&prime);
+
+    let x1 = ExprBuilder::new_input(builder.clone());
+    let x2 = ExprBuilder::new_input(builder.clone());
+    let mut x3 = x1 + x2;
+    x3.save();
+    let builder = builder.borrow().clone();
+
+    let (expr, range_checker, width) = create_field_expr_with_setup(builder);
+
+    let x = generate_random_biguint(&prime);
+    let y = generate_random_biguint(&prime);
+    let expected = (&x + &y) % &prime;
+    let inputs = vec![x.clone(), y.clone()];
+    let flags: Vec<bool> = vec![];
+
+    // Test record creation and reconstruction
+    let mut buffer = vec![0u8; 1024];
+    let mut record = FieldExpressionCoreRecordMut::new_from_execution_data(
+        &mut buffer,
+        &inputs,
+        expr.canonical_num_limbs(),
+    );
+    let data: Vec<u8> = inputs
+        .iter()
+        .flat_map(|x| biguint_to_limbs_vec(x, expr.canonical_num_limbs()))
+        .collect();
+    record.fill_from_execution_data(0, &data);
+    assert_eq!(*record.opcode, 0);
+
+    // Verify input reconstruction preserves data
+    let reconstructed_inputs: Vec<BigUint> = record
+        .input_limbs
+        .chunks(expr.canonical_num_limbs())
+        .map(BigUint::from_bytes_le)
+        .collect();
+    assert_eq!(reconstructed_inputs.len(), inputs.len());
+    for (original, reconstructed) in inputs.iter().zip(reconstructed_inputs.iter()) {
+        assert_eq!(original, reconstructed);
+    }
+
+    // Test standard execution and verification using reconstructed inputs
+    let trace = generate_direct_trace(&expr, &range_checker, reconstructed_inputs, flags, width);
+    extract_and_verify_result(&expr, &trace, &expected, 0);
+    verify_stark_with_traces(expr, range_checker, trace, width);
+}
+
+#[test]
+fn test_trace_mathematical_equivalence() {
+    let prime = secp256k1_coord_prime();
+    let (_, builder) = setup(&prime);
+
+    let x1 = ExprBuilder::new_input(builder.clone());
+    let x2 = ExprBuilder::new_input(builder.clone());
+    let x3 = &mut (x1.clone() * x2.clone()) + &mut (x1.clone().square());
+    let mut x4 = x3.clone() / x2.clone(); // This will trigger auto-save
+    x4.save();
+    let builder = builder.borrow().clone();
+
+    let (expr, range_checker, width) = create_field_expr_with_setup(builder);
+
+    for _ in 0..10 {
+        let x = generate_random_biguint(&prime);
+        let y = generate_random_biguint(&prime);
+
+        let expected = {
+            let temp = (&x * &y + &x * &x) % &prime;
+            let y_inv = y.modinv(&prime).unwrap();
+            (temp * y_inv) % &prime
+        };
+
+        let inputs = vec![x.clone(), y.clone()];
+        let flags: Vec<bool> = vec![];
+
+        // Test direct/recorded equivalence
+        test_trace_equivalence(&expr, &range_checker, inputs.clone(), flags.clone(), width);
+
+        // Verify the actual computation is correct
+        let direct_row = generate_direct_trace(&expr, &range_checker, inputs.clone(), flags, width);
+        let FieldExprCols { vars, .. } = expr.load_vars(&direct_row);
+        extract_and_verify_result(&expr, &direct_row, &expected, vars.len() - 1);
+    }
+}
+
+#[test]
+fn test_record_arena_allocation_patterns() {
+    let prime = secp256k1_coord_prime();
+    let (_, builder) = setup(&prime);
+
+    let x1 = ExprBuilder::new_input(builder.clone());
+    let x2 = ExprBuilder::new_input(builder.clone());
+    let mut x3 = x1 + x2;
+    x3.save();
+    let builder = builder.borrow().clone();
+
+    let (expr, _range_checker, _width) = create_field_expr_with_setup(builder);
+
+    let inputs = vec![
+        generate_random_biguint(&prime),
+        generate_random_biguint(&prime),
+    ];
+
+    // Test record creation and filling for a typical two-input record
+    let mut buffer = vec![0u8; 1024];
+    let mut record = FieldExpressionCoreRecordMut::new_from_execution_data(
+        &mut buffer,
+        &inputs,
+        expr.canonical_num_limbs(),
+    );
+    let data: Vec<u8> = inputs
+        .iter()
+        .flat_map(|x| biguint_to_limbs_vec(x, expr.canonical_num_limbs()))
+        .collect();
+    record.fill_from_execution_data(0, &data);
+    assert_eq!(*record.opcode, 0);
+
+    // Test with 40 inputs of 4 limbs each; this record is created but never filled
+    let max_inputs = vec![BigUint::one(); 40]; // MAX_INPUT_LIMBS / 4
+    let mut max_buffer = vec![0u8; 2048];
+    let max_record =
+        FieldExpressionCoreRecordMut::new_from_execution_data(&mut max_buffer, &max_inputs, 4);
+    assert_eq!(*max_record.opcode, 0);
+
+    // Test input reconstruction from the first (two-input) record's limbs
+    let reconstructed_inputs: Vec<BigUint> = record
+        .input_limbs
+        .chunks(expr.canonical_num_limbs())
+        .map(BigUint::from_bytes_le)
+        .collect();
+    assert_eq!(reconstructed_inputs.len(), inputs.len());
+    for (original, reconstructed) in inputs.iter().zip(reconstructed_inputs.iter()) {
+        assert_eq!(original, reconstructed);
+    }
+}
+
+#[test]
+fn test_tracestep_tracefiller_roundtrip() {
+    let prime = secp256k1_coord_prime();
+    let (_, builder) = setup(&prime);
+
+    let x1 = ExprBuilder::new_input(builder.clone());
+    let x2 = ExprBuilder::new_input(builder.clone());
+    let x3 = x1.clone() * x2.clone();
+    let x4 = x3.clone() + x1.clone();
+    let mut x5 = x4.clone();
+    x5.save();
+    let builder_data = builder.borrow().clone();
+
+    let (expr, _range_checker, _width) = create_field_expr_with_setup(builder_data);
+
+    let inputs = vec![
+        generate_random_biguint(&prime),
+        generate_random_biguint(&prime),
+    ];
+
+    let vars_direct = expr.execute(inputs.clone(), vec![]);
+
+    // Test record creation and reconstruction roundtrip
+    let mut buffer = vec![0u8; 1024];
+    let mut record = FieldExpressionCoreRecordMut::new_from_execution_data(
+        &mut buffer,
+        &inputs,
+        expr.canonical_num_limbs(),
+    );
+    let data: Vec<u8> = inputs
+        .iter()
+        .flat_map(|x| biguint_to_limbs_vec(x, expr.canonical_num_limbs()))
+        .collect();
+    record.fill_from_execution_data(0, &data);
+
+    let reconstructed_inputs: Vec<BigUint> = record
+        .input_limbs
+        .chunks(expr.canonical_num_limbs())
+        .map(BigUint::from_bytes_le)
+        .collect();
+    let vars_reconstructed = expr.execute(reconstructed_inputs, vec![]);
+
+    // All intermediate variables must be preserved
+    assert_eq!(vars_direct.len(), vars_reconstructed.len());
+    for (direct, reconstructed) in vars_direct.iter().zip(vars_reconstructed.iter()) {
+        assert_eq!(
+            direct, reconstructed,
+            "Variable preservation failed in roundtrip"
+        );
+    }
+}
+
+#[test]
+fn test_direct_recorded_with_complex_operations() {
+    let prime = secp256k1_coord_prime();
+    let (_, builder) = setup(&prime);
+
+    let x1 = ExprBuilder::new_input(builder.clone());
+    let x2 = ExprBuilder::new_input(builder.clone());
+    let x3 = ExprBuilder::new_input(builder.clone());
+
+    let numerator = x1.clone() * x2.clone() + x3.clone();
+    let denominator = x1.clone() + x2.clone();
+    let mut result = numerator / denominator;
+    result.save();
+
+    let builder_data = builder.borrow().clone();
+    let (expr, range_checker, width) = create_field_expr_with_setup(builder_data);
+
+    // Test edge cases with small and large numbers
+    let test_cases = vec![
+        (
+            BigUint::from(1u32),
+            BigUint::from(2u32),
+            BigUint::from(3u32),
+        ),
+        (
+            BigUint::from(100u32),
+            BigUint::from(200u32),
+            BigUint::from(300u32),
+        ),
+        (
+            generate_random_biguint(&prime),
+            generate_random_biguint(&prime),
+            generate_random_biguint(&prime),
+        ),
+    ];
+
+    for (x, y, z) in test_cases {
+        let inputs = vec![x.clone(), y.clone(), z.clone()];
+        let flags = vec![];
+
+        // Test direct/recorded equivalence
+        test_trace_equivalence(&expr, &range_checker, inputs.clone(), flags.clone(), width);
+
+        // Verify mathematical correctness
+        let expected = {
+            let num = (&x * &y + &z) % &prime;
+            let den_inv = (&x + &y).modinv(&prime).unwrap();
+            (num * den_inv) % &prime
+        };
+
+        let direct_row = generate_direct_trace(&expr, &range_checker, inputs, flags, width);
+        let FieldExprCols { vars, .. } = expr.load_vars(&direct_row);
+        extract_and_verify_result(&expr, &direct_row, &expected, vars.len() - 1);
+    }
+}
+
+#[test]
+fn test_concurrent_direct_recorded_simulation() {
+    // Simulate mixed direct/recorded execution to ensure RecordArena abstraction works correctly
+    let prime = secp256k1_coord_prime();
+    let (_, builder) = setup(&prime);
+
+    let x1 = ExprBuilder::new_input(builder.clone());
+    let x2 = ExprBuilder::new_input(builder.clone());
+    let mut x3 = x1 + x2;
+    x3.save();
+    let builder_data = builder.borrow().clone();
+
+    let (expr, range_checker, width) = create_field_expr_with_setup(builder_data);
+
+    // Alternate between direct and recorded modes; the runs below execute sequentially
+    let execution_scenarios = vec![
+        ("direct", true),
+        ("recorded", false),
+        ("direct", true),
+        ("recorded", false),
+    ];
+
+    let mut all_traces = Vec::new();
+
+    for (name, is_direct) in execution_scenarios {
+        let inputs = vec![
+            generate_random_biguint(&prime),
+            generate_random_biguint(&prime),
+        ];
+
+        let trace = if is_direct {
+            generate_direct_trace(&expr, &range_checker, inputs.clone(), vec![], width)
+        } else {
+            generate_recorded_trace(&expr, &range_checker, &inputs, vec![], width)
+        };
+
+        all_traces.push((name, inputs, trace));
+    }
+
+    // Verify each trace against the expected sum (x + y) mod p (result index 0)
+    for (_, inputs, trace) in &all_traces {
+        let expected = (&inputs[0] + &inputs[1]) % &prime;
+        extract_and_verify_result(&expr, trace, &expected, 0);
+    }
+
+    // Verify that direct and recorded with same inputs produce same results
+    let same_inputs = vec![BigUint::from(123u32), BigUint::from(456u32)];
+    test_trace_equivalence(&expr, &range_checker, same_inputs, vec![], width);
+}
diff --git a/crates/circuits/mod-builder/src/utils.rs b/crates/circuits/mod-builder/src/utils.rs
index 7540f0ae2c..2f2561ba87 100644
--- a/crates/circuits/mod-builder/src/utils.rs
+++ b/crates/circuits/mod-builder/src/utils.rs
@@ -1,27 +1,14 @@
 use num_bigint::BigUint;
-use num_traits::{FromPrimitive, ToPrimitive, Zero};
-
-// little endian.
-pub fn limbs_to_biguint(x: &[u32], limb_size: usize) -> BigUint {
-    let mut result = BigUint::zero();
-    let base = BigUint::from_u32(1 << limb_size).unwrap();
-    for limb in x.iter().rev() {
-        result = result * &base + BigUint::from_u32(*limb).unwrap();
-    }
-    result
-}
 
 // Use this when num_limbs is not a constant.
 // little endian.
-// Warning: This function only returns the last NUM_LIMBS*LIMB_SIZE bits of
+// Warning: This function only returns the least-significant `num_limbs` bytes of
 //          the input, while the input can have more than that.
-pub fn biguint_to_limbs_vec(mut x: BigUint, limb_size: usize, num_limbs: usize) -> Vec<u32> {
-    let mut result = vec![0; num_limbs];
-    let base = BigUint::from_u32(1 << limb_size).unwrap();
-    for r in result.iter_mut() {
-        *r = (x.clone() % &base).to_u32().unwrap();
-        x /= &base;
-    }
-    assert!(x.is_zero());
-    result
+#[inline(always)]
+pub fn biguint_to_limbs_vec(x: &BigUint, num_limbs: usize) -> Vec<u8> {
+    let mut limbs = x.to_bytes_le();
+    // `resize` zero-pads a short encoding and truncates a long one, so the
+    // result always holds exactly `num_limbs` little-endian bytes.
+    limbs.resize(num_limbs, 0u8);
+    limbs
 }
diff --git a/crates/circuits/poseidon2-air/src/config.rs b/crates/circuits/poseidon2-air/src/config.rs
index be597c6dc6..6007f0b4fb 100644
--- a/crates/circuits/poseidon2-air/src/config.rs
+++ b/crates/circuits/poseidon2-air/src/config.rs
@@ -15,7 +15,7 @@ pub struct Poseidon2Config<F> {
     pub constants: Poseidon2Constants<F>,
 }
 
-impl<F: PrimeField32> Default for Poseidon2Config<F> {
+impl<F: Field> Default for Poseidon2Config<F> {
     fn default() -> Self {
         Self {
             constants: default_baby_bear_rc(),
diff --git a/crates/circuits/poseidon2-air/src/lib.rs b/crates/circuits/poseidon2-air/src/lib.rs
index 8a51ee88c7..747f94630e 100644
--- a/crates/circuits/poseidon2-air/src/lib.rs
+++ b/crates/circuits/poseidon2-air/src/lib.rs
@@ -42,7 +42,7 @@ pub const BABY_BEAR_POSEIDON2_SBOX_DEGREE: u64 = 7;
 
 /// `SBOX_REGISTERS` affects the max constraint degree of the AIR. See [p3_poseidon2_air] for more
 /// details.
-#[derive(Debug)]
+#[derive(Debug, Clone)]
 pub struct Poseidon2SubChip<F: Field, const SBOX_REGISTERS: usize> {
     // This is Arc purely because Poseidon2Air cannot derive Clone
     pub air: Arc<Poseidon2SubAir<F, SBOX_REGISTERS>>,
diff --git a/crates/circuits/primitives/derive/src/lib.rs b/crates/circuits/primitives/derive/src/lib.rs
index 47ff1e220a..35e5f8fd5b 100644
--- a/crates/circuits/primitives/derive/src/lib.rs
+++ b/crates/circuits/primitives/derive/src/lib.rs
@@ -73,6 +73,49 @@ pub fn aligned_borrow_derive(input: TokenStream) -> TokenStream {
     TokenStream::from(methods)
 }
 
+/// `S` is the type the derive macro is being called on.
+/// Implements `Borrow<S>` and `BorrowMut<S>` for `[u8]`.
+/// The `[u8]` slice must have (checked only via `debug_assert!`s, i.e. not in release builds):
+/// - at least `size_of::<S>()` length
+/// - a start address aligned to `align_of::<S>()`
+#[proc_macro_derive(AlignedBytesBorrow)]
+pub fn aligned_bytes_borrow_derive(input: TokenStream) -> TokenStream {
+    let ast = parse_macro_input!(input as DeriveInput);
+    let name = &ast.ident;
+
+    // Get impl generics, type generics, where clause.
+    // No extra generic is added: the impls target the concrete slice type `[u8]`.
+    let (impl_generics, type_generics, where_clause) = ast.generics.split_for_impl();
+
+    let methods = quote! {
+        impl #impl_generics core::borrow::Borrow<#name #type_generics> for [u8]
+        // `#where_clause` already expands to `where ...` (or to nothing).
+        #where_clause
+        {
+            fn borrow(&self) -> &#name #type_generics {
+                use core::mem::{align_of, size_of_val};
+                debug_assert!(size_of_val(self) >= core::mem::size_of::<#name #type_generics>());
+                debug_assert_eq!(self.as_ptr() as usize % align_of::<#name #type_generics>(), 0);
+                unsafe { &*(self.as_ptr() as *const #name #type_generics) }
+            }
+        }
+
+        impl #impl_generics core::borrow::BorrowMut<#name #type_generics> for [u8]
+        // `#where_clause` already expands to `where ...` (or to nothing).
+        #where_clause
+        {
+            fn borrow_mut(&mut self) -> &mut #name #type_generics {
+                use core::mem::{align_of, size_of_val};
+                debug_assert!(size_of_val(self) >= core::mem::size_of::<#name #type_generics>());
+                debug_assert_eq!(self.as_ptr() as usize % align_of::<#name #type_generics>(), 0);
+                unsafe { &mut *(self.as_mut_ptr() as *mut #name #type_generics) }
+            }
+        }
+    };
+
+    TokenStream::from(methods)
+}
+
 #[proc_macro_derive(Chip, attributes(chip))]
 pub fn chip_derive(input: TokenStream) -> TokenStream {
     // Parse the attributes from the struct or enum
@@ -86,9 +129,10 @@ pub fn chip_derive(input: TokenStream) -> TokenStream {
         Data::Struct(inner) => {
             let generics = &ast.generics;
             let mut new_generics = generics.clone();
+            new_generics.params.push(syn::parse_quote! { R });
             new_generics
                 .params
-                .push(syn::parse_quote! { SC: openvm_stark_backend::config::StarkGenericConfig });
+                .push(syn::parse_quote! { PB: openvm_stark_backend::prover::hal::ProverBackend });
             let (impl_generics, _, _) = new_generics.split_for_impl();
 
             // Check if the struct has only one unnamed field
@@ -105,17 +149,11 @@ pub fn chip_derive(input: TokenStream) -> TokenStream {
             let where_clause = new_generics.make_where_clause();
             where_clause
                 .predicates
-                .push(syn::parse_quote! { #inner_ty: openvm_stark_backend::Chip<SC> });
+                .push(syn::parse_quote! { #inner_ty: openvm_stark_backend::Chip<R, PB> });
             quote! {
-                impl #impl_generics openvm_stark_backend::Chip<SC> for #name #ty_generics #where_clause {
-                    fn air(&self) -> openvm_stark_backend::AirRef<SC> {
-                        self.0.air()
-                    }
-                    fn generate_air_proof_input(self) -> openvm_stark_backend::prover::types::AirProofInput<SC> {
-                        self.0.generate_air_proof_input()
-                    }
-                    fn generate_air_proof_input_with_id(self, air_id: usize) -> (usize, openvm_stark_backend::prover::types::AirProofInput<SC>) {
-                        self.0.generate_air_proof_input_with_id(air_id)
+                impl #impl_generics openvm_stark_backend::Chip<R, PB> for #name #ty_generics #where_clause {
+                    fn generate_proving_ctx(&self, records: R) -> openvm_stark_backend::prover::types::AirProvingContext<PB> {
+                        self.0.generate_proving_ctx(records)
                     }
                 }
             }.into()
@@ -134,34 +172,32 @@ pub fn chip_derive(input: TokenStream) -> TokenStream {
                 })
                 .collect::<Vec<_>>();
 
-            let (air_arms, generate_air_proof_input_arms, generate_air_proof_input_with_id_arms): (Vec<_>, Vec<_>, Vec<_>) =
-                multiunzip(variants.iter().map(|(variant_name, field)| {
+            let (generate_proving_ctx_arms, where_predicates): (Vec<_>, Vec<_>) =
+                variants.iter().map(|(variant_name, field)| {
                 let field_ty = &field.ty;
-                let air_arm = quote! {
-                    #name::#variant_name(x) => <#field_ty as openvm_stark_backend::Chip<SC>>::air(x)
-                };
-                let generate_air_proof_input_arm = quote! {
-                    #name::#variant_name(x) => <#field_ty as openvm_stark_backend::Chip<SC>>::generate_air_proof_input(x)
-                };
-                let generate_air_proof_input_with_id_arm = quote! {
-                    #name::#variant_name(x) => <#field_ty as openvm_stark_backend::Chip<SC>>::generate_air_proof_input_with_id(x, air_id)
+                let generate_proving_ctx_arm = quote! {
+                    #name::#variant_name(x) => <#field_ty as openvm_stark_backend::Chip<R, PB>>::generate_proving_ctx(x, records)
                 };
-                (air_arm, generate_air_proof_input_arm, generate_air_proof_input_with_id_arm)
-            }));
+                let where_predicate =
+                    syn::parse_quote! { #field_ty: openvm_stark_backend::Chip<R, PB> };
+                (generate_proving_ctx_arm, where_predicate)
+            }).collect();
 
-            // Attach an extra generic SC: StarkGenericConfig to the impl_generics
+            // Attach extra generics R and PB to the impl_generics
             let generics = &ast.generics;
             let mut new_generics = generics.clone();
+            new_generics.params.push(syn::parse_quote! { R });
             new_generics
                 .params
-                .push(syn::parse_quote! { SC: openvm_stark_backend::config::StarkGenericConfig });
+                .push(syn::parse_quote! { PB: openvm_stark_backend::prover::hal::ProverBackend });
             let (impl_generics, _, _) = new_generics.split_for_impl();
 
             // Implement Chip whenever the inner type implements Chip
             let mut new_generics = generics.clone();
             let where_clause = new_generics.make_where_clause();
-            where_clause.predicates.push(syn::parse_quote! { openvm_stark_backend::config::Domain<SC>: openvm_stark_backend::p3_commit::PolynomialSpace<Val = F>
-            });
+            for predicate in where_predicates {
+                where_clause.predicates.push(predicate);
+            }
             let attributes = ast.attrs.iter().find(|&attr| attr.path().is_ident("chip"));
             if let Some(attr) = attributes {
                 let mut fail_flag = false;
@@ -195,20 +231,10 @@ pub fn chip_derive(input: TokenStream) -> TokenStream {
             }
 
             quote! {
-                impl #impl_generics openvm_stark_backend::Chip<SC> for #name #ty_generics #where_clause {
-                    fn air(&self) -> openvm_stark_backend::AirRef<SC> {
-                        match self {
-                            #(#air_arms,)*
-                        }
-                    }
-                    fn generate_air_proof_input(self) -> openvm_stark_backend::prover::types::AirProofInput<SC> {
-                        match self {
-                            #(#generate_air_proof_input_arms,)*
-                        }
-                    }
-                    fn generate_air_proof_input_with_id(self, air_id: usize) -> (usize, openvm_stark_backend::prover::types::AirProofInput<SC>) {
+                impl #impl_generics openvm_stark_backend::Chip<R, PB> for #name #ty_generics #where_clause {
+                    fn generate_proving_ctx(&self, records: R) -> openvm_stark_backend::prover::types::AirProvingContext<PB> {
                         match self {
-                            #(#generate_air_proof_input_with_id_arms,)*
+                            #(#generate_proving_ctx_arms,)*
                         }
                     }
                 }
diff --git a/crates/circuits/primitives/src/bitwise_op_lookup/mod.rs b/crates/circuits/primitives/src/bitwise_op_lookup/mod.rs
index a9e649f84e..f3a0152b35 100644
--- a/crates/circuits/primitives/src/bitwise_op_lookup/mod.rs
+++ b/crates/circuits/primitives/src/bitwise_op_lookup/mod.rs
@@ -11,9 +11,9 @@ use openvm_stark_backend::{
     p3_air::{Air, BaseAir, PairBuilder},
     p3_field::{Field, FieldAlgebra},
     p3_matrix::{dense::RowMajorMatrix, Matrix},
-    prover::types::AirProofInput,
+    prover::{cpu::CpuBackend, types::AirProvingContext},
     rap::{get_air_name, BaseAirWithPublicValues, PartitionedBaseAir},
-    AirRef, Chip, ChipUsageGetter,
+    Chip, ChipUsageGetter,
 };
 
 mod bus;
@@ -112,10 +112,8 @@ pub struct BitwiseOperationLookupChip<const NUM_BITS: usize> {
     pub count_xor: Vec<AtomicU32>,
 }
 
-#[derive(Clone)]
-pub struct SharedBitwiseOperationLookupChip<const NUM_BITS: usize>(
-    Arc<BitwiseOperationLookupChip<NUM_BITS>>,
-);
+pub type SharedBitwiseOperationLookupChip<const NUM_BITS: usize> =
+    Arc<BitwiseOperationLookupChip<NUM_BITS>>;
 
 impl<const NUM_BITS: usize> BitwiseOperationLookupChip<NUM_BITS> {
     pub fn new(bus: BitwiseOperationLookupBus) -> Self {
@@ -159,15 +157,17 @@ impl<const NUM_BITS: usize> BitwiseOperationLookupChip<NUM_BITS> {
         }
     }
 
+    /// Generates trace and resets all internal counters to 0.
     pub fn generate_trace<F: Field>(&self) -> RowMajorMatrix<F> {
         let mut rows = F::zero_vec(self.count_range.len() * NUM_BITWISE_OP_LOOKUP_COLS);
         for (n, row) in rows.chunks_mut(NUM_BITWISE_OP_LOOKUP_COLS).enumerate() {
             let cols: &mut BitwiseOperationLookupCols<F> = row.borrow_mut();
             cols.mult_range = F::from_canonical_u32(
-                self.count_range[n].load(std::sync::atomic::Ordering::SeqCst),
+                self.count_range[n].swap(0, std::sync::atomic::Ordering::SeqCst),
+            );
+            cols.mult_xor = F::from_canonical_u32(
+                self.count_xor[n].swap(0, std::sync::atomic::Ordering::SeqCst),
             );
-            cols.mult_xor =
-                F::from_canonical_u32(self.count_xor[n].load(std::sync::atomic::Ordering::SeqCst));
         }
         RowMajorMatrix::new(rows, NUM_BITWISE_OP_LOOKUP_COLS)
     }
@@ -177,57 +177,13 @@ impl<const NUM_BITS: usize> BitwiseOperationLookupChip<NUM_BITS> {
     }
 }
 
-impl<const NUM_BITS: usize> SharedBitwiseOperationLookupChip<NUM_BITS> {
-    pub fn new(bus: BitwiseOperationLookupBus) -> Self {
-        Self(Arc::new(BitwiseOperationLookupChip::new(bus)))
-    }
-    pub fn bus(&self) -> BitwiseOperationLookupBus {
-        self.0.bus()
-    }
-
-    pub fn air_width(&self) -> usize {
-        self.0.air_width()
-    }
-
-    pub fn request_range(&self, x: u32, y: u32) {
-        self.0.request_range(x, y);
-    }
-
-    pub fn request_xor(&self, x: u32, y: u32) -> u32 {
-        self.0.request_xor(x, y)
-    }
-
-    pub fn clear(&self) {
-        self.0.clear()
-    }
-
-    pub fn generate_trace<F: Field>(&self) -> RowMajorMatrix<F> {
-        self.0.generate_trace()
-    }
-}
-
-impl<SC: StarkGenericConfig, const NUM_BITS: usize> Chip<SC>
+impl<R, SC: StarkGenericConfig, const NUM_BITS: usize> Chip<R, CpuBackend<SC>>
     for BitwiseOperationLookupChip<NUM_BITS>
 {
-    fn air(&self) -> AirRef<SC> {
-        Arc::new(self.air)
-    }
-
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
+    /// Generates trace and resets all internal counters to 0.
+    fn generate_proving_ctx(&self, _: R) -> AirProvingContext<CpuBackend<SC>> {
         let trace = self.generate_trace::<Val<SC>>();
-        AirProofInput::simple_no_pis(trace)
-    }
-}
-
-impl<SC: StarkGenericConfig, const NUM_BITS: usize> Chip<SC>
-    for SharedBitwiseOperationLookupChip<NUM_BITS>
-{
-    fn air(&self) -> AirRef<SC> {
-        self.0.air()
-    }
-
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
-        self.0.generate_air_proof_input()
+        AirProvingContext::simple_no_pis(Arc::new(trace))
     }
 }
 
@@ -245,29 +201,3 @@ impl<const NUM_BITS: usize> ChipUsageGetter for BitwiseOperationLookupChip<NUM_B
         NUM_BITWISE_OP_LOOKUP_COLS
     }
 }
-
-impl<const NUM_BITS: usize> ChipUsageGetter for SharedBitwiseOperationLookupChip<NUM_BITS> {
-    fn air_name(&self) -> String {
-        self.0.air_name()
-    }
-
-    fn constant_trace_height(&self) -> Option<usize> {
-        self.0.constant_trace_height()
-    }
-
-    fn current_trace_height(&self) -> usize {
-        self.0.current_trace_height()
-    }
-
-    fn trace_width(&self) -> usize {
-        self.0.trace_width()
-    }
-}
-
-impl<const NUM_BITS: usize> AsRef<BitwiseOperationLookupChip<NUM_BITS>>
-    for SharedBitwiseOperationLookupChip<NUM_BITS>
-{
-    fn as_ref(&self) -> &BitwiseOperationLookupChip<NUM_BITS> {
-        &self.0
-    }
-}
diff --git a/crates/circuits/primitives/src/range/mod.rs b/crates/circuits/primitives/src/range/mod.rs
index 39dd70aae7..dc94c03c9c 100644
--- a/crates/circuits/primitives/src/range/mod.rs
+++ b/crates/circuits/primitives/src/range/mod.rs
@@ -122,7 +122,7 @@ impl RangeCheckerChip {
             let cols: &mut RangeCols<F> = (*row).borrow_mut();
             // Set multiplicity for each value in range
             cols.mult =
-                F::from_canonical_u32(self.count[n].load(std::sync::atomic::Ordering::SeqCst));
+                F::from_canonical_u32(self.count[n].swap(0, std::sync::atomic::Ordering::Relaxed));
         }
         RowMajorMatrix::new(rows, NUM_RANGE_COLS)
     }
diff --git a/crates/circuits/primitives/src/range_gate/mod.rs b/crates/circuits/primitives/src/range_gate/mod.rs
index 7c1a877c49..a3401e0c97 100644
--- a/crates/circuits/primitives/src/range_gate/mod.rs
+++ b/crates/circuits/primitives/src/range_gate/mod.rs
@@ -143,7 +143,7 @@ impl RangeCheckerGateChip {
             .iter()
             .enumerate()
             .flat_map(|(i, count)| {
-                let c = count.load(std::sync::atomic::Ordering::Relaxed);
+                let c = count.swap(0, std::sync::atomic::Ordering::Relaxed);
                 vec![F::from_canonical_usize(i), F::from_canonical_u32(c)]
             })
             .collect();
diff --git a/crates/circuits/primitives/src/range_tuple/mod.rs b/crates/circuits/primitives/src/range_tuple/mod.rs
index 3d0754cc9a..4962d567c5 100644
--- a/crates/circuits/primitives/src/range_tuple/mod.rs
+++ b/crates/circuits/primitives/src/range_tuple/mod.rs
@@ -16,9 +16,9 @@ use openvm_stark_backend::{
     p3_air::{Air, BaseAir, PairBuilder},
     p3_field::{Field, PrimeField32},
     p3_matrix::{dense::RowMajorMatrix, Matrix},
-    prover::types::AirProofInput,
+    prover::{cpu::CpuBackend, types::AirProvingContext},
     rap::{get_air_name, BaseAirWithPublicValues, PartitionedBaseAir},
-    AirRef, Chip, ChipUsageGetter,
+    Chip, ChipUsageGetter,
 };
 
 mod bus;
@@ -105,8 +105,7 @@ pub struct RangeTupleCheckerChip<const N: usize> {
     pub count: Vec<Arc<AtomicU32>>,
 }
 
-#[derive(Debug, Clone)]
-pub struct SharedRangeTupleCheckerChip<const N: usize>(Arc<RangeTupleCheckerChip<N>>);
+pub type SharedRangeTupleCheckerChip<const N: usize> = Arc<RangeTupleCheckerChip<N>>;
 
 impl<const N: usize> RangeTupleCheckerChip<N> {
     pub fn new(bus: RangeTupleCheckerBus<N>) -> Self {
@@ -154,61 +153,19 @@ impl<const N: usize> RangeTupleCheckerChip<N> {
         let rows = self
             .count
             .iter()
-            .map(|c| F::from_canonical_u32(c.load(std::sync::atomic::Ordering::SeqCst)))
+            .map(|c| F::from_canonical_u32(c.swap(0, std::sync::atomic::Ordering::Relaxed)))
             .collect::<Vec<_>>();
         RowMajorMatrix::new(rows, 1)
     }
 }
 
-impl<const N: usize> SharedRangeTupleCheckerChip<N> {
-    pub fn new(bus: RangeTupleCheckerBus<N>) -> Self {
-        Self(Arc::new(RangeTupleCheckerChip::new(bus)))
-    }
-    pub fn bus(&self) -> &RangeTupleCheckerBus<N> {
-        self.0.bus()
-    }
-
-    pub fn sizes(&self) -> &[u32; N] {
-        self.0.sizes()
-    }
-
-    pub fn add_count(&self, ids: &[u32]) {
-        self.0.add_count(ids);
-    }
-
-    pub fn clear(&self) {
-        self.0.clear();
-    }
-
-    pub fn generate_trace<F: Field>(&self) -> RowMajorMatrix<F> {
-        self.0.generate_trace()
-    }
-}
-
-impl<SC: StarkGenericConfig, const N: usize> Chip<SC> for RangeTupleCheckerChip<N>
+impl<R, SC: StarkGenericConfig, const N: usize> Chip<R, CpuBackend<SC>> for RangeTupleCheckerChip<N>
 where
     Val<SC>: PrimeField32,
 {
-    fn air(&self) -> AirRef<SC> {
-        Arc::new(self.air)
-    }
-
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
+    fn generate_proving_ctx(&self, _: R) -> AirProvingContext<CpuBackend<SC>> {
         let trace = self.generate_trace::<Val<SC>>();
-        AirProofInput::simple_no_pis(trace)
-    }
-}
-
-impl<SC: StarkGenericConfig, const N: usize> Chip<SC> for SharedRangeTupleCheckerChip<N>
-where
-    Val<SC>: PrimeField32,
-{
-    fn air(&self) -> AirRef<SC> {
-        self.0.air()
-    }
-
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
-        self.0.generate_air_proof_input()
+        AirProvingContext::simple_no_pis(Arc::new(trace))
     }
 }
 
@@ -226,27 +183,3 @@ impl<const N: usize> ChipUsageGetter for RangeTupleCheckerChip<N> {
         NUM_RANGE_TUPLE_COLS
     }
 }
-
-impl<const N: usize> ChipUsageGetter for SharedRangeTupleCheckerChip<N> {
-    fn air_name(&self) -> String {
-        self.0.air_name()
-    }
-
-    fn constant_trace_height(&self) -> Option<usize> {
-        self.0.constant_trace_height()
-    }
-
-    fn current_trace_height(&self) -> usize {
-        self.0.current_trace_height()
-    }
-
-    fn trace_width(&self) -> usize {
-        self.0.trace_width()
-    }
-}
-
-impl<const N: usize> AsRef<RangeTupleCheckerChip<N>> for SharedRangeTupleCheckerChip<N> {
-    fn as_ref(&self) -> &RangeTupleCheckerChip<N> {
-        &self.0
-    }
-}
diff --git a/crates/circuits/primitives/src/var_range/mod.rs b/crates/circuits/primitives/src/var_range/mod.rs
index 1ba3f2e776..82999a8bda 100644
--- a/crates/circuits/primitives/src/var_range/mod.rs
+++ b/crates/circuits/primitives/src/var_range/mod.rs
@@ -16,9 +16,9 @@ use openvm_stark_backend::{
     p3_air::{Air, BaseAir, PairBuilder},
     p3_field::{Field, PrimeField32},
     p3_matrix::{dense::RowMajorMatrix, Matrix},
-    prover::types::AirProofInput,
+    prover::{cpu::CpuBackend, types::AirProvingContext},
     rap::{get_air_name, BaseAirWithPublicValues, PartitionedBaseAir},
-    AirRef, Chip, ChipUsageGetter,
+    Chip, ChipUsageGetter,
 };
 use tracing::instrument;
 
@@ -102,8 +102,7 @@ pub struct VariableRangeCheckerChip {
     pub count: Vec<AtomicU32>,
 }
 
-#[derive(Clone)]
-pub struct SharedVariableRangeCheckerChip(Arc<VariableRangeCheckerChip>);
+pub type SharedVariableRangeCheckerChip = Arc<VariableRangeCheckerChip>;
 
 impl VariableRangeCheckerChip {
     pub fn new(bus: VariableRangeCheckerBus) -> Self {
@@ -153,12 +152,13 @@ impl VariableRangeCheckerChip {
         }
     }
 
+    /// Generates trace and resets all internal counters to 0.
     pub fn generate_trace<F: Field>(&self) -> RowMajorMatrix<F> {
         let mut rows = F::zero_vec(self.count.len() * NUM_VARIABLE_RANGE_COLS);
         for (n, row) in rows.chunks_mut(NUM_VARIABLE_RANGE_COLS).enumerate() {
             let cols: &mut VariableRangeCols<F> = row.borrow_mut();
             cols.mult =
-                F::from_canonical_u32(self.count[n].load(std::sync::atomic::Ordering::SeqCst));
+                F::from_canonical_u32(self.count[n].swap(0, std::sync::atomic::Ordering::Relaxed));
         }
         RowMajorMatrix::new(rows, NUM_VARIABLE_RANGE_COLS)
     }
@@ -186,60 +186,15 @@ impl VariableRangeCheckerChip {
     }
 }
 
-impl SharedVariableRangeCheckerChip {
-    pub fn new(bus: VariableRangeCheckerBus) -> Self {
-        Self(Arc::new(VariableRangeCheckerChip::new(bus)))
-    }
-
-    pub fn bus(&self) -> VariableRangeCheckerBus {
-        self.0.bus()
-    }
-
-    pub fn range_max_bits(&self) -> usize {
-        self.0.range_max_bits()
-    }
-
-    pub fn air_width(&self) -> usize {
-        self.0.air_width()
-    }
-
-    pub fn add_count(&self, value: u32, max_bits: usize) {
-        self.0.add_count(value, max_bits)
-    }
-
-    pub fn clear(&self) {
-        self.0.clear()
-    }
-
-    pub fn generate_trace<F: Field>(&self) -> RowMajorMatrix<F> {
-        self.0.generate_trace()
-    }
-}
-
-impl<SC: StarkGenericConfig> Chip<SC> for VariableRangeCheckerChip
+// We allow any `R` type so this can work with arbitrary record arenas.
+impl<R, SC: StarkGenericConfig> Chip<R, CpuBackend<SC>> for VariableRangeCheckerChip
 where
     Val<SC>: PrimeField32,
 {
-    fn air(&self) -> AirRef<SC> {
-        Arc::new(self.air)
-    }
-
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
+    /// Generates trace and resets all internal counters to 0.
+    fn generate_proving_ctx(&self, _: R) -> AirProvingContext<CpuBackend<SC>> {
         let trace = self.generate_trace::<Val<SC>>();
-        AirProofInput::simple_no_pis(trace)
-    }
-}
-
-impl<SC: StarkGenericConfig> Chip<SC> for SharedVariableRangeCheckerChip
-where
-    Val<SC>: PrimeField32,
-{
-    fn air(&self) -> AirRef<SC> {
-        self.0.air()
-    }
-
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
-        self.0.generate_air_proof_input()
+        AirProvingContext::simple_no_pis(Arc::new(trace))
     }
 }
 
@@ -257,27 +212,3 @@ impl ChipUsageGetter for VariableRangeCheckerChip {
         NUM_VARIABLE_RANGE_COLS
     }
 }
-
-impl ChipUsageGetter for SharedVariableRangeCheckerChip {
-    fn air_name(&self) -> String {
-        self.0.air_name()
-    }
-
-    fn constant_trace_height(&self) -> Option<usize> {
-        self.0.constant_trace_height()
-    }
-
-    fn current_trace_height(&self) -> usize {
-        self.0.current_trace_height()
-    }
-
-    fn trace_width(&self) -> usize {
-        self.0.trace_width()
-    }
-}
-
-impl AsRef<VariableRangeCheckerChip> for SharedVariableRangeCheckerChip {
-    fn as_ref(&self) -> &VariableRangeCheckerChip {
-        &self.0
-    }
-}
diff --git a/crates/circuits/primitives/src/xor/lookup/mod.rs b/crates/circuits/primitives/src/xor/lookup/mod.rs
index c9e76ad4c9..af9175183d 100644
--- a/crates/circuits/primitives/src/xor/lookup/mod.rs
+++ b/crates/circuits/primitives/src/xor/lookup/mod.rs
@@ -19,9 +19,9 @@ use openvm_stark_backend::{
     p3_air::{Air, BaseAir, PairBuilder},
     p3_field::Field,
     p3_matrix::{dense::RowMajorMatrix, Matrix},
-    prover::types::AirProofInput,
+    prover::{cpu::CpuBackend, types::AirProvingContext},
     rap::{get_air_name, BaseAirWithPublicValues, PartitionedBaseAir},
-    AirRef, Chip, ChipUsageGetter,
+    Chip, ChipUsageGetter,
 };
 
 use super::bus::XorBus;
@@ -170,14 +170,10 @@ impl<const M: usize> XorLookupChip<M> {
     }
 }
 
-impl<SC: StarkGenericConfig, const M: usize> Chip<SC> for XorLookupChip<M> {
-    fn air(&self) -> AirRef<SC> {
-        Arc::new(self.air)
-    }
-
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
+impl<R, SC: StarkGenericConfig, const M: usize> Chip<R, CpuBackend<SC>> for XorLookupChip<M> {
+    fn generate_proving_ctx(&self, _: R) -> AirProvingContext<CpuBackend<SC>> {
         let trace = self.generate_trace::<Val<SC>>();
-        AirProofInput::simple_no_pis(trace)
+        AirProvingContext::simple_no_pis(Arc::new(trace))
     }
 }
 
diff --git a/crates/circuits/sha256-air/src/air.rs b/crates/circuits/sha256-air/src/air.rs
index 96578984d0..b27af6ffa9 100644
--- a/crates/circuits/sha256-air/src/air.rs
+++ b/crates/circuits/sha256-air/src/air.rs
@@ -15,11 +15,11 @@ use openvm_stark_backend::{
 
 use super::{
     big_sig0_field, big_sig1_field, ch_field, compose, maj_field, small_sig0_field,
-    small_sig1_field, u32_into_limbs, Sha256DigestCols, Sha256RoundCols, SHA256_DIGEST_WIDTH,
-    SHA256_H, SHA256_HASH_WORDS, SHA256_K, SHA256_ROUNDS_PER_ROW, SHA256_ROUND_WIDTH,
-    SHA256_WORD_BITS, SHA256_WORD_U16S, SHA256_WORD_U8S,
+    small_sig1_field, Sha256DigestCols, Sha256RoundCols, SHA256_DIGEST_WIDTH, SHA256_H,
+    SHA256_HASH_WORDS, SHA256_K, SHA256_ROUNDS_PER_ROW, SHA256_ROUND_WIDTH, SHA256_WORD_BITS,
+    SHA256_WORD_U16S, SHA256_WORD_U8S,
 };
-use crate::constraint_word_addition;
+use crate::{constraint_word_addition, u32_into_u16s};
 
 /// Expects the message to be padded to a multiple of 512 bits
 #[derive(Clone, Debug)]
@@ -154,7 +154,7 @@ impl Sha256Air {
                     .assert_eq(
                         a_limb,
                         AB::Expr::from_canonical_u32(
-                            u32_into_limbs::<2>(SHA256_H[SHA256_ROUNDS_PER_ROW - i - 1])[j],
+                            u32_into_u16s(SHA256_H[SHA256_ROUNDS_PER_ROW - i - 1])[j],
                         ),
                     );
 
@@ -166,7 +166,7 @@ impl Sha256Air {
                     .assert_eq(
                         e_limb,
                         AB::Expr::from_canonical_u32(
-                            u32_into_limbs::<2>(SHA256_H[SHA256_ROUNDS_PER_ROW - i + 3])[j],
+                            u32_into_u16s(SHA256_H[SHA256_ROUNDS_PER_ROW - i + 3])[j],
                         ),
                     );
             }
@@ -561,9 +561,8 @@ impl Sha256Air {
                         .map(|rw_idx| {
                             (
                                 rw_idx,
-                                u32_into_limbs::<SHA256_WORD_U16S>(
-                                    SHA256_K[rw_idx * SHA256_ROUNDS_PER_ROW + i],
-                                )[j] as usize,
+                                u32_into_u16s(SHA256_K[rw_idx * SHA256_ROUNDS_PER_ROW + i])[j]
+                                    as usize,
                             )
                         })
                         .collect::<Vec<_>>(),
diff --git a/crates/circuits/sha256-air/src/tests.rs b/crates/circuits/sha256-air/src/tests.rs
index 903b7b0695..7ad0229185 100644
--- a/crates/circuits/sha256-air/src/tests.rs
+++ b/crates/circuits/sha256-air/src/tests.rs
@@ -1,11 +1,14 @@
-use std::{array, borrow::BorrowMut, cmp::max, sync::Arc};
+use std::{array, borrow::BorrowMut, sync::Arc};
 
 use openvm_circuit::arch::{
     instructions::riscv::RV32_CELL_BITS,
     testing::{VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS},
 };
 use openvm_circuit_primitives::{
-    bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
+    bitwise_op_lookup::{
+        BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
+        SharedBitwiseOperationLookupChip,
+    },
     SubAir,
 };
 use openvm_stark_backend::{
@@ -13,18 +16,19 @@ use openvm_stark_backend::{
     interaction::{BusIndex, InteractionBuilder},
     p3_air::{Air, BaseAir},
     p3_field::{Field, FieldAlgebra, PrimeField32},
-    p3_maybe_rayon::prelude::{IndexedParallelIterator, ParallelIterator, ParallelSliceMut},
-    prover::types::AirProofInput,
-    rap::{get_air_name, BaseAirWithPublicValues, PartitionedBaseAir},
-    AirRef, Chip, ChipUsageGetter,
+    p3_matrix::{dense::RowMajorMatrix, Matrix},
+    prover::{cpu::CpuBackend, types::AirProvingContext},
+    rap::{BaseAirWithPublicValues, PartitionedBaseAir},
+    utils::disable_debug_builder,
+    verifier::VerificationError,
+    AirRef, Chip,
 };
-use openvm_stark_sdk::utils::create_seeded_rng;
+use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
 use rand::Rng;
 
 use crate::{
-    compose, small_sig0_field, Sha256Air, Sha256RoundCols, SHA256_BLOCK_U8S, SHA256_DIGEST_WIDTH,
-    SHA256_HASH_WORDS, SHA256_ROUNDS_PER_ROW, SHA256_ROUND_WIDTH, SHA256_ROWS_PER_BLOCK,
-    SHA256_WORD_U16S, SHA256_WORD_U8S,
+    Sha256Air, Sha256DigestCols, Sha256FillerHelper, SHA256_BLOCK_U8S, SHA256_DIGEST_WIDTH,
+    SHA256_HASH_WORDS, SHA256_WIDTH, SHA256_WORD_U8S,
 };
 
 // A wrapper AIR purely for testing purposes
@@ -47,51 +51,47 @@ impl<AB: InteractionBuilder> Air<AB> for Sha256TestAir {
     }
 }
 
+const SELF_BUS_IDX: BusIndex = 28;
+type F = BabyBear;
+type RecordType = Vec<([u8; SHA256_BLOCK_U8S], bool)>;
+
 // A wrapper Chip purely for testing purposes
 pub struct Sha256TestChip {
-    pub air: Sha256TestAir,
+    pub step: Sha256FillerHelper,
     pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<8>,
-    pub records: Vec<([u8; SHA256_BLOCK_U8S], bool)>,
 }
 
-impl<SC: StarkGenericConfig> Chip<SC> for Sha256TestChip
+impl<SC: StarkGenericConfig> Chip<RecordType, CpuBackend<SC>> for Sha256TestChip
 where
     Val<SC>: PrimeField32,
 {
-    fn air(&self) -> AirRef<SC> {
-        Arc::new(self.air.clone())
-    }
-
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
+    fn generate_proving_ctx(&self, records: RecordType) -> AirProvingContext<CpuBackend<SC>> {
         let trace = crate::generate_trace::<Val<SC>>(
-            &self.air.sub_air,
-            self.bitwise_lookup_chip.clone(),
-            self.records,
+            &self.step,
+            self.bitwise_lookup_chip.as_ref(),
+            SHA256_WIDTH,
+            records,
         );
-        AirProofInput::simple_no_pis(trace)
+        AirProvingContext::simple_no_pis(Arc::new(trace))
     }
 }
 
-impl ChipUsageGetter for Sha256TestChip {
-    fn air_name(&self) -> String {
-        get_air_name(&self.air)
-    }
-    fn current_trace_height(&self) -> usize {
-        self.records.len() * SHA256_ROWS_PER_BLOCK
-    }
-
-    fn trace_width(&self) -> usize {
-        max(SHA256_ROUND_WIDTH, SHA256_DIGEST_WIDTH)
-    }
-}
-
-const SELF_BUS_IDX: BusIndex = 28;
-#[test]
-fn rand_sha256_test() {
+#[allow(clippy::type_complexity)]
+fn create_air_with_air_ctx<SC: StarkGenericConfig>() -> (
+    (AirRef<SC>, AirProvingContext<CpuBackend<SC>>),
+    (
+        BitwiseOperationLookupAir<RV32_CELL_BITS>,
+        SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+    ),
+)
+where
+    Val<SC>: PrimeField32,
+{
     let mut rng = create_seeded_rng();
-    let tester = VmChipTestBuilder::default();
     let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
+    let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+        bitwise_bus,
+    ));
     let len = rng.gen_range(1..100);
     let random_records: Vec<_> = (0..len)
         .map(|i| {
@@ -101,133 +101,63 @@ fn rand_sha256_test() {
             )
         })
         .collect();
+
+    let air = Sha256TestAir {
+        sub_air: Sha256Air::new(bitwise_bus, SELF_BUS_IDX),
+    };
     let chip = Sha256TestChip {
-        air: Sha256TestAir {
-            sub_air: Sha256Air::new(bitwise_bus, SELF_BUS_IDX),
-        },
+        step: Sha256FillerHelper::new(),
         bitwise_lookup_chip: bitwise_chip.clone(),
-        records: random_records,
     };
+    let air_ctx = chip.generate_proving_ctx(random_records);
 
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
-    tester.simple_test().expect("Verification failed");
+    ((Arc::new(air), air_ctx), (bitwise_chip.air, bitwise_chip))
 }
 
-// A wrapper Chip to test that the final_hash is properly constrained.
-// This chip implements a malicious trace gen that violates the final_hash constraints.
-pub struct Sha256TestBadFinalHashChip {
-    pub air: Sha256TestAir,
-    pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<8>,
-    pub records: Vec<([u8; SHA256_BLOCK_U8S], bool)>,
+#[test]
+fn rand_sha256_test() {
+    let tester = VmChipTestBuilder::default();
+    let (air_ctx, bitwise) = create_air_with_air_ctx();
+    let tester = tester
+        .build()
+        .load_air_proving_ctx(air_ctx)
+        .load_periphery(bitwise)
+        .finalize();
+    tester.simple_test().expect("Verification failed");
 }
 
-impl<SC: StarkGenericConfig> Chip<SC> for Sha256TestBadFinalHashChip
-where
-    Val<SC>: PrimeField32,
-{
-    fn air(&self) -> AirRef<SC> {
-        Arc::new(self.air.clone())
-    }
-
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
-        let mut trace = crate::generate_trace::<Val<SC>>(
-            &self.air.sub_air,
-            self.bitwise_lookup_chip.clone(),
-            self.records.clone(),
-        );
-
-        // Set the final_hash in the digest row of the last block of each hash to zero.
-        // That is, every hash that this chip does will result in a final_hash of zero.
-        for (i, row) in self.records.iter().enumerate() {
-            if row.1 {
-                let last_digest_row_idx = (i + 1) * SHA256_ROWS_PER_BLOCK - 1;
-                let last_digest_row: &mut crate::Sha256DigestCols<Val<SC>> =
-                    trace.row_mut(last_digest_row_idx)[..SHA256_DIGEST_WIDTH].borrow_mut();
-                // Set the final_hash to all zeros
+#[test]
+fn negative_sha256_test_bad_final_hash() {
+    let tester = VmChipTestBuilder::default();
+    let ((air, mut air_ctx), bitwise) = create_air_with_air_ctx();
+
+    // Zero out final_hash in the digest row of every last block
+    let modify_trace = |trace: &mut RowMajorMatrix<F>| {
+        trace.row_chunks_exact_mut(1).for_each(|row| {
+            let mut row_slice = row.row_slice(0).to_vec();
+            let cols: &mut Sha256DigestCols<F> = row_slice[..SHA256_DIGEST_WIDTH].borrow_mut();
+            if cols.flags.is_last_block.is_one() && cols.flags.is_digest_row.is_one() {
                 for i in 0..SHA256_HASH_WORDS {
                     for j in 0..SHA256_WORD_U8S {
-                        last_digest_row.final_hash[i][j] = Val::<SC>::ZERO;
+                        cols.final_hash[i][j] = F::ZERO;
                     }
                 }
-
-                let (last_round_row, last_digest_row) =
-                    trace.row_pair_mut(last_digest_row_idx - 1, last_digest_row_idx);
-                let last_round_row: &mut crate::Sha256RoundCols<Val<SC>> =
-                    last_round_row.borrow_mut();
-                let last_digest_row: &mut crate::Sha256RoundCols<Val<SC>> =
-                    last_digest_row.borrow_mut();
-                // fix the intermed_4 for the digest row
-                generate_intermed_4(last_round_row, last_digest_row);
+                row.values.copy_from_slice(&row_slice);
             }
-        }
-
-        let non_padded_height = self.records.len() * SHA256_ROWS_PER_BLOCK;
-        let width = <Sha256Air as BaseAir<Val<SC>>>::width(&self.air.sub_air);
-        // recalculate the missing cells (second pass of generate_trace)
-        trace.values[width..]
-            .par_chunks_mut(width * SHA256_ROWS_PER_BLOCK)
-            .take(non_padded_height / SHA256_ROWS_PER_BLOCK)
-            .for_each(|chunk| {
-                self.air.sub_air.generate_missing_cells(chunk, width, 0);
-            });
-
-        AirProofInput::simple_no_pis(trace)
-    }
-}
-
-// Copy of private method in Sha256Air used for testing
-/// Puts the correct intermed_4 in the `next_row`
-fn generate_intermed_4<F: PrimeField32>(
-    local_cols: &Sha256RoundCols<F>,
-    next_cols: &mut Sha256RoundCols<F>,
-) {
-    let w = [local_cols.message_schedule.w, next_cols.message_schedule.w].concat();
-    let w_limbs: Vec<[F; SHA256_WORD_U16S]> = w
-        .iter()
-        .map(|x| array::from_fn(|i| compose::<F>(&x[i * 16..(i + 1) * 16], 1)))
-        .collect();
-    for i in 0..SHA256_ROUNDS_PER_ROW {
-        let sig_w = small_sig0_field::<F>(&w[i + 1]);
-        let sig_w_limbs: [F; SHA256_WORD_U16S] =
-            array::from_fn(|j| compose::<F>(&sig_w[j * 16..(j + 1) * 16], 1));
-        for (j, sig_w_limb) in sig_w_limbs.iter().enumerate() {
-            next_cols.schedule_helper.intermed_4[i][j] = w_limbs[i][j] + *sig_w_limb;
-        }
-    }
-}
-
-impl ChipUsageGetter for Sha256TestBadFinalHashChip {
-    fn air_name(&self) -> String {
-        get_air_name(&self.air)
-    }
-    fn current_trace_height(&self) -> usize {
-        self.records.len() * SHA256_ROWS_PER_BLOCK
-    }
-
-    fn trace_width(&self) -> usize {
-        max(SHA256_ROUND_WIDTH, SHA256_DIGEST_WIDTH)
-    }
-}
-
-#[test]
-#[should_panic]
-fn test_sha256_final_hash_constraints() {
-    let mut rng = create_seeded_rng();
-    let tester = VmChipTestBuilder::default();
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-    let len = rng.gen_range(1..100);
-    let random_records: Vec<_> = (0..len)
-        .map(|_| (array::from_fn(|_| rng.gen::<u8>()), true))
-        .collect();
-    let chip = Sha256TestBadFinalHashChip {
-        air: Sha256TestAir {
-            sub_air: Sha256Air::new(bitwise_bus, SELF_BUS_IDX),
-        },
-        bitwise_lookup_chip: bitwise_chip.clone(),
-        records: random_records,
+        });
     };
 
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
-    tester.simple_test().expect("Verification failed");
+    // Take the main trace out of air_ctx, corrupt it, and put it back
+    let trace = Option::take(&mut air_ctx.common_main).unwrap();
+    let mut trace = Arc::into_inner(trace).unwrap();
+    modify_trace(&mut trace);
+    air_ctx.common_main = Some(Arc::new(trace));
+
+    disable_debug_builder();
+    let tester = tester
+        .build()
+        .load_air_proving_ctx((air, air_ctx))
+        .load_periphery(bitwise)
+        .finalize();
+    tester.simple_test_with_expected_error(VerificationError::OodEvaluationMismatch);
 }
diff --git a/crates/circuits/sha256-air/src/trace.rs b/crates/circuits/sha256-air/src/trace.rs
index eaf9174f50..8cbaebbc55 100644
--- a/crates/circuits/sha256-air/src/trace.rs
+++ b/crates/circuits/sha256-air/src/trace.rs
@@ -1,31 +1,48 @@
 use std::{array, borrow::BorrowMut, ops::Range};
 
 use openvm_circuit_primitives::{
-    bitwise_op_lookup::SharedBitwiseOperationLookupChip, utils::next_power_of_two_or_zero,
+    bitwise_op_lookup::BitwiseOperationLookupChip, encoder::Encoder,
+    utils::next_power_of_two_or_zero,
 };
 use openvm_stark_backend::{
-    p3_air::BaseAir, p3_field::PrimeField32, p3_matrix::dense::RowMajorMatrix,
-    p3_maybe_rayon::prelude::*,
+    p3_field::PrimeField32, p3_matrix::dense::RowMajorMatrix, p3_maybe_rayon::prelude::*,
 };
 use sha2::{compress256, digest::generic_array::GenericArray};
 
 use super::{
-    air::Sha256Air, big_sig0_field, big_sig1_field, ch_field, columns::Sha256RoundCols, compose,
-    get_flag_pt_array, maj_field, small_sig0_field, small_sig1_field, SHA256_BLOCK_WORDS,
-    SHA256_DIGEST_WIDTH, SHA256_HASH_WORDS, SHA256_ROUND_WIDTH,
+    big_sig0_field, big_sig1_field, ch_field, columns::Sha256RoundCols, compose, get_flag_pt_array,
+    maj_field, small_sig0_field, small_sig1_field, SHA256_BLOCK_WORDS, SHA256_DIGEST_WIDTH,
+    SHA256_HASH_WORDS, SHA256_ROUND_WIDTH,
 };
 use crate::{
     big_sig0, big_sig1, ch, columns::Sha256DigestCols, limbs_into_u32, maj, small_sig0, small_sig1,
-    u32_into_limbs, SHA256_BLOCK_U8S, SHA256_BUFFER_SIZE, SHA256_H, SHA256_INVALID_CARRY_A,
+    u32_into_bits_field, u32_into_u16s, SHA256_BLOCK_U8S, SHA256_H, SHA256_INVALID_CARRY_A,
     SHA256_INVALID_CARRY_E, SHA256_K, SHA256_ROUNDS_PER_ROW, SHA256_ROWS_PER_BLOCK,
-    SHA256_WORD_BITS, SHA256_WORD_U16S, SHA256_WORD_U8S,
+    SHA256_WORD_U16S, SHA256_WORD_U8S,
 };
 
+/// Helper struct for SHA256 trace generation.
+/// It keeps trace generation separate from the inner AIR.
+pub struct Sha256FillerHelper {
+    pub row_idx_encoder: Encoder,
+}
+
+impl Default for Sha256FillerHelper {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
 /// The trace generation of SHA256 should be done in two passes.
 /// The first pass should do `get_block_trace` for every block and generate the invalid rows through
 /// `get_default_row` The second pass should go through all the blocks and call
 /// `generate_missing_cells`
-impl Sha256Air {
+impl Sha256FillerHelper {
+    pub fn new() -> Self {
+        Self {
+            row_idx_encoder: Encoder::new(18, 2, false),
+        }
+    }
     /// This function takes the input_message (padding not handled), the previous hash,
     /// and returns the new hash after processing the block input
     pub fn get_block_hash(
@@ -52,18 +69,16 @@ impl Sha256Air {
         trace_width: usize,
         trace_start_col: usize,
         input: &[u32; SHA256_BLOCK_WORDS],
-        bitwise_lookup_chip: SharedBitwiseOperationLookupChip<8>,
+        bitwise_lookup_chip: &BitwiseOperationLookupChip<8>,
         prev_hash: &[u32; SHA256_HASH_WORDS],
         is_last_block: bool,
         global_block_idx: u32,
         local_block_idx: u32,
-        buffer_vals: &[[F; SHA256_BUFFER_SIZE]; 4],
     ) {
         #[cfg(debug_assertions)]
         {
             assert!(trace.len() == trace_width * SHA256_ROWS_PER_BLOCK);
             assert!(trace_start_col + super::SHA256_WIDTH <= trace_width);
-            assert!(self.bitwise_lookup_bus == bitwise_lookup_chip.bus());
             if local_block_idx == 0 {
                 assert!(*prev_hash == SHA256_H);
             }
@@ -87,14 +102,10 @@ impl Sha256Air {
                 cols.flags.local_block_idx = F::from_canonical_u32(local_block_idx);
 
                 // W_idx = M_idx
-                if i < SHA256_ROWS_PER_BLOCK / SHA256_ROUNDS_PER_ROW {
+                if i < 4 {
                     for j in 0..SHA256_ROUNDS_PER_ROW {
-                        cols.message_schedule.w[j] = u32_into_limbs::<SHA256_WORD_BITS>(
-                            input[i * SHA256_ROUNDS_PER_ROW + j],
-                        )
-                        .map(F::from_canonical_u32);
-                        cols.message_schedule.carry_or_buffer[j] =
-                            array::from_fn(|k| buffer_vals[i][j * SHA256_WORD_U16S * 2 + k]);
+                        cols.message_schedule.w[j] =
+                            u32_into_bits_field::<F>(input[i * SHA256_ROUNDS_PER_ROW + j]);
                     }
                 }
                 // W_idx = SIG1(W_{idx-2}) + W_{idx-7} + SIG0(W_{idx-15}) + W_{idx-16}
@@ -108,14 +119,10 @@ impl Sha256Air {
                             message_schedule[idx - 16],
                         ];
                         let w: u32 = nums.iter().fold(0, |acc, &num| acc.wrapping_add(num));
-                        cols.message_schedule.w[j] =
-                            u32_into_limbs::<SHA256_WORD_BITS>(w).map(F::from_canonical_u32);
+                        cols.message_schedule.w[j] = u32_into_bits_field::<F>(w);
 
-                        let nums_limbs = nums
-                            .iter()
-                            .map(|x| u32_into_limbs::<SHA256_WORD_U16S>(*x))
-                            .collect::<Vec<_>>();
-                        let w_limbs = u32_into_limbs::<SHA256_WORD_U16S>(w);
+                        let nums_limbs = nums.map(u32_into_u16s);
+                        let w_limbs = u32_into_u16s(w);
 
                         // fill in the carrys
                         for k in 0..SHA256_WORD_U16S {
@@ -157,25 +164,18 @@ impl Sha256Air {
 
                     // e = d + t1
                     let e = work_vars[3].wrapping_add(t1_sum);
-                    cols.work_vars.e[j] =
-                        u32_into_limbs::<SHA256_WORD_BITS>(e).map(F::from_canonical_u32);
-                    let e_limbs = u32_into_limbs::<SHA256_WORD_U16S>(e);
+                    cols.work_vars.e[j] = u32_into_bits_field::<F>(e);
+                    let e_limbs = u32_into_u16s(e);
                     // a = t1 + t2
                     let a = t1_sum.wrapping_add(t2_sum);
-                    cols.work_vars.a[j] =
-                        u32_into_limbs::<SHA256_WORD_BITS>(a).map(F::from_canonical_u32);
-                    let a_limbs = u32_into_limbs::<SHA256_WORD_U16S>(a);
+                    cols.work_vars.a[j] = u32_into_bits_field::<F>(a);
+                    let a_limbs = u32_into_u16s(a);
                     // fill in the carrys
                     for k in 0..SHA256_WORD_U16S {
-                        let t1_limb = t1.iter().fold(0, |acc, &num| {
-                            acc + u32_into_limbs::<SHA256_WORD_U16S>(num)[k]
-                        });
-                        let t2_limb = t2.iter().fold(0, |acc, &num| {
-                            acc + u32_into_limbs::<SHA256_WORD_U16S>(num)[k]
-                        });
+                        let t1_limb = t1.iter().fold(0, |acc, &num| acc + u32_into_u16s(num)[k]);
+                        let t2_limb = t2.iter().fold(0, |acc, &num| acc + u32_into_u16s(num)[k]);
 
-                        let mut e_limb =
-                            t1_limb + u32_into_limbs::<SHA256_WORD_U16S>(work_vars[3])[k];
+                        let mut e_limb = t1_limb + u32_into_u16s(work_vars[3])[k];
                         let mut a_limb = t1_limb + t2_limb;
                         if k > 0 {
                             a_limb += cols.work_vars.carry_a[j][k - 1].as_canonical_u32();
@@ -203,16 +203,14 @@ impl Sha256Air {
                 if i > 0 {
                     for j in 0..SHA256_ROUNDS_PER_ROW {
                         let idx = i * SHA256_ROUNDS_PER_ROW + j;
-                        let w_4 = u32_into_limbs::<SHA256_WORD_U16S>(message_schedule[idx - 4]);
-                        let sig_0_w_3 = u32_into_limbs::<SHA256_WORD_U16S>(small_sig0(
-                            message_schedule[idx - 3],
-                        ));
+                        let w_4 = u32_into_u16s(message_schedule[idx - 4]);
+                        let sig_0_w_3 = u32_into_u16s(small_sig0(message_schedule[idx - 3]));
                         cols.schedule_helper.intermed_4[j] =
                             array::from_fn(|k| F::from_canonical_u32(w_4[k] + sig_0_w_3[k]));
                         if j < SHA256_ROUNDS_PER_ROW - 1 {
                             let w_3 = message_schedule[idx - 3];
                             cols.schedule_helper.w_3[j] =
-                                u32_into_limbs::<SHA256_WORD_U16S>(w_3).map(F::from_canonical_u32);
+                                u32_into_u16s(w_3).map(F::from_canonical_u32);
                         }
                     }
                 }
@@ -223,8 +221,7 @@ impl Sha256Air {
                     row[get_range(trace_start_col, SHA256_DIGEST_WIDTH)].borrow_mut();
                 for j in 0..SHA256_ROUNDS_PER_ROW - 1 {
                     let w_3 = message_schedule[i * SHA256_ROUNDS_PER_ROW + j - 3];
-                    cols.schedule_helper.w_3[j] =
-                        u32_into_limbs::<SHA256_WORD_U16S>(w_3).map(F::from_canonical_u32);
+                    cols.schedule_helper.w_3[j] = u32_into_u16s(w_3).map(F::from_canonical_u32);
                 }
                 cols.flags.is_round_row = F::ZERO;
                 cols.flags.is_first_4_rows = F::ZERO;
@@ -237,29 +234,27 @@ impl Sha256Air {
                 cols.flags.local_block_idx = F::from_canonical_u32(local_block_idx);
                 let final_hash: [u32; SHA256_HASH_WORDS] =
                     array::from_fn(|i| work_vars[i].wrapping_add(prev_hash[i]));
-                let final_hash_limbs: [[u32; SHA256_WORD_U8S]; SHA256_HASH_WORDS] =
-                    array::from_fn(|i| u32_into_limbs::<SHA256_WORD_U8S>(final_hash[i]));
+                let final_hash_limbs: [[u8; SHA256_WORD_U8S]; SHA256_HASH_WORDS] =
+                    array::from_fn(|i| final_hash[i].to_le_bytes());
                 // need to ensure final hash limbs are bytes, in order for
                 //   prev_hash[i] + work_vars[i] == final_hash[i]
                 // to be constrained correctly
                 for word in final_hash_limbs.iter() {
                     for chunk in word.chunks(2) {
-                        bitwise_lookup_chip.request_range(chunk[0], chunk[1]);
+                        bitwise_lookup_chip.request_range(chunk[0] as u32, chunk[1] as u32);
                     }
                 }
                 cols.final_hash = array::from_fn(|i| {
-                    array::from_fn(|j| F::from_canonical_u32(final_hash_limbs[i][j]))
+                    array::from_fn(|j| F::from_canonical_u8(final_hash_limbs[i][j]))
                 });
-                cols.prev_hash = prev_hash
-                    .map(|f| u32_into_limbs::<SHA256_WORD_U16S>(f).map(F::from_canonical_u32));
+                cols.prev_hash = prev_hash.map(|f| u32_into_u16s(f).map(F::from_canonical_u32));
                 let hash = if is_last_block {
-                    SHA256_H.map(u32_into_limbs::<SHA256_WORD_BITS>)
+                    SHA256_H.map(u32_into_bits_field::<F>)
                 } else {
                     cols.final_hash
-                        .map(|f| limbs_into_u32(f.map(|x| x.as_canonical_u32())))
-                        .map(u32_into_limbs::<SHA256_WORD_BITS>)
-                }
-                .map(|x| x.map(F::from_canonical_u32));
+                        .map(|f| u32::from_le_bytes(f.map(|x| x.as_canonical_u32() as u8)))
+                        .map(u32_into_bits_field::<F>)
+                };
 
                 for i in 0..SHA256_ROUNDS_PER_ROW {
                     cols.hash.a[i] = hash[SHA256_ROUNDS_PER_ROW - i - 1];
@@ -338,24 +333,14 @@ impl Sha256Air {
 
     /// Fills the `cols` as a padding row
     /// Note: we still need to correctly fill in the hash values, carries and intermeds
-    pub fn generate_default_row<F: PrimeField32>(self: &Sha256Air, cols: &mut Sha256RoundCols<F>) {
-        cols.flags.is_round_row = F::ZERO;
-        cols.flags.is_first_4_rows = F::ZERO;
-        cols.flags.is_digest_row = F::ZERO;
-
-        cols.flags.is_last_block = F::ZERO;
-        cols.flags.global_block_idx = F::ZERO;
+    pub fn generate_default_row<F: PrimeField32>(
+        self: &Sha256FillerHelper,
+        cols: &mut Sha256RoundCols<F>,
+    ) {
         cols.flags.row_idx =
             get_flag_pt_array(&self.row_idx_encoder, 17).map(F::from_canonical_u32);
-        cols.flags.local_block_idx = F::ZERO;
-
-        cols.message_schedule.w = [[F::ZERO; SHA256_WORD_BITS]; SHA256_ROUNDS_PER_ROW];
-        cols.message_schedule.carry_or_buffer =
-            [[F::ZERO; SHA256_WORD_U16S * 2]; SHA256_ROUNDS_PER_ROW];
 
-        let hash = SHA256_H
-            .map(u32_into_limbs::<SHA256_WORD_BITS>)
-            .map(|x| x.map(F::from_canonical_u32));
+        let hash = SHA256_H.map(u32_into_bits_field::<F>);
 
         for i in 0..SHA256_ROUNDS_PER_ROW {
             cols.work_vars.a[i] = hash[SHA256_ROUNDS_PER_ROW - i - 1];
@@ -486,15 +471,16 @@ impl Sha256Air {
     }
 }
 
+/// Generates a trace for a standalone SHA256 computation (currently only used for testing).
 /// `records` consists of pairs of `(input_block, is_last_block)`.
 pub fn generate_trace<F: PrimeField32>(
-    sub_air: &Sha256Air,
-    bitwise_lookup_chip: SharedBitwiseOperationLookupChip<8>,
+    step: &Sha256FillerHelper,
+    bitwise_lookup_chip: &BitwiseOperationLookupChip<8>,
+    width: usize,
     records: Vec<([u8; SHA256_BLOCK_U8S], bool)>,
 ) -> RowMajorMatrix<F> {
     let non_padded_height = records.len() * SHA256_ROWS_PER_BLOCK;
     let height = next_power_of_two_or_zero(non_padded_height);
-    let width = <Sha256Air as BaseAir<F>>::width(sub_air);
     let mut values = F::zero_vec(height * width);
 
     struct BlockContext {
@@ -522,7 +508,7 @@ pub fn generate_trace<F: PrimeField32>(
             prev_hash = SHA256_H;
         } else {
             local_block_idx += 1;
-            prev_hash = Sha256Air::get_block_hash(&prev_hash, input);
+            prev_hash = Sha256FillerHelper::get_block_hash(&prev_hash, input);
         }
     }
     // first pass
@@ -542,17 +528,16 @@ pub fn generate_trace<F: PrimeField32>(
                     input[(i + 1) * SHA256_WORD_U8S - j - 1] as u32
                 }))
             });
-            sub_air.generate_block_trace(
+            step.generate_block_trace(
                 block,
                 width,
                 0,
                 &input_words,
-                bitwise_lookup_chip.clone(),
+                bitwise_lookup_chip,
                 &prev_hash,
                 is_last_block,
                 global_block_idx,
                 local_block_idx,
-                &[[F::ZERO; 16]; 4],
             );
         });
     // second pass: padding rows
@@ -560,14 +545,14 @@ pub fn generate_trace<F: PrimeField32>(
         .par_chunks_mut(width)
         .for_each(|row| {
             let cols: &mut Sha256RoundCols<F> = row.borrow_mut();
-            sub_air.generate_default_row(cols);
+            step.generate_default_row(cols);
         });
     // second pass: non-padding rows
     values[width..]
         .par_chunks_mut(width * SHA256_ROWS_PER_BLOCK)
         .take(non_padded_height / SHA256_ROWS_PER_BLOCK)
         .for_each(|chunk| {
-            sub_air.generate_missing_cells(chunk, width, 0);
+            step.generate_missing_cells(chunk, width, 0);
         });
     RowMajorMatrix::new(values, width)
 }
diff --git a/crates/circuits/sha256-air/src/utils.rs b/crates/circuits/sha256-air/src/utils.rs
index abf8b6e7f2..ba598f2604 100644
--- a/crates/circuits/sha256-air/src/utils.rs
+++ b/crates/circuits/sha256-air/src/utils.rs
@@ -6,7 +6,6 @@ use openvm_circuit_primitives::{
     utils::{not, select},
 };
 use openvm_stark_backend::{p3_air::AirBuilder, p3_field::FieldAlgebra};
-use rand::{rngs::StdRng, Rng};
 
 use super::{Sha256DigestCols, Sha256RoundCols};
 
@@ -74,10 +73,21 @@ pub const SHA256_H: [u32; 8] = [
     0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
 ];
 
-/// Convert a u32 into a list of limbs in little endian
-pub fn u32_into_limbs<const NUM_LIMBS: usize>(num: u32) -> [u32; NUM_LIMBS] {
-    let limb_bits = 32 / NUM_LIMBS;
-    array::from_fn(|i| (num >> (limb_bits * i)) & ((1 << limb_bits) - 1))
+/// Returns the number of blocks required to hash a message of `len` bytes
+pub fn get_sha256_num_blocks(len: u32) -> u32 {
+    // Padding adds one 1 bit and a 64-bit message length, then zero bits up to a multiple of
+    // [SHA256_BLOCK_BITS]. Widen to u64 before shifting so `len * 8` cannot drop high bits.
+    ((u64::from(len) << 3) + 1 + 64).div_ceil(SHA256_BLOCK_BITS as u64) as u32
+}
+
+/// Convert a u32 into a list of bits in little endian then convert each bit into a field element
+pub fn u32_into_bits_field<F: FieldAlgebra + Clone>(num: u32) -> [F; SHA256_WORD_BITS] {
+    array::from_fn(|i| F::from_bool((num >> i) & 1 == 1))
+}
+
+/// Convert a u32 into an array of 2 16-bit limbs in little endian
+pub fn u32_into_u16s(num: u32) -> [u32; 2] {
+    [num & 0xffff, num >> 16]
 }
 
 /// Convert a list of limbs in little endian into a u32
@@ -227,13 +237,6 @@ pub(crate) fn small_sig1_field<F: FieldAlgebra + Clone>(
     xor(&rotr::<F>(x, 17), &rotr::<F>(x, 19), &shr::<F>(x, 10))
 }
 
-/// Generate a random message of a given length
-pub fn get_random_message(rng: &mut StdRng, len: usize) -> Vec<u8> {
-    let mut random_message: Vec<u8> = vec![0u8; len];
-    rng.fill(&mut random_message[..]);
-    random_message
-}
-
 /// Wrapper of `get_flag_pt` to get the flag pointer as an array
 pub fn get_flag_pt_array<const N: usize>(encoder: &Encoder, flag_idx: usize) -> [u32; N] {
     encoder.get_flag_pt(flag_idx).try_into().unwrap()
diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml
index f105c588a3..4983992663 100644
--- a/crates/cli/Cargo.toml
+++ b/crates/cli/Cargo.toml
@@ -19,16 +19,15 @@ vergen = { version = "8", default-features = false, features = [
 [dependencies]
 openvm-build = { workspace = true }
 openvm-transpiler = { workspace = true }
-openvm-native-recursion = { workspace = true }
 openvm-sdk = { workspace = true }
 openvm-stark-sdk.workspace = true
 openvm-stark-backend.workspace = true
 openvm-circuit = { workspace = true }
 
-aws-sdk-s3 = "1.78"
-aws-config = "1.5"
-tokio = { version = "1.43.1", features = ["rt", "rt-multi-thread", "macros"] }
-clap = { version = "4.5.9", features = ["derive", "env"] }
+aws-sdk-s3 = "1.98.0"
+aws-config = "1.8.2"
+tokio = { version = "1.46.1", features = ["rt", "rt-multi-thread", "macros"] }
+clap = { workspace = true, features = ["derive", "env"] }
 eyre.workspace = true
 tracing.workspace = true
 serde.workspace = true
@@ -42,12 +41,12 @@ toml_edit = "0.22"
 include_dir = "0.7"
 
 [features]
-default = ["parallel", "jemalloc", "evm-verify", "bench-metrics"]
+default = ["parallel", "jemalloc", "evm-verify", "metrics"]
 evm-prove = ["openvm-sdk/evm-prove"]
 evm-verify = ["evm-prove", "openvm-sdk/evm-verify"]
-bench-metrics = ["openvm-sdk/bench-metrics"]
+metrics = ["openvm-sdk/metrics"]
 # for guest profiling:
-profiling = ["openvm-sdk/profiling"]
+perf-metrics = ["openvm-sdk/perf-metrics", "metrics"]
 # performance features:
 # (rayon is always imported because of halo2, so "parallel" feature is redundant)
 parallel = ["openvm-sdk/parallel"]
@@ -55,3 +54,4 @@ mimalloc = ["openvm-sdk/mimalloc"]
 jemalloc = ["openvm-sdk/jemalloc"]
 jemalloc-prof = ["openvm-sdk/jemalloc-prof"]
 nightly-features = ["openvm-sdk/nightly-features"]
+ci = []
diff --git a/crates/cli/src/bin/cargo-openvm.rs b/crates/cli/src/bin/cargo-openvm.rs
index 36d5942bab..bd53a29539 100644
--- a/crates/cli/src/bin/cargo-openvm.rs
+++ b/crates/cli/src/bin/cargo-openvm.rs
@@ -12,10 +12,13 @@ pub enum Cargo {
 }
 
 #[derive(clap::Args)]
-#[command(author, about, long_about = None, args_conflicts_with_subcommands = true, version = OPENVM_VERSION_MESSAGE)]
+#[command(author, about, long_about = None, version = OPENVM_VERSION_MESSAGE)]
 pub struct VmCli {
     #[command(subcommand)]
     pub command: VmCliCommands,
+
+    #[arg(long)]
+    pub verbose: bool,
 }
 
 #[derive(Subcommand)]
@@ -36,7 +39,13 @@ pub enum VmCliCommands {
 async fn main() -> Result<()> {
     let Cargo::OpenVm(args) = Cargo::parse();
     let command = args.command;
-    setup_tracing_with_log_level(Level::WARN);
+    let log_level = if args.verbose {
+        Level::INFO
+    } else {
+        Level::WARN
+    };
+    setup_tracing_with_log_level(log_level);
+
     match command {
         VmCliCommands::Build(cmd) => cmd.run(),
         VmCliCommands::Commit(cmd) => cmd.run(),
diff --git a/crates/cli/src/commands/build.rs b/crates/cli/src/commands/build.rs
index 92ecc9fc0d..58a31e262b 100644
--- a/crates/cli/src/commands/build.rs
+++ b/crates/cli/src/commands/build.rs
@@ -10,9 +10,11 @@ use itertools::izip;
 use openvm_build::{
     build_generic, get_package, get_workspace_packages, get_workspace_root, GuestOptions,
 };
-use openvm_circuit::arch::{InitFileGenerator, OPENVM_DEFAULT_INIT_FILE_NAME};
-use openvm_sdk::{fs::write_exe_to_file, Sdk};
-use openvm_transpiler::{elf::Elf, openvm_platform::memory::MEM_SIZE};
+use openvm_circuit::arch::{
+    instructions::exe::VmExe, InitFileGenerator, OPENVM_DEFAULT_INIT_FILE_NAME,
+};
+use openvm_sdk::{config::TranspilerConfig, fs::write_object_to_file};
+use openvm_transpiler::{elf::Elf, openvm_platform::memory::MEM_SIZE, FromElf};
 
 use crate::util::{
     get_manifest_path_and_dir, get_target_dir, get_target_output_dir, read_config_toml_or_default,
@@ -432,17 +434,17 @@ pub fn build(build_args: &BuildArgs, cargo_args: &BuildCargoArgs) -> Result<Path
         let transpiler = app_config.app_vm_config.transpiler();
         let data = read(elf_path.clone())?;
         let elf = Elf::decode(&data, MEM_SIZE as u32)?;
-        let exe = Sdk::new().transpile(elf, transpiler)?;
+        let exe = VmExe::from_elf(elf, transpiler)?;
 
         let target_name = if target.is_example() {
-            &format!("examples/{}", target.name)
+            PathBuf::from("examples").join(&target.name)
         } else {
-            &target.name
+            PathBuf::from(&target.name)
         };
-        let file_name = format!("{}.vmexe", target_name);
+        let file_name = target_name.with_extension("vmexe");
         let file_path = target_output_dir.join(&file_name);
 
-        write_exe_to_file(exe, &file_path)?;
+        write_object_to_file(&file_path, exe)?;
         if let Some(output_dir) = &build_args.output_dir {
             create_dir_all(output_dir)?;
             copy(file_path, output_dir.join(file_name))?;
diff --git a/crates/cli/src/commands/commit.rs b/crates/cli/src/commands/commit.rs
index 4c7e307300..2a643ee2d6 100644
--- a/crates/cli/src/commands/commit.rs
+++ b/crates/cli/src/commands/commit.rs
@@ -6,12 +6,15 @@ use std::{
 use clap::Parser;
 use eyre::Result;
 use openvm_circuit::arch::OPENVM_DEFAULT_INIT_FILE_NAME;
-use openvm_sdk::{commit::AppExecutionCommit, fs::write_to_file_json, Sdk};
+use openvm_sdk::{fs::write_to_file_json, Sdk};
 
 use super::{RunArgs, RunCargoArgs};
 use crate::{
-    commands::{load_app_pk, load_or_build_and_commit_exe},
-    util::{get_manifest_path_and_dir, get_target_dir, get_target_output_dir},
+    commands::{load_app_pk, load_or_build_exe},
+    util::{
+        get_app_commit_path, get_manifest_path_and_dir, get_single_target_name, get_target_dir,
+        get_target_output_dir,
+    },
 };
 
 #[derive(Parser)]
@@ -64,7 +67,6 @@ pub struct CommitCmd {
 
 impl CommitCmd {
     pub fn run(&self) -> Result<()> {
-        let sdk = Sdk::new();
         let app_pk = load_app_pk(&self.app_pk, &self.cargo_args)?;
 
         let run_args = RunArgs {
@@ -74,28 +76,29 @@ impl CommitCmd {
             init_file_name: self.init_file_name.clone(),
             input: None,
         };
-        let (committed_exe, target_name) =
-            load_or_build_and_commit_exe(&sdk, &run_args, &self.cargo_args, &app_pk)?;
+        let (exe, target_name_stem) = load_or_build_exe(&run_args, &self.cargo_args)?;
+        let sdk = Sdk::new(app_pk.app_config())?.with_app_pk(app_pk);
 
-        let commits = AppExecutionCommit::compute(
-            &app_pk.app_vm_pk.vm_config,
-            &committed_exe,
-            &app_pk.leaf_committed_exe,
-        );
-        println!("exe commit: {:?}", commits.app_exe_commit.to_bn254());
-        println!("vm commit: {:?}", commits.app_vm_commit.to_bn254());
+        let app_commit = sdk.app_prover(exe)?.app_commit();
+        println!("exe commit: {:?}", app_commit.app_exe_commit.to_bn254());
+        println!("vm commit: {:?}", app_commit.app_vm_commit.to_bn254());
 
         let (manifest_path, _) = get_manifest_path_and_dir(&self.cargo_args.manifest_path)?;
         let target_dir = get_target_dir(&self.cargo_args.target_dir, &manifest_path);
         let target_output_dir = get_target_output_dir(&target_dir, &self.cargo_args.profile);
 
-        let commit_name = format!("{}.commit.json", &target_name);
-        let commit_path = target_output_dir.join(&commit_name);
+        // target_name_stem does not contain "examples/" prefix
+        let target_name =
+            get_single_target_name(&self.cargo_args).unwrap_or(target_name_stem.into());
+        let commit_path = get_app_commit_path(&target_output_dir, target_name);
 
-        write_to_file_json(&commit_path, commits)?;
+        println!("Writing app commit to {}", commit_path.display());
+        write_to_file_json(&commit_path, app_commit)?;
         if let Some(output_dir) = &self.output_dir {
             create_dir_all(output_dir)?;
-            copy(commit_path, output_dir.join(commit_name))?;
+            let commit_name = commit_path.file_name().unwrap();
+            let to_path = output_dir.join(commit_name);
+            copy(commit_path, to_path)?;
         }
 
         Ok(())
diff --git a/crates/cli/src/commands/keygen.rs b/crates/cli/src/commands/keygen.rs
index a021a183e1..108af5750b 100644
--- a/crates/cli/src/commands/keygen.rs
+++ b/crates/cli/src/commands/keygen.rs
@@ -5,10 +5,7 @@ use std::{
 
 use clap::Parser;
 use eyre::Result;
-use openvm_sdk::{
-    fs::{write_app_pk_to_file, write_app_vk_to_file},
-    Sdk,
-};
+use openvm_sdk::{fs::write_object_to_file, Sdk};
 
 use crate::{
     default::{DEFAULT_APP_PK_NAME, DEFAULT_APP_VK_NAME},
@@ -93,10 +90,9 @@ pub(crate) fn keygen(
     output_dir: Option<impl AsRef<Path>>,
 ) -> Result<()> {
     let app_config = read_config_toml_or_default(config)?;
-    let app_pk = Sdk::new().app_keygen(app_config)?;
-    let app_vk = app_pk.get_app_vk();
-    write_app_vk_to_file(app_vk, &app_vk_path)?;
-    write_app_pk_to_file(app_pk, &app_pk_path)?;
+    let (app_pk, app_vk) = Sdk::new(app_config)?.app_keygen();
+    write_object_to_file(&app_vk_path, app_vk)?;
+    write_object_to_file(&app_pk_path, app_pk)?;
 
     if let Some(output_dir) = output_dir {
         let output_dir = output_dir.as_ref();
diff --git a/crates/cli/src/commands/prove.rs b/crates/cli/src/commands/prove.rs
index 55c10901b9..8ae0e61a70 100644
--- a/crates/cli/src/commands/prove.rs
+++ b/crates/cli/src/commands/prove.rs
@@ -1,30 +1,25 @@
-use std::{path::PathBuf, sync::Arc};
+use std::path::PathBuf;
 
 use clap::Parser;
 use eyre::Result;
-#[cfg(feature = "evm-prove")]
-use openvm_sdk::fs::write_evm_proof_to_file;
+use openvm_circuit::arch::instructions::exe::VmExe;
 use openvm_sdk::{
-    commit::AppExecutionCommit,
     config::{AggregationTreeConfig, SdkVmConfig},
-    fs::{
-        read_agg_stark_pk_from_file, read_app_pk_from_file, read_exe_from_file,
-        write_app_proof_to_file, write_to_file_json,
-    },
+    fs::{encode_to_file, read_object_from_file, write_to_file_json},
     keygen::AppProvingKey,
-    types::VmStarkProofBytes,
-    NonRootCommittedExe, Sdk,
+    types::VersionedVmStarkProof,
+    Sdk, F,
 };
 
 use super::{RunArgs, RunCargoArgs};
+#[cfg(feature = "evm-prove")]
+use crate::util::read_default_agg_and_halo2_pk;
 use crate::{
     commands::build,
     default::default_agg_stark_pk_path,
     input::read_to_stdin,
     util::{get_app_pk_path, get_manifest_path_and_dir, get_single_target_name, get_target_dir},
 };
-#[cfg(feature = "evm-prove")]
-use crate::{default::default_params_dir, util::read_default_agg_pk};
 
 #[derive(Parser)]
 #[command(name = "prove", about = "Generate a program proof")]
@@ -122,20 +117,24 @@ impl ProveCmd {
                 run_args,
                 cargo_args,
             } => {
-                let sdk = Sdk::new();
                 let app_pk = load_app_pk(app_pk, cargo_args)?;
-                let (committed_exe, target_name) =
-                    load_or_build_and_commit_exe(&sdk, run_args, cargo_args, &app_pk)?;
+                let sdk = Sdk::new(app_pk.app_config())?.with_app_pk(app_pk);
+                let (exe, target_name) = load_or_build_exe(run_args, cargo_args)?;
 
-                let app_proof =
-                    sdk.generate_app_proof(app_pk, committed_exe, read_to_stdin(&run_args.input)?)?;
+                let app_proof = sdk
+                    .app_prover(exe)?
+                    .prove(read_to_stdin(&run_args.input)?)?;
 
                 let proof_path = if let Some(proof) = proof {
                     proof
                 } else {
-                    &PathBuf::from(format!("{}.app.proof", target_name))
+                    &PathBuf::from(target_name).with_extension("app.proof")
                 };
-                write_app_proof_to_file(app_proof, proof_path)?;
+                println!(
+                    "App proof completed! Writing App proof to {}",
+                    proof_path.display()
+                );
+                encode_to_file(proof_path, app_proof)?;
             }
             ProveSubCommand::Stark {
                 app_pk,
@@ -144,36 +143,33 @@ impl ProveCmd {
                 cargo_args,
                 agg_tree_config,
             } => {
-                let sdk = Sdk::new().with_agg_tree_config(*agg_tree_config);
                 let app_pk = load_app_pk(app_pk, cargo_args)?;
-                let (committed_exe, target_name) =
-                    load_or_build_and_commit_exe(&sdk, run_args, cargo_args, &app_pk)?;
-
-                let commits = AppExecutionCommit::compute(
-                    &app_pk.app_vm_pk.vm_config,
-                    &committed_exe,
-                    &app_pk.leaf_committed_exe,
-                );
-                println!("exe commit: {:?}", commits.app_exe_commit.to_bn254());
-                println!("vm commit: {:?}", commits.app_vm_commit.to_bn254());
+                let (exe, target_name) = load_or_build_exe(run_args, cargo_args)?;
 
-                let agg_stark_pk = read_agg_stark_pk_from_file(default_agg_stark_pk_path()).map_err(|e| {
+                let agg_pk = read_object_from_file(default_agg_stark_pk_path()).map_err(|e| {
                     eyre::eyre!("Failed to read aggregation proving key: {}\nPlease run 'cargo openvm setup' first", e)
                 })?;
-                let stark_proof = sdk.generate_e2e_stark_proof(
-                    app_pk,
-                    committed_exe,
-                    agg_stark_pk,
-                    read_to_stdin(&run_args.input)?,
-                )?;
-
-                let stark_proof_bytes = VmStarkProofBytes::new(commits, stark_proof)?;
+                let sdk = Sdk::new(app_pk.app_config())?
+                    .with_agg_tree_config(*agg_tree_config)
+                    .with_app_pk(app_pk)
+                    .with_agg_pk(agg_pk);
+                let mut prover = sdk.prover(exe)?;
+                let app_commit = prover.app_commit();
+                println!("exe commit: {:?}", app_commit.app_exe_commit.to_bn254());
+                println!("vm commit: {:?}", app_commit.app_vm_commit.to_bn254());
+
+                let stark_proof = prover.prove(read_to_stdin(&run_args.input)?)?;
+                let stark_proof_bytes = VersionedVmStarkProof::new(stark_proof)?;
 
                 let proof_path = if let Some(proof) = proof {
                     proof
                 } else {
-                    &PathBuf::from(format!("{}.stark.proof", target_name))
+                    &PathBuf::from(target_name).with_extension("stark.proof")
                 };
+                println!(
+                    "STARK proof completed! Writing STARK proof to {}",
+                    proof_path.display()
+                );
                 write_to_file_json(proof_path, stark_proof_bytes)?;
             }
             #[cfg(feature = "evm-prove")]
@@ -184,40 +180,34 @@ impl ProveCmd {
                 cargo_args,
                 agg_tree_config,
             } => {
-                use openvm_native_recursion::halo2::utils::CacheHalo2ParamsReader;
-
-                let sdk = Sdk::new().with_agg_tree_config(*agg_tree_config);
                 let app_pk = load_app_pk(app_pk, cargo_args)?;
-                let (committed_exe, target_name) =
-                    load_or_build_and_commit_exe(&sdk, run_args, cargo_args, &app_pk)?;
-
-                let commits = AppExecutionCommit::compute(
-                    &app_pk.app_vm_pk.vm_config,
-                    &committed_exe,
-                    &app_pk.leaf_committed_exe,
-                );
-                println!("exe commit: {:?}", commits.app_exe_commit.to_bn254());
-                println!("vm commit: {:?}", commits.app_vm_commit.to_bn254());
+                let (exe, target_name) = load_or_build_exe(run_args, cargo_args)?;
 
                 println!("Generating EVM proof, this may take a lot of compute and memory...");
-                let agg_pk = read_default_agg_pk().map_err(|e| {
+                let (agg_pk, halo2_pk) = read_default_agg_and_halo2_pk().map_err(|e| {
                     eyre::eyre!("Failed to read aggregation proving key: {}\nPlease run 'cargo openvm setup' first", e)
                 })?;
-                let params_reader = CacheHalo2ParamsReader::new(default_params_dir());
-                let evm_proof = sdk.generate_evm_proof(
-                    &params_reader,
-                    app_pk,
-                    committed_exe,
-                    agg_pk,
-                    read_to_stdin(&run_args.input)?,
-                )?;
+                let sdk = Sdk::new(app_pk.app_config())?
+                    .with_agg_tree_config(*agg_tree_config)
+                    .with_app_pk(app_pk)
+                    .with_agg_pk(agg_pk)
+                    .with_halo2_pk(halo2_pk);
+                let mut prover = sdk.evm_prover(exe)?;
+                let app_commit = prover.stark_prover.app_commit();
+                println!("exe commit: {:?}", app_commit.app_exe_commit.to_bn254());
+                println!("vm commit: {:?}", app_commit.app_vm_commit.to_bn254());
+                let evm_proof = prover.prove_evm(read_to_stdin(&run_args.input)?)?;
 
                 let proof_path = if let Some(proof) = proof {
                     proof
                 } else {
-                    &PathBuf::from(format!("{}.evm.proof", target_name))
+                    &PathBuf::from(target_name).with_extension("evm.proof")
                 };
-                write_evm_proof_to_file(evm_proof, proof_path)?;
+                println!(
+                    "EVM proof completed! Writing EVM proof to {}",
+                    proof_path.display()
+                );
+                write_to_file_json(proof_path, evm_proof)?;
             }
         }
         Ok(())
@@ -227,7 +217,7 @@ impl ProveCmd {
 pub(crate) fn load_app_pk(
     app_pk: &Option<PathBuf>,
     cargo_args: &RunCargoArgs,
-) -> Result<Arc<AppProvingKey<SdkVmConfig>>> {
+) -> Result<AppProvingKey<SdkVmConfig>> {
     let (manifest_path, _) = get_manifest_path_and_dir(&cargo_args.manifest_path)?;
     let target_dir = get_target_dir(&cargo_args.target_dir, &manifest_path);
 
@@ -237,16 +227,15 @@ pub(crate) fn load_app_pk(
         get_app_pk_path(&target_dir)
     };
 
-    Ok(Arc::new(read_app_pk_from_file(app_pk_path)?))
+    read_object_from_file(app_pk_path)
 }
 
-// Returns (committed_exe, target_name) where target_name has no extension
-pub(crate) fn load_or_build_and_commit_exe(
-    sdk: &Sdk,
+/// Returns `(exe, target_name.file_stem())` where target_name has no extension and only contains
+/// the file stem (in particular it does not include `examples/` if the target was an example)
+pub(crate) fn load_or_build_exe(
     run_args: &RunArgs,
     cargo_args: &RunCargoArgs,
-    app_pk: &Arc<AppProvingKey<SdkVmConfig>>,
-) -> Result<(Arc<NonRootCommittedExe>, String)> {
+) -> Result<(VmExe<F>, String)> {
     let exe_path = if let Some(exe) = &run_args.exe {
         exe
     } else {
@@ -255,13 +244,12 @@ pub(crate) fn load_or_build_and_commit_exe(
         let build_args = run_args.clone().into();
         let cargo_args = cargo_args.clone().into();
         let output_dir = build(&build_args, &cargo_args)?;
-        &output_dir.join(format!("{}.vmexe", target_name))
+        &output_dir.join(target_name.with_extension("vmexe"))
     };
 
-    let app_exe = read_exe_from_file(exe_path)?;
-    let committed_exe = sdk.commit_app_exe(app_pk.app_fri_params(), app_exe)?;
+    let app_exe = read_object_from_file(exe_path)?;
     Ok((
-        committed_exe,
+        app_exe,
         exe_path.file_stem().unwrap().to_string_lossy().into_owned(),
     ))
 }
diff --git a/crates/cli/src/commands/run.rs b/crates/cli/src/commands/run.rs
index 133e7eebf5..bdd106fa0c 100644
--- a/crates/cli/src/commands/run.rs
+++ b/crates/cli/src/commands/run.rs
@@ -2,8 +2,8 @@ use std::path::PathBuf;
 
 use clap::Parser;
 use eyre::Result;
-use openvm_circuit::arch::OPENVM_DEFAULT_INIT_FILE_NAME;
-use openvm_sdk::{fs::read_exe_from_file, Sdk};
+use openvm_circuit::arch::{instructions::exe::VmExe, OPENVM_DEFAULT_INIT_FILE_NAME};
+use openvm_sdk::{fs::read_object_from_file, Sdk, F};
 
 use super::{build, BuildArgs, BuildCargoArgs};
 use crate::{
@@ -237,7 +237,7 @@ impl RunCmd {
             let build_args = self.run_args.clone().into();
             let cargo_args = self.cargo_args.clone().into();
             let output_dir = build(&build_args, &cargo_args)?;
-            &output_dir.join(format!("{}.vmexe", target_name))
+            &output_dir.join(target_name.with_extension("vmexe"))
         };
 
         let (_, manifest_dir) = get_manifest_path_and_dir(&self.cargo_args.manifest_path)?;
@@ -247,14 +247,10 @@ impl RunCmd {
                 .to_owned()
                 .unwrap_or_else(|| manifest_dir.join("openvm.toml")),
         )?;
-        let exe = read_exe_from_file(exe_path)?;
+        let exe: VmExe<F> = read_object_from_file(exe_path)?;
 
-        let sdk = Sdk::new();
-        let output = sdk.execute(
-            exe,
-            app_config.app_vm_config,
-            read_to_stdin(&self.run_args.input)?,
-        )?;
+        let sdk = Sdk::new(app_config)?;
+        let output = sdk.execute(exe, read_to_stdin(&self.run_args.input)?)?;
         println!("Execution output: {:?}", output);
         Ok(())
     }
diff --git a/crates/cli/src/commands/setup.rs b/crates/cli/src/commands/setup.rs
index 770be24d4c..34c5fd13e6 100644
--- a/crates/cli/src/commands/setup.rs
+++ b/crates/cli/src/commands/setup.rs
@@ -7,23 +7,21 @@ use aws_config::{defaults, BehaviorVersion, Region};
 use aws_sdk_s3::Client;
 use clap::Parser;
 use eyre::{eyre, Result};
-use openvm_native_recursion::halo2::utils::CacheHalo2ParamsReader;
 use openvm_sdk::{
-    config::{AggConfig, AggStarkConfig},
     fs::{
-        write_agg_halo2_pk_to_file, write_agg_stark_pk_to_file, write_evm_halo2_verifier_to_folder,
+        read_object_from_file, write_evm_halo2_verifier_to_folder, write_object_to_file,
         EVM_HALO2_VERIFIER_BASE_NAME, EVM_HALO2_VERIFIER_INTERFACE_NAME,
         EVM_HALO2_VERIFIER_PARENT_NAME,
     },
-    DefaultStaticVerifierPvHandler, Sdk,
+    Sdk,
 };
 
 use crate::{
     default::{
-        default_agg_halo2_pk_path, default_agg_stark_pk_path, default_asm_path,
-        default_evm_halo2_verifier_path, default_params_dir,
+        default_agg_halo2_pk_path, default_agg_stark_pk_path, default_agg_stark_vk_path,
+        default_asm_path, default_evm_halo2_verifier_path, default_params_dir,
     },
-    util::read_default_agg_pk,
+    util::read_default_agg_and_halo2_pk,
 };
 
 #[derive(Parser)]
@@ -49,7 +47,7 @@ pub struct SetupCmd {
 impl SetupCmd {
     pub async fn run(&self) -> Result<()> {
         let default_agg_stark_pk_path = default_agg_stark_pk_path();
-        let default_params_dir = default_params_dir();
+        let default_agg_stark_vk_path = default_agg_stark_vk_path();
         let default_evm_halo2_verifier_path = default_evm_halo2_verifier_path();
         let default_asm_path = default_asm_path();
         if !self.evm {
@@ -57,17 +55,25 @@ impl SetupCmd {
                 println!("Aggregation stark proving key already exists");
                 return Ok(());
             }
-            let agg_stark_config = AggStarkConfig::default();
-            let sdk = Sdk::new();
-            let agg_stark_pk = sdk.agg_stark_keygen(agg_stark_config)?;
-
-            println!("Writing stark proving key to file...");
-            write_agg_stark_pk_to_file(&agg_stark_pk, default_agg_stark_pk_path)?;
+            // agg keygen does not depend on the app config
+            let sdk = Sdk::standard();
+            let (agg_pk, agg_vk) = sdk.agg_keygen()?;
+
+            println!(
+                "Writing STARK aggregation proving key to {}",
+                &default_agg_stark_pk_path
+            );
+            write_object_to_file(default_agg_stark_pk_path, agg_pk)?;
+            println!(
+                "Writing STARK aggregation verifying key to {}",
+                &default_agg_stark_vk_path
+            );
+            write_object_to_file(default_agg_stark_vk_path, agg_vk)?;
 
             println!("Generating root verifier ASM...");
-            let root_verifier_asm = sdk.generate_root_verifier_asm(&agg_stark_pk);
+            let root_verifier_asm = sdk.generate_root_verifier_asm();
 
-            println!("Writing root verifier ASM to file...");
+            println!("Writing root verifier ASM to {}", &default_asm_path);
             write(&default_asm_path, root_verifier_asm)?;
         } else {
             let default_agg_halo2_pk_path = default_agg_halo2_pk_path();
@@ -93,31 +99,41 @@ impl SetupCmd {
             }
 
             Self::download_params(10, 24).await?;
-            let params_reader = CacheHalo2ParamsReader::new(&default_params_dir);
-            let agg_config = AggConfig::default();
-            let sdk = Sdk::new();
+            // halo2 keygen does not depend on the app config
+            let sdk = Sdk::standard();
 
-            let agg_pk = if !self.force_agg_keygen
+            let agg_vk = if !self.force_agg_keygen
                 && PathBuf::from(&default_agg_stark_pk_path).exists()
+                && PathBuf::from(&default_agg_stark_vk_path).exists()
                 && PathBuf::from(&default_agg_halo2_pk_path).exists()
             {
-                read_default_agg_pk()?
+                let (agg_pk, halo2_pk) = read_default_agg_and_halo2_pk()?;
+                sdk.set_agg_pk(agg_pk)
+                    .map_err(|_| eyre!("agg_pk already existed"))?;
+                sdk.set_halo2_pk(halo2_pk)
+                    .map_err(|_| eyre!("halo2_pk already existed"))?;
+                read_object_from_file(&default_agg_stark_vk_path)?
             } else {
                 println!("Generating proving key...");
-                sdk.agg_keygen(agg_config, &params_reader, &DefaultStaticVerifierPvHandler)?
+                let (_agg_pk, agg_vk) = sdk.agg_keygen()?;
+                let _halo2_pk = sdk.halo2_pk();
+                agg_vk
             };
 
             println!("Generating root verifier ASM...");
-            let root_verifier_asm = sdk.generate_root_verifier_asm(&agg_pk.agg_stark_pk);
+            let root_verifier_asm = sdk.generate_root_verifier_asm();
 
             println!("Generating verifier contract...");
-            let verifier = sdk.generate_halo2_verifier_solidity(&params_reader, &agg_pk)?;
+            let verifier = sdk.generate_halo2_verifier_solidity()?;
 
             println!("Writing stark proving key to file...");
-            write_agg_stark_pk_to_file(&agg_pk.agg_stark_pk, &default_agg_stark_pk_path)?;
+            write_object_to_file(&default_agg_stark_pk_path, sdk.agg_pk())?;
+
+            println!("Writing stark verifying key to file...");
+            write_object_to_file(&default_agg_stark_vk_path, agg_vk)?;
 
             println!("Writing halo2 proving key to file...");
-            write_agg_halo2_pk_to_file(&agg_pk.halo2_pk, &default_agg_halo2_pk_path)?;
+            write_object_to_file(&default_agg_halo2_pk_path, sdk.halo2_pk())?;
 
             println!("Writing root verifier ASM to file...");
             write(&default_asm_path, root_verifier_asm)?;
diff --git a/crates/cli/src/commands/verify.rs b/crates/cli/src/commands/verify.rs
index a4fb6f7fa1..fc4c311ea0 100644
--- a/crates/cli/src/commands/verify.rs
+++ b/crates/cli/src/commands/verify.rs
@@ -1,22 +1,24 @@
 use std::path::{Path, PathBuf};
 
 use clap::Parser;
-use eyre::Result;
+use eyre::{Context, Result};
 use openvm_sdk::{
-    fs::{
-        read_agg_stark_pk_from_file, read_app_proof_from_file, read_app_vk_from_file,
-        read_from_file_json,
-    },
-    types::VmStarkProofBytes,
-    Sdk,
+    fs::{decode_from_file, read_from_file_json, read_object_from_file},
+    prover::verify_app_proof,
+    types::VersionedVmStarkProof,
+    Sdk, OPENVM_VERSION,
 };
 
 use super::KeygenCargoArgs;
 #[cfg(feature = "evm-verify")]
 use crate::default::default_evm_halo2_verifier_path;
 use crate::{
-    default::default_agg_stark_pk_path,
-    util::{get_app_vk_path, get_files_with_ext, get_manifest_path_and_dir, get_target_dir},
+    commands::RunCargoArgs,
+    default::default_agg_stark_vk_path,
+    util::{
+        get_app_commit_path, get_app_vk_path, get_files_with_ext, get_manifest_path_and_dir,
+        get_single_target_name, get_target_dir, get_target_output_dir,
+    },
 };
 
 #[derive(Parser)]
@@ -49,6 +51,16 @@ enum VerifySubCommand {
         cargo_args: KeygenCargoArgs,
     },
     Stark {
+        /// NOTE: if `openvm commit` was called with the `--exe` option, then `--app-commit` must
+        /// be specified so the command knows where to find the app commit.
+        #[arg(
+            long,
+            action,
+            help = "Path to app commit, by default will search for it using the binary target name",
+            help_heading = "OpenVM Options"
+        )]
+        app_commit: Option<PathBuf>,
+
         #[arg(
             long,
             action,
@@ -56,6 +68,9 @@ enum VerifySubCommand {
             help_heading = "OpenVM Options"
         )]
         proof: Option<PathBuf>,
+
+        #[command(flatten)]
+        cargo_args: RunCargoArgs,
     },
     #[cfg(feature = "evm-verify")]
     Evm {
@@ -71,7 +86,6 @@ enum VerifySubCommand {
 
 impl VerifyCmd {
     pub fn run(&self) -> Result<()> {
-        let sdk = Sdk::new();
         match &self.command {
             VerifySubCommand::App {
                 app_vk,
@@ -85,7 +99,7 @@ impl VerifyCmd {
                     let target_dir = get_target_dir(&cargo_args.target_dir, &manifest_path);
                     get_app_vk_path(&target_dir)
                 };
-                let app_vk = read_app_vk_from_file(app_vk_path)?;
+                let app_vk = read_object_from_file(app_vk_path)?;
 
                 let proof_path = if let Some(proof) = proof {
                     proof.clone()
@@ -99,17 +113,31 @@ impl VerifyCmd {
                     files[0].clone()
                 };
                 println!("Verifying application proof at {}", proof_path.display());
-                let app_proof = read_app_proof_from_file(proof_path)?;
-                sdk.verify_app_proof(&app_vk, &app_proof)?;
+                let app_proof = decode_from_file(proof_path)?;
+                verify_app_proof(&app_vk, &app_proof)?;
             }
-            VerifySubCommand::Stark { proof } => {
-                let agg_stark_pk = read_agg_stark_pk_from_file(default_agg_stark_pk_path())
+            VerifySubCommand::Stark {
+                app_commit,
+                proof,
+                cargo_args,
+            } => {
+                let agg_vk = read_object_from_file(default_agg_stark_vk_path())
                     .map_err(|e| {
                         eyre::eyre!(
-                        "Failed to read aggregation STARK proving key: {}\nPlease run 'cargo openvm setup' first",
-                        e
+                        "Failed to read aggregation STARK verifying key: {e}\nPlease run 'cargo openvm setup' first",
                     )
                     })?;
+                let app_commit_path = if let Some(app_commit) = app_commit {
+                    app_commit.to_path_buf()
+                } else {
+                    let (manifest_path, _) = get_manifest_path_and_dir(&cargo_args.manifest_path)?;
+                    let target_dir = get_target_dir(&cargo_args.target_dir, &manifest_path);
+                    let target_output_dir = get_target_output_dir(&target_dir, &cargo_args.profile);
+                    let target_name = get_single_target_name(cargo_args)?;
+                    get_app_commit_path(&target_output_dir, target_name)
+                };
+                let expected_app_commit = read_from_file_json(app_commit_path)?;
+
                 let proof_path = if let Some(proof) = proof {
                     proof.clone()
                 } else {
@@ -122,28 +150,24 @@ impl VerifyCmd {
                     files[0].clone()
                 };
                 println!("Verifying STARK proof at {}", proof_path.display());
-                let stark_proof_bytes: VmStarkProofBytes = read_from_file_json(proof_path)?;
-                let expected_exe_commit = stark_proof_bytes.app_commit.app_exe_commit.to_bn254();
-                let expected_vm_commit = stark_proof_bytes.app_commit.app_vm_commit.to_bn254();
-                sdk.verify_e2e_stark_proof(
-                    &agg_stark_pk,
-                    &stark_proof_bytes.try_into()?,
-                    &expected_exe_commit,
-                    &expected_vm_commit,
-                )?;
+                let stark_proof: VersionedVmStarkProof = read_from_file_json(proof_path)
+                    .with_context(|| {
+                        format!("Proof needs to be compatible with openvm v{OPENVM_VERSION}",)
+                    })?;
+                if stark_proof.version != format!("v{OPENVM_VERSION}") {
+                    eprintln!("Attempting to verify proof generated with openvm {}, but the verifier is on openvm v{OPENVM_VERSION}", stark_proof.version);
+                }
+                Sdk::verify_proof(&agg_vk, expected_app_commit, &stark_proof.try_into()?)?;
             }
             #[cfg(feature = "evm-verify")]
             VerifySubCommand::Evm { proof } => {
-                use openvm_sdk::fs::{
-                    read_evm_halo2_verifier_from_folder, read_evm_proof_from_file,
-                };
+                use openvm_sdk::{fs::read_evm_halo2_verifier_from_folder, types::EvmProof};
 
                 let evm_verifier =
                     read_evm_halo2_verifier_from_folder(default_evm_halo2_verifier_path())
                         .map_err(|e| {
                             eyre::eyre!(
-                        "Failed to read EVM verifier: {}\nPlease run 'cargo openvm setup' first",
-                        e
+                        "Failed to read EVM verifier: {e}\nPlease run 'cargo openvm setup' first"
                     )
                         })?;
 
@@ -158,9 +182,15 @@ impl VerifyCmd {
                     }
                     files[0].clone()
                 };
+                // No app config is involved: verification is an associated function on `Sdk`
                 println!("Verifying EVM proof at {}", proof_path.display());
-                let evm_proof = read_evm_proof_from_file(proof_path)?;
-                sdk.verify_evm_halo2_proof(&evm_verifier, evm_proof)?;
+                let evm_proof: EvmProof = read_from_file_json(proof_path).with_context(|| {
+                    format!("Proof needs to be compatible with openvm v{OPENVM_VERSION}",)
+                })?;
+                if evm_proof.version != format!("v{OPENVM_VERSION}") {
+                    eprintln!("Attempting to verify proof generated with openvm {}, but the verifier is on openvm v{OPENVM_VERSION}", evm_proof.version);
+                }
+                Sdk::verify_evm_halo2_proof(&evm_verifier, evm_proof)?;
             }
         }
         Ok(())
diff --git a/crates/cli/src/default.rs b/crates/cli/src/default.rs
index d1972551b0..390dfb9e82 100644
--- a/crates/cli/src/default.rs
+++ b/crates/cli/src/default.rs
@@ -12,6 +12,10 @@ pub fn default_agg_stark_pk_path() -> String {
     env::var("HOME").unwrap() + "/.openvm/agg_stark.pk"
 }
 
+/// Returns the default path of the aggregation STARK verifying key:
+/// `$HOME/.openvm/agg_stark.vk`.
+///
+/// Panics if the `HOME` environment variable is unset or not valid Unicode.
+pub fn default_agg_stark_vk_path() -> String {
+    let home = env::var("HOME").unwrap();
+    format!("{home}/.openvm/agg_stark.vk")
+}
+
 pub fn default_agg_halo2_pk_path() -> String {
     env::var("HOME").unwrap() + "/.openvm/agg_halo2.pk"
 }
diff --git a/crates/cli/src/util.rs b/crates/cli/src/util.rs
index d3006b0836..11a67da3ce 100644
--- a/crates/cli/src/util.rs
+++ b/crates/cli/src/util.rs
@@ -7,7 +7,7 @@ use eyre::Result;
 use openvm_build::{get_in_scope_packages, get_workspace_packages};
 use openvm_sdk::config::{AppConfig, SdkVmConfig};
 #[cfg(feature = "evm-prove")]
-use openvm_sdk::{fs::read_agg_stark_pk_from_file, keygen::AggProvingKey};
+use openvm_sdk::keygen::{AggProvingKey, Halo2ProvingKey};
 use serde::de::DeserializeOwned;
 
 use crate::{
@@ -34,14 +34,12 @@ pub fn read_config_toml_or_default(config: impl AsRef<Path>) -> Result<AppConfig
 }
 
 #[cfg(feature = "evm-prove")]
-pub fn read_default_agg_pk() -> Result<AggProvingKey> {
-    let agg_stark_pk = read_agg_stark_pk_from_file(crate::default::default_agg_stark_pk_path())?;
-    let halo2_pk =
-        openvm_sdk::fs::read_agg_halo2_pk_from_file(crate::default::default_agg_halo2_pk_path())?;
-    Ok(AggProvingKey {
-        agg_stark_pk,
-        halo2_pk,
-    })
+/// Reads the aggregation STARK proving key and the Halo2 proving key from their default
+/// paths, returning them as `(agg_pk, halo2_pk)`.
+pub fn read_default_agg_and_halo2_pk() -> Result<(AggProvingKey, Halo2ProvingKey)> {
+    use openvm_sdk::fs::read_object_from_file;
+
+    use crate::default::{default_agg_halo2_pk_path, default_agg_stark_pk_path};
+
+    // Tuple operands are evaluated left to right: the STARK key is read first, and the
+    // Halo2 key is only read if that succeeds.
+    Ok((
+        read_object_from_file(default_agg_stark_pk_path())?,
+        read_object_from_file(default_agg_halo2_pk_path())?,
+    ))
 }
 
 pub fn find_manifest_dir(mut current_dir: PathBuf) -> Result<PathBuf> {
@@ -88,11 +86,16 @@ pub fn get_app_vk_path(target_dir: &Path) -> PathBuf {
     target_dir.join("openvm").join(DEFAULT_APP_VK_NAME)
 }
 
+/// Builds the app commit path: `target_name` with its extension set to `commit.json`,
+/// joined onto `target_output_dir`.
+pub fn get_app_commit_path(target_output_dir: &Path, target_name: PathBuf) -> PathBuf {
+    target_output_dir.join(target_name.with_extension("commit.json"))
+}
+
 // Given the arguments to a run command, this function isolates the executable to
 // run. If a specific binary or example is specified it will return that, else it
 // will search the workspace/package for binary targets. If there is a single
 // binary that will be returned, else an error will be raised.
-pub fn get_single_target_name(cargo_args: &RunCargoArgs) -> Result<String> {
+pub fn get_single_target_name(cargo_args: &RunCargoArgs) -> Result<PathBuf> {
     let num_targets = cargo_args.bin.len() + cargo_args.example.len();
     let single_target_name = if num_targets > 1 {
         return Err(eyre::eyre!(
@@ -133,12 +136,12 @@ pub fn get_single_target_name(cargo_args: &RunCargoArgs) -> Result<String> {
                 "No binaries found. If you would like to run an example, use the --example flag.",
             ));
         } else {
-            binaries[0].name.clone()
+            PathBuf::from(binaries[0].name.clone())
         }
     } else if cargo_args.bin.is_empty() {
-        format!("examples/{}", cargo_args.example[0])
+        PathBuf::from("examples").join(&cargo_args.example[0])
     } else {
-        cargo_args.bin[0].clone()
+        PathBuf::from(cargo_args.bin[0].clone())
     };
     Ok(single_target_name)
 }
diff --git a/crates/cli/tests/app_e2e.rs b/crates/cli/tests/integration.rs
similarity index 77%
rename from crates/cli/tests/app_e2e.rs
rename to crates/cli/tests/integration.rs
index 482b583ef1..b8045b6bca 100644
--- a/crates/cli/tests/app_e2e.rs
+++ b/crates/cli/tests/integration.rs
@@ -3,16 +3,27 @@ use std::{
     fs::{self, read_to_string},
     path::Path,
     process::Command,
+    sync::OnceLock,
 };
 
 use eyre::Result;
 use itertools::Itertools;
 use tempfile::tempdir;
 
+/// Installs the CLI once per process unless `SKIP_INSTALL` is set to a non-empty value.
+fn install_cli() {
+    static INSTALLED: OnceLock<()> = OnceLock::new();
+    INSTALLED.get_or_init(|| {
+        if !env::var("SKIP_INSTALL").is_ok_and(|flag| !flag.is_empty()) {
+            run_cmd("cargo", &["install", "--path", ".", "--force", "--locked"]).unwrap();
+        }
+    });
+}
+
 #[test]
 fn test_cli_app_e2e() -> Result<()> {
     let temp_dir = tempdir()?;
-    run_cmd("cargo", &["install", "--path", ".", "--force", "--locked"])?;
+    install_cli();
     let exe_path = "tests/programs/fibonacci/target/openvm/release/openvm-cli-example-test.vmexe";
     let temp_pk = temp_dir.path().join("app.pk");
     let temp_vk = temp_dir.path().join("app.vk");
@@ -87,7 +98,7 @@ fn test_cli_app_e2e() -> Result<()> {
 
 #[test]
 fn test_cli_app_e2e_simplified() -> Result<()> {
-    run_cmd("cargo", &["install", "--path", ".", "--force", "--locked"])?;
+    install_cli();
     run_cmd(
         "cargo",
         &[
@@ -122,13 +133,64 @@ fn test_cli_app_e2e_simplified() -> Result<()> {
     Ok(())
 }
 
+/// Drives the STARK flow end to end through the installed `cargo openvm` CLI: `setup`,
+/// `keygen`, `commit`, `prove stark`, then `verify stark`. Everything after `setup` uses
+/// the `multi` test program, and everything after `keygen` its `fibonacci` example.
+#[test]
+fn test_cli_stark_e2e_simplified() -> Result<()> {
+    const MANIFEST: &str = "tests/programs/multi/Cargo.toml";
+    const EXAMPLE: &str = "fibonacci";
+
+    install_cli();
+    run_cmd("cargo", &["openvm", "setup"])?;
+    run_cmd("cargo", &["openvm", "keygen", "--manifest-path", MANIFEST])?;
+
+    // The remaining subcommands take identical manifest and example arguments.
+    let subcommands: &[&[&str]] = &[&["commit"], &["prove", "stark"], &["verify", "stark"]];
+    for &subcommand in subcommands {
+        let mut args = vec!["openvm"];
+        args.extend_from_slice(subcommand);
+        args.extend_from_slice(&["--manifest-path", MANIFEST, "--example", EXAMPLE]);
+        run_cmd("cargo", &args)?;
+    }
+    Ok(())
+}
+
 #[test]
 fn test_cli_init_build() -> Result<()> {
     let temp_dir = tempdir()?;
     let temp_path = temp_dir.path();
     let config_path = temp_path.join("openvm.toml");
     let manifest_path = temp_path.join("Cargo.toml");
-    run_cmd("cargo", &["install", "--path", ".", "--force", "--locked"])?;
+    install_cli();
 
     // Cargo will not respect patches if run within a workspace
     run_cmd(
diff --git a/crates/continuations/src/verifier/internal/types.rs b/crates/continuations/src/verifier/internal/types.rs
index 14560053c0..697477a09c 100644
--- a/crates/continuations/src/verifier/internal/types.rs
+++ b/crates/continuations/src/verifier/internal/types.rs
@@ -31,14 +31,20 @@ pub struct InternalVmVerifierInput<SC: StarkGenericConfig> {
 assert_impl_all!(InternalVmVerifierInput<BabyBearPoseidon2Config>: Serialize, DeserializeOwned);
 
 /// A proof which can prove OpenVM program execution.
-#[derive(Deserialize, Serialize, Derivative)]
-#[serde(bound = "")]
+///
+/// The `inner` field contains the raw STARK proof, including the public values of each AIR. The
+/// `user_public_values` are special user-defined values that are only committed to in the `inner`
+/// public values: one can verify using a Merkle proof that the former are committed to in the
+/// latter.
+///
+/// This struct may be serialized using the `Encode` trait in the `openvm_sdk` crate.
+#[derive(Derivative)]
 #[derivative(Clone(bound = "Com<SC>: Clone"))]
 pub struct VmStarkProof<SC: StarkGenericConfig> {
-    pub proof: Proof<SC>,
+    /// STARK backend proof
+    pub inner: Proof<SC>,
+    /// User-defined public values, committed to by the public values contained in `inner`
     pub user_public_values: Vec<Val<SC>>,
 }
-assert_impl_all!(VmStarkProof<BabyBearPoseidon2Config>: Serialize, DeserializeOwned);
 
 /// Aggregated state of all segments
 #[derive(Debug, Clone, Copy, AlignedBorrow)]
diff --git a/crates/continuations/src/verifier/internal/vars.rs b/crates/continuations/src/verifier/internal/vars.rs
index 4fa00c004b..0dddd0b24a 100644
--- a/crates/continuations/src/verifier/internal/vars.rs
+++ b/crates/continuations/src/verifier/internal/vars.rs
@@ -57,7 +57,7 @@ impl Hintable<C> for VmStarkProof<SC> {
     }
 
     fn write(&self) -> Vec<Vec<<C as Config>::N>> {
-        let mut stream = self.proof.write();
+        // Stream order: the inner STARK proof first, then the user public values.
+        let mut stream = self.inner.write();
         stream.extend(self.user_public_values.write());
         stream
     }
diff --git a/crates/continuations/src/verifier/leaf/mod.rs b/crates/continuations/src/verifier/leaf/mod.rs
index 969733ba41..7ab08cdb0b 100644
--- a/crates/continuations/src/verifier/leaf/mod.rs
+++ b/crates/continuations/src/verifier/leaf/mod.rs
@@ -1,6 +1,6 @@
 use openvm_circuit::{
-    arch::{instructions::program::Program, SystemConfig},
-    system::memory::tree::public_values::PUBLIC_VALUES_ADDRESS_SPACE_OFFSET,
+    arch::{instructions::program::Program, SystemConfig, ADDR_SPACE_OFFSET},
+    system::memory::merkle::public_values::PUBLIC_VALUES_ADDRESS_SPACE_OFFSET,
 };
 use openvm_native_compiler::{conversion::CompilerOptions, prelude::*};
 use openvm_native_recursion::{
@@ -113,7 +113,7 @@ impl LeafVmVerifierConfig {
         builder: &mut Builder<C>,
     ) -> ([Felt<F>; DIGEST_SIZE], [Felt<F>; DIGEST_SIZE]) {
         let memory_dimensions = self.app_system_config.memory_config.memory_dimensions();
-        let pv_as = PUBLIC_VALUES_ADDRESS_SPACE_OFFSET + memory_dimensions.as_offset;
+        let pv_as = PUBLIC_VALUES_ADDRESS_SPACE_OFFSET + ADDR_SPACE_OFFSET;
         let pv_start_idx = memory_dimensions.label_to_index((pv_as, 0));
         let pv_height = log2_strict_usize(self.app_system_config.num_public_values / DIGEST_SIZE);
         let proof_len = memory_dimensions.overall_height() - pv_height;
diff --git a/crates/continuations/src/verifier/leaf/types.rs b/crates/continuations/src/verifier/leaf/types.rs
index 16aca7a169..d47b36f248 100644
--- a/crates/continuations/src/verifier/leaf/types.rs
+++ b/crates/continuations/src/verifier/leaf/types.rs
@@ -1,6 +1,6 @@
 use derivative::Derivative;
 use openvm_circuit::{
-    arch::ContinuationVmProof, system::memory::tree::public_values::UserPublicValuesProof,
+    arch::ContinuationVmProof, system::memory::merkle::public_values::UserPublicValuesProof,
 };
 use openvm_native_compiler::ir::DIGEST_SIZE;
 use openvm_stark_sdk::{
diff --git a/crates/prof/src/aggregate.rs b/crates/prof/src/aggregate.rs
index 047d16b30a..999c62bd90 100644
--- a/crates/prof/src/aggregate.rs
+++ b/crates/prof/src/aggregate.rs
@@ -117,6 +117,23 @@ impl GroupedMetrics {
         })
     }
 
+    /// Checks that the E1, metered, and preflight instruction counts of a group agree.
+    ///
+    /// Only metrics present in `group_summaries` are compared, and each count is the metric's
+    /// `sum` value cast to `u64`.
+    ///
+    /// # Panics
+    ///
+    /// Panics with a message naming the two disagreeing metrics if any compared pair differs.
+    fn validate_instruction_counts(group_summaries: &HashMap<MetricName, Stats>) {
+        let e1 = group_summaries.get(EXECUTE_E1_INSNS_LABEL);
+        let metered = group_summaries.get(EXECUTE_METERED_INSNS_LABEL);
+        let preflight = group_summaries.get(EXECUTE_PREFLIGHT_INSNS_LABEL);
+
+        // Every pair is listed because any one of the three metrics may be absent.
+        let pairs = [
+            ("E1", e1, "preflight", preflight),
+            ("E1", e1, "metered", metered),
+            ("metered", metered, "preflight", preflight),
+        ];
+        for (lhs_name, lhs, rhs_name, rhs) in pairs {
+            if let (Some(lhs), Some(rhs)) = (lhs, rhs) {
+                assert_eq!(
+                    lhs.sum.val as u64,
+                    rhs.sum.val as u64,
+                    "{lhs_name} and {rhs_name} instruction counts differ"
+                );
+            }
+        }
+    }
+
     pub fn aggregate(&self) -> AggregateMetrics {
         let by_group: HashMap<String, _> = self
             .by_group
@@ -133,6 +150,11 @@ impl GroupedMetrics {
                         (metric_name.clone(), summary)
                     })
                     .collect();
+
+                if !group_name.contains("keygen") {
+                    Self::validate_instruction_counts(&group_summaries);
+                }
+
                 (group_name.clone(), group_summaries)
             })
             .collect();
@@ -165,11 +187,14 @@ impl AggregateMetrics {
         let mut total_par_proof_time = MdTableCell::new(0.0, Some(0.0));
         for (group_name, metrics) in &self.by_group {
             let stats = metrics.get(PROOF_TIME_LABEL);
-            let execute_stats = metrics.get(EXECUTE_TIME_LABEL);
+            let execute_metered_stats = metrics.get(EXECUTE_METERED_TIME_LABEL);
+            let execute_e1_stats = metrics.get(EXECUTE_E1_TIME_LABEL);
             if stats.is_none() {
                 continue;
             }
-            let stats = stats.unwrap();
+            let stats = stats.unwrap_or_else(|| {
+                panic!("Missing proof time statistics for group '{}'", group_name)
+            });
             let mut sum = stats.sum;
             let mut max = stats.max;
             // convert ms to s
@@ -184,26 +209,61 @@ impl AggregateMetrics {
             if !group_name.contains("keygen") {
                 // Proving time in keygen group is dummy and not part of total.
                 total_proof_time.val += sum.val;
-                *total_proof_time.diff.as_mut().unwrap() += sum.diff.unwrap_or(0.0);
+                *total_proof_time
+                    .diff
+                    .as_mut()
+                    .expect("total_proof_time.diff should be initialized") +=
+                    sum.diff.unwrap_or(0.0);
                 total_par_proof_time.val += max.val;
-                *total_par_proof_time.diff.as_mut().unwrap() += max.diff.unwrap_or(0.0);
+                *total_par_proof_time
+                    .diff
+                    .as_mut()
+                    .expect("total_par_proof_time.diff should be initialized") +=
+                    max.diff.unwrap_or(0.0);
 
-                // Account for the fact that execution is serial
-                // Add total execution time for the app proofs, and subtract the max segment
-                // execution time
+                // Account for the serial execute_metered and execute_e1 for app outside of segments
                 if group_name != "leaf"
                     && group_name != "root"
                     && group_name != "halo2_outer"
                     && group_name != "halo2_wrapper"
                     && !group_name.starts_with("internal")
                 {
-                    let execute_stats = execute_stats.unwrap();
-                    total_par_proof_time.val +=
-                        (execute_stats.sum.val - execute_stats.max.val) / 1000.0;
-                    *total_par_proof_time.diff.as_mut().unwrap() +=
-                        (execute_stats.sum.diff.unwrap_or(0.0)
-                            - execute_stats.max.diff.unwrap_or(0.0))
-                            / 1000.0;
+                    if let Some(execute_metered_stats) = execute_metered_stats {
+                        // For metered metrics without segment labels, we just use the value
+                        // directly Count is 1, so avg = sum = max = min =
+                        // value
+                        total_proof_time.val += execute_metered_stats.avg.val / 1000.0;
+                        total_par_proof_time.val += execute_metered_stats.avg.val / 1000.0;
+                        if let Some(diff) = execute_metered_stats.avg.diff {
+                            *total_proof_time
+                                .diff
+                                .as_mut()
+                                .expect("total_proof_time.diff should be initialized") +=
+                                diff / 1000.0;
+                            *total_par_proof_time
+                                .diff
+                                .as_mut()
+                                .expect("total_par_proof_time.diff should be initialized") +=
+                                diff / 1000.0;
+                        }
+                    }
+
+                    if let Some(execute_e1_stats) = execute_e1_stats {
+                        total_proof_time.val += execute_e1_stats.avg.val / 1000.0;
+                        total_par_proof_time.val += execute_e1_stats.avg.val / 1000.0;
+                        if let Some(diff) = execute_e1_stats.avg.diff {
+                            *total_proof_time
+                                .diff
+                                .as_mut()
+                                .expect("total_proof_time.diff should be initialized") +=
+                                diff / 1000.0;
+                            *total_par_proof_time
+                                .diff
+                                .as_mut()
+                                .expect("total_par_proof_time.diff should be initialized") +=
+                                diff / 1000.0;
+                        }
+                    }
                 }
             }
         }
@@ -239,7 +299,13 @@ impl AggregateMetrics {
             .into_iter()
             .map(|group_name| {
                 let key = group_name.clone();
-                let value = self.by_group.get(group_name).unwrap().clone();
+                let value = self
+                    .by_group
+                    .get(group_name)
+                    .unwrap_or_else(|| {
+                        panic!("Group '{}' should exist in by_group map", group_name)
+                    })
+                    .clone();
                 (key, value)
             })
             .collect()
@@ -252,6 +318,7 @@ impl AggregateMetrics {
             .map(|(group_name, metrics)| {
                 let metrics = metrics
                     .iter()
+                    .filter(|(_, stats)| stats.avg.val.is_finite() && stats.sum.val.is_finite())
                     .flat_map(|(metric_name, stats)| {
                         [
                             (format!("{metric_name}::sum"), stats.sum.into()),
@@ -295,11 +362,37 @@ impl AggregateMetrics {
             for metric_name in names {
                 let summary = summaries.get(metric_name);
                 if let Some(summary) = summary {
-                    writeln!(
-                        writer,
-                        "| `{:<20}` | {:<10} | {:<10} | {:<10} | {:<10} |",
-                        metric_name, summary.avg, summary.sum, summary.max, summary.min,
-                    )?;
+                    // Special handling for execute_metered metrics (not aggregated across segments
+                    // in the app proof case)
+                    if metric_name == EXECUTE_METERED_TIME_LABEL
+                        && group_name != "leaf"
+                        && group_name != "root"
+                        && group_name != "halo2_outer"
+                        && group_name != "halo2_wrapper"
+                        && !group_name.starts_with("internal")
+                    {
+                        writeln!(
+                            writer,
+                            "| `{:<20}` | {:<10} | {:<10} | {:<10} | {:<10} |",
+                            metric_name, summary.avg, "-", "-", "-",
+                        )?;
+                    } else if metric_name == EXECUTE_E1_INSN_MI_S_LABEL
+                        || metric_name == EXECUTE_PREFLIGHT_INSN_MI_S_LABEL
+                        || metric_name == EXECUTE_METERED_INSN_MI_S_LABEL
+                    {
+                        // skip sum because it is misleading
+                        writeln!(
+                            writer,
+                            "| `{:<20}` | {:<10} | {:<10} | {:<10} | {:<10} |",
+                            metric_name, summary.avg, "-", summary.max, summary.min,
+                        )?;
+                    } else {
+                        writeln!(
+                            writer,
+                            "| `{:<20}` | {:<10} | {:<10} | {:<10} | {:<10} |",
+                            metric_name, summary.avg, summary.sum, summary.max, summary.min,
+                        )?;
+                    }
                 }
             }
             writeln!(writer)?;
@@ -317,11 +410,15 @@ impl AggregateMetrics {
         writeln!(writer, "|:---|---:|---:|")?;
         let mut rows = Vec::new();
         for (group_name, summaries) in self.to_vec() {
+            if group_name.contains("keygen") {
+                continue;
+            }
             let stats = summaries.get(PROOF_TIME_LABEL);
-            if stats.is_none() {
-                continue;
-            }
-            let stats = stats.unwrap();
+            // Skip groups that have no proof-time statistics.
+            let stats = match stats {
+                Some(stats) => stats,
+                None => continue,
+            };
             let mut sum = stats.sum;
             let mut max = stats.max;
             // convert ms to s
@@ -352,7 +450,12 @@ impl AggregateMetrics {
         self.by_group
             .keys()
             .find(|k| group_weight(k) == 0)
-            .unwrap_or_else(|| self.by_group.keys().next().unwrap())
+            .unwrap_or_else(|| {
+                self.by_group
+                    .keys()
+                    .next()
+                    .expect("by_group should contain at least one group")
+            })
             .clone()
     }
 }
@@ -381,18 +484,38 @@ impl BenchmarkOutput {
 }
 
 pub const PROOF_TIME_LABEL: &str = "total_proof_time_ms";
-pub const CELLS_USED_LABEL: &str = "main_cells_used";
-pub const CYCLES_LABEL: &str = "total_cycles";
-pub const EXECUTE_TIME_LABEL: &str = "execute_time_ms";
+pub const MAIN_CELLS_USED_LABEL: &str = "main_cells_used";
+pub const TOTAL_CELLS_USED_LABEL: &str = "total_cells_used";
+pub const EXECUTE_E1_INSNS_LABEL: &str = "execute_e1_insns";
+pub const EXECUTE_METERED_INSNS_LABEL: &str = "execute_metered_insns";
+pub const EXECUTE_PREFLIGHT_INSNS_LABEL: &str = "execute_preflight_insns";
+pub const EXECUTE_E1_TIME_LABEL: &str = "execute_e1_time_ms";
+pub const EXECUTE_E1_INSN_MI_S_LABEL: &str = "execute_e1_insn_mi/s";
+pub const EXECUTE_METERED_TIME_LABEL: &str = "execute_metered_time_ms";
+pub const EXECUTE_METERED_INSN_MI_S_LABEL: &str = "execute_metered_insn_mi/s";
+pub const EXECUTE_PREFLIGHT_TIME_LABEL: &str = "execute_preflight_time_ms";
+pub const EXECUTE_PREFLIGHT_INSN_MI_S_LABEL: &str = "execute_preflight_insn_mi/s";
 pub const TRACE_GEN_TIME_LABEL: &str = "trace_gen_time_ms";
+pub const MEM_FIN_TIME_LABEL: &str = "memory_finalize_time_ms";
+pub const BOUNDARY_FIN_TIME_LABEL: &str = "boundary_finalize_time_ms";
+pub const MERKLE_FIN_TIME_LABEL: &str = "merkle_finalize_time_ms";
 pub const PROVE_EXCL_TRACE_TIME_LABEL: &str = "stark_prove_excluding_trace_time_ms";
 
 pub const VM_METRIC_NAMES: &[&str] = &[
     PROOF_TIME_LABEL,
-    CELLS_USED_LABEL,
-    CYCLES_LABEL,
-    EXECUTE_TIME_LABEL,
+    MAIN_CELLS_USED_LABEL,
+    TOTAL_CELLS_USED_LABEL,
+    EXECUTE_E1_TIME_LABEL,
+    EXECUTE_E1_INSN_MI_S_LABEL,
+    EXECUTE_METERED_TIME_LABEL,
+    EXECUTE_METERED_INSN_MI_S_LABEL,
+    EXECUTE_PREFLIGHT_INSNS_LABEL,
+    EXECUTE_PREFLIGHT_TIME_LABEL,
+    EXECUTE_PREFLIGHT_INSN_MI_S_LABEL,
     TRACE_GEN_TIME_LABEL,
+    MEM_FIN_TIME_LABEL,
+    BOUNDARY_FIN_TIME_LABEL,
+    MERKLE_FIN_TIME_LABEL,
     PROVE_EXCL_TRACE_TIME_LABEL,
     "main_trace_commit_time_ms",
     "generate_perm_trace_time_ms",
diff --git a/crates/prof/src/lib.rs b/crates/prof/src/lib.rs
index 58440a8e02..ec6117c1e7 100644
--- a/crates/prof/src/lib.rs
+++ b/crates/prof/src/lib.rs
@@ -1,12 +1,13 @@
 use std::{collections::HashMap, fs::File, path::Path};
 
-use aggregate::{
-    EXECUTE_TIME_LABEL, PROOF_TIME_LABEL, PROVE_EXCL_TRACE_TIME_LABEL, TRACE_GEN_TIME_LABEL,
-};
+use aggregate::{PROOF_TIME_LABEL, PROVE_EXCL_TRACE_TIME_LABEL, TRACE_GEN_TIME_LABEL};
 use eyre::Result;
 use memmap2::Mmap;
 
-use crate::types::{Labels, Metric, MetricDb, MetricsFile};
+use crate::{
+    aggregate::{EXECUTE_METERED_TIME_LABEL, EXECUTE_PREFLIGHT_TIME_LABEL},
+    types::{Labels, Metric, MetricDb, MetricsFile},
+};
 
 pub mod aggregate;
 pub mod summary;
@@ -45,13 +46,29 @@ impl MetricDb {
     pub fn apply_aggregations(&mut self) {
         for metrics in self.flat_dict.values_mut() {
             let get = |key: &str| metrics.iter().find(|m| m.name == key).map(|m| m.value);
-            let execute_time = get(EXECUTE_TIME_LABEL);
+            let total_proof_time = get(PROOF_TIME_LABEL);
+            if total_proof_time.is_some() {
+                // We have instrumented total_proof_time_ms
+                continue;
+            }
+            // otherwise, calculate it from sub-components
+            let execute_metered_time = get(EXECUTE_METERED_TIME_LABEL);
+            let execute_preflight_time = get(EXECUTE_PREFLIGHT_TIME_LABEL);
             let trace_gen_time = get(TRACE_GEN_TIME_LABEL);
             let prove_excl_trace_time = get(PROVE_EXCL_TRACE_TIME_LABEL);
-            if let (Some(execute_time), Some(trace_gen_time), Some(prove_excl_trace_time)) =
-                (execute_time, trace_gen_time, prove_excl_trace_time)
-            {
-                let total_time = execute_time + trace_gen_time + prove_excl_trace_time;
+            if let (
+                Some(execute_preflight_time),
+                Some(trace_gen_time),
+                Some(prove_excl_trace_time),
+            ) = (
+                execute_preflight_time,
+                trace_gen_time,
+                prove_excl_trace_time,
+            ) {
+                let total_time = execute_metered_time.unwrap_or(0.0)
+                    + execute_preflight_time
+                    + trace_gen_time
+                    + prove_excl_trace_time;
                 metrics.push(Metric::new(PROOF_TIME_LABEL.to_string(), total_time));
             }
         }
@@ -90,7 +107,12 @@ impl MetricDb {
 
             let label_values: Vec<String> = label_keys
                 .iter()
-                .map(|key| label_dict.get(key).unwrap().clone())
+                .map(|key| {
+                    label_dict
+                        .get(key)
+                        .unwrap_or_else(|| panic!("Label key '{}' should exist in label_dict", key))
+                        .clone()
+                })
                 .collect();
 
             // Add to dict_by_label_types
diff --git a/crates/prof/src/main.rs b/crates/prof/src/main.rs
index 31ddb2b359..1474153a9f 100644
--- a/crates/prof/src/main.rs
+++ b/crates/prof/src/main.rs
@@ -84,8 +84,9 @@ fn main() -> Result<()> {
             // If this is a new benchmark, prev_path will not exist
             if let Ok(prev_db) = MetricDb::new(&prev_path) {
                 let prev_grouped = GroupedMetrics::new(&prev_db, "group")?;
-                prev_aggregated = Some(prev_grouped.aggregate());
-                aggregated.set_diff(prev_aggregated.as_ref().unwrap());
+                let prev_grouped_aggregated = prev_grouped.aggregate();
+                aggregated.set_diff(&prev_grouped_aggregated);
+                prev_aggregated = Some(prev_grouped_aggregated);
             }
         }
         if name.is_empty() {
diff --git a/crates/prof/src/summary.rs b/crates/prof/src/summary.rs
index 9501b03e05..0ab51448f3 100644
--- a/crates/prof/src/summary.rs
+++ b/crates/prof/src/summary.rs
@@ -4,7 +4,11 @@ use eyre::Result;
 use itertools::Itertools;
 
 use crate::{
-    aggregate::{AggregateMetrics, CELLS_USED_LABEL, CYCLES_LABEL, PROOF_TIME_LABEL},
+    aggregate::{
+        AggregateMetrics, EXECUTE_METERED_TIME_LABEL, EXECUTE_PREFLIGHT_INSNS_LABEL,
+        EXECUTE_PREFLIGHT_TIME_LABEL, MAIN_CELLS_USED_LABEL, PROOF_TIME_LABEL,
+        PROVE_EXCL_TRACE_TIME_LABEL, TRACE_GEN_TIME_LABEL,
+    },
     types::MdTableCell,
 };
 
@@ -37,7 +41,7 @@ pub struct SingleSummaryMetrics {
     /// Parallel proof time is approximated as the max of proof times within a group
     pub par_proof_time_ms: MdTableCell,
     pub cells_used: MdTableCell,
-    pub cycles: MdTableCell,
+    pub insns: MdTableCell,
 }
 
 impl GithubSummary {
@@ -52,8 +56,14 @@ impl GithubSummary {
             .zip_eq(md_paths.iter())
             .zip_eq(names)
             .map(|(((aggregated, prev_aggregated), md_path), name)| {
-                let md_filename = md_path.file_name().unwrap().to_str().unwrap();
-                let mut row = aggregated.get_summary_row(md_filename).unwrap();
+                let md_filename = md_path
+                    .file_name()
+                    .expect("Path should have a filename")
+                    .to_str()
+                    .expect("Filename should be valid UTF-8");
+                let mut row = aggregated.get_summary_row(md_filename).unwrap_or_else(|| {
+                    panic!("Failed to get summary row for file '{}'", md_filename)
+                });
                 if let Some(prev_aggregated) = prev_aggregated {
                     // md_filename doesn't matter
                     if let Some(prev_row) = prev_aggregated.get_summary_row(md_filename) {
@@ -136,14 +146,14 @@ impl SingleSummaryMetrics {
         write!(
             writer,
             "{} | {} | {} |",
-            self.proof_time_ms, self.cycles, self.cells_used,
+            self.proof_time_ms, self.insns, self.cells_used,
         )?;
         Ok(())
     }
 
     pub fn set_diff(&mut self, prev: &Self) {
         self.cells_used.diff = Some(self.cells_used.val - prev.cells_used.val);
-        self.cycles.diff = Some(self.cycles.val - prev.cycles.val);
+        self.insns.diff = Some(self.insns.val - prev.insns.val);
         self.proof_time_ms.diff = Some(self.proof_time_ms.val - prev.proof_time_ms.val);
     }
 }
@@ -152,16 +162,69 @@ impl AggregateMetrics {
     pub fn get_single_summary(&self, name: &str) -> Option<SingleSummaryMetrics> {
         let stats = self.by_group.get(name)?;
-        // Any group must have proof_time, but may not have cells_used or cycles (e.g., halo2)
-        let proof_time_ms = stats.get(PROOF_TIME_LABEL)?.sum;
-        let par_proof_time_ms = stats.get(PROOF_TIME_LABEL)?.max;
+        // A group may lack total_proof_time_ms, in which case it is derived from its
+        // components below; it may also lack cells_used or insns (e.g., halo2).
+        let proof_time_ms = if let Some(proof_stats) = stats.get(PROOF_TIME_LABEL) {
+            proof_stats.sum
+        } else {
+            // total_proof_time_ms is not available: sum the per-component totals instead.
+            // Note: execute_metered is outside any segment scope, so it should have sum = max = avg
+            let execute_metered = stats
+                .get(EXECUTE_METERED_TIME_LABEL)
+                .map(|s| s.sum.val)
+                .unwrap_or(0.0);
+            let execute_preflight = stats
+                .get(EXECUTE_PREFLIGHT_TIME_LABEL)
+                .map(|s| s.sum.val)
+                .unwrap_or(0.0);
+            let trace_gen = stats
+                .get(TRACE_GEN_TIME_LABEL)
+                .map(|s| s.sum.val)
+                .unwrap_or(0.0);
+            let stark_prove = stats
+                .get(PROVE_EXCL_TRACE_TIME_LABEL)
+                .map(|s| s.sum.val)
+                .unwrap_or(0.0);
+            MdTableCell::new(
+                execute_metered + execute_preflight + trace_gen + stark_prove,
+                None,
+            )
+        };
+        let par_proof_time_ms = if let Some(proof_stats) = stats.get(PROOF_TIME_LABEL) {
+            proof_stats.max
+        } else {
+            // Use the same computation for max
+            let execute_metered = stats
+                .get(EXECUTE_METERED_TIME_LABEL)
+                .map(|s| s.max.val)
+                .unwrap_or(0.0);
+            let execute_preflight = stats
+                .get(EXECUTE_PREFLIGHT_TIME_LABEL)
+                .map(|s| s.max.val)
+                .unwrap_or(0.0);
+            let trace_gen = stats
+                .get(TRACE_GEN_TIME_LABEL)
+                .map(|s| s.max.val)
+                .unwrap_or(0.0);
+            let stark_prove = stats
+                .get(PROVE_EXCL_TRACE_TIME_LABEL)
+                .map(|s| s.max.val)
+                .unwrap_or(0.0);
+            MdTableCell::new(
+                execute_metered + execute_preflight + trace_gen + stark_prove,
+                None,
+            )
+        };
         let cells_used = stats
-            .get(CELLS_USED_LABEL)
+            .get(MAIN_CELLS_USED_LABEL)
+            .map(|s| s.sum)
+            .unwrap_or_default();
+        let insns = stats
+            .get(EXECUTE_PREFLIGHT_INSNS_LABEL)
             .map(|s| s.sum)
             .unwrap_or_default();
-        let cycles = stats.get(CYCLES_LABEL).map(|s| s.sum).unwrap_or_default();
         Some(SingleSummaryMetrics {
             cells_used,
-            cycles,
+            insns,
             proof_time_ms,
             par_proof_time_ms,
         })
diff --git a/crates/sdk/Cargo.toml b/crates/sdk/Cargo.toml
index 6a868a3beb..8e1bdd449a 100644
--- a/crates/sdk/Cargo.toml
+++ b/crates/sdk/Cargo.toml
@@ -42,7 +42,6 @@ derivative = { workspace = true }
 derive_more = { workspace = true }
 serde = { workspace = true }
 eyre.workspace = true
-async-trait.workspace = true
 metrics.workspace = true
 tracing.workspace = true
 itertools.workspace = true
@@ -50,14 +49,16 @@ getset.workspace = true
 clap = { workspace = true, features = ["derive"] }
 serde_with = { workspace = true, features = ["hex"] }
 serde_json.workspace = true
+toml.workspace = true
 thiserror.workspace = true
+rand.workspace = true
 snark-verifier = { workspace = true, optional = true }
 snark-verifier-sdk = { workspace = true, optional = true }
 tempfile.workspace = true
 hex.workspace = true
 forge-fmt = { workspace = true, optional = true }
-rrs-lib = { workspace = true }
-num-bigint = { workspace = true }
+rrs-lib.workspace = true
+num-bigint.workspace = true
 
 [features]
 default = ["parallel", "jemalloc"]
@@ -73,13 +74,15 @@ evm-verify = [
     "dep:alloy-sol-types",
     "dep:forge-fmt",
 ]
-bench-metrics = [
-    "openvm-circuit/bench-metrics",
-    "openvm-native-recursion/bench-metrics",
-    "openvm-native-compiler/bench-metrics",
+metrics = [
+    "openvm-circuit/metrics",
+    "openvm-native-recursion/metrics",
+    "openvm-native-compiler/metrics",
 ]
 # for guest profiling:
-profiling = ["openvm-circuit/function-span", "openvm-transpiler/function-span"]
+perf-metrics = ["openvm-circuit/perf-metrics", "openvm-transpiler/function-span"]
+# turns on stark-backend debugger in all proofs
+stark-debug = ["openvm-circuit/stark-debug"]
 test-utils = ["openvm-circuit/test-utils"]
 # performance features:
 # (rayon is always imported because of halo2, so "parallel" feature is redundant)
@@ -93,3 +96,6 @@ nightly-features = ["openvm-circuit/nightly-features"]
 name = "sdk_evm"
 path = "examples/sdk_evm.rs"
 required-features = ["evm-verify"]
+
+[package.metadata.cargo-shear]
+ignored = ["derive_more", "rand"]
diff --git a/crates/sdk/examples/sdk_app.rs b/crates/sdk/examples/sdk_app.rs
deleted file mode 100644
index 31ba0ab264..0000000000
--- a/crates/sdk/examples/sdk_app.rs
+++ /dev/null
@@ -1,114 +0,0 @@
-// ANCHOR: dependencies
-use std::{fs, sync::Arc};
-
-use eyre::Result;
-use openvm::platform::memory::MEM_SIZE;
-use openvm_build::GuestOptions;
-use openvm_sdk::{
-    config::{AppConfig, SdkVmConfig},
-    prover::AppProver,
-    Sdk, StdIn,
-};
-use openvm_stark_sdk::config::{baby_bear_poseidon2::BabyBearPoseidon2Engine, FriParameters};
-use openvm_transpiler::elf::Elf;
-use serde::{Deserialize, Serialize};
-
-#[derive(Serialize, Deserialize)]
-pub struct SomeStruct {
-    pub a: u64,
-    pub b: u64,
-}
-// ANCHOR_END: dependencies
-
-#[allow(dead_code, unused_variables)]
-fn read_elf() -> Result<(), Box<dyn std::error::Error>> {
-    // ANCHOR: read_elf
-    // 2b. Load the ELF from a file
-    let elf_bytes = fs::read("your_path_to_elf")?;
-    let elf = Elf::decode(&elf_bytes, MEM_SIZE as u32)?;
-    // ANCHOR_END: read_elf
-    Ok(())
-}
-
-#[allow(unused_variables, unused_doc_comments)]
-fn main() -> Result<(), Box<dyn std::error::Error>> {
-    // ANCHOR: vm_config
-    let vm_config = SdkVmConfig::builder()
-        .system(Default::default())
-        .rv32i(Default::default())
-        .rv32m(Default::default())
-        .io(Default::default())
-        .build();
-    // ANCHOR_END: vm_config
-
-    /// to import example guest code in crate replace `target_path` for:
-    /// ```
-    /// use std::path::PathBuf;
-    ///
-    /// let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).to_path_buf();
-    /// path.push("guest/fib");
-    /// let target_path = path.to_str().unwrap();
-    /// ```
-    // ANCHOR: build
-    // 1. Build the VmConfig with the extensions needed.
-    let sdk = Sdk::new();
-
-    // 2a. Build the ELF with guest options and a target filter.
-    let guest_opts = GuestOptions::default();
-    let target_path = "your_path_project_root";
-    let elf = sdk.build(
-        guest_opts,
-        &vm_config,
-        target_path,
-        &Default::default(),
-        None,
-    )?;
-    // ANCHOR_END: build
-
-    // ANCHOR: transpilation
-    // 3. Transpile the ELF into a VmExe
-    let exe = sdk.transpile(elf, vm_config.transpiler())?;
-    // ANCHOR_END: transpilation
-
-    // ANCHOR: execution
-    // 4. Format your input into StdIn
-    let my_input = SomeStruct { a: 1, b: 2 }; // anything that can be serialized
-    let mut stdin = StdIn::default();
-    stdin.write(&my_input);
-
-    // 5. Run the program
-    let output = sdk.execute(exe.clone(), vm_config.clone(), stdin.clone())?;
-    println!("public values output: {:?}", output);
-    // ANCHOR_END: execution
-
-    // ANCHOR: proof_generation
-    // 6. Set app configuration
-    let app_log_blowup = 2;
-    let app_fri_params = FriParameters::standard_with_100_bits_conjectured_security(app_log_blowup);
-    let app_config = AppConfig::new(app_fri_params, vm_config);
-
-    // 7. Commit the exe
-    let app_committed_exe = sdk.commit_app_exe(app_fri_params, exe)?;
-
-    // 8. Generate an AppProvingKey
-    let app_pk = Arc::new(sdk.app_keygen(app_config)?);
-
-    // 9a. Generate a proof
-    let proof = sdk.generate_app_proof(app_pk.clone(), app_committed_exe.clone(), stdin.clone())?;
-    // 9b. Generate a proof with an AppProver with custom fields
-    let app_prover = AppProver::<_, BabyBearPoseidon2Engine>::new(
-        app_pk.app_vm_pk.clone(),
-        app_committed_exe.clone(),
-    )
-    .with_program_name("test_program");
-    let proof = app_prover.generate_app_proof(stdin.clone());
-    // ANCHOR_END: proof_generation
-
-    // ANCHOR: verification
-    // 10. Verify your program
-    let app_vk = app_pk.get_app_vk();
-    sdk.verify_app_proof(&app_vk, &proof)?;
-    // ANCHOR_END: verification
-
-    Ok(())
-}
diff --git a/crates/sdk/examples/sdk_evm.rs b/crates/sdk/examples/sdk_evm.rs
index 8833542b73..8db6aa8239 100644
--- a/crates/sdk/examples/sdk_evm.rs
+++ b/crates/sdk/examples/sdk_evm.rs
@@ -1,16 +1,9 @@
-// ANCHOR: dependencies
-use std::{fs, sync::Arc};
+// [!region dependencies]
+use std::fs;
 
 use eyre::Result;
-use openvm::platform::memory::MEM_SIZE;
 use openvm_build::GuestOptions;
-use openvm_native_recursion::halo2::utils::CacheHalo2ParamsReader;
-use openvm_sdk::{
-    config::{AggConfig, AppConfig, SdkVmConfig},
-    DefaultStaticVerifierPvHandler, Sdk, StdIn,
-};
-use openvm_stark_sdk::config::FriParameters;
-use openvm_transpiler::elf::Elf;
+use openvm_sdk::{Sdk, StdIn};
 use serde::{Deserialize, Serialize};
 
 #[derive(Serialize, Deserialize)]
@@ -18,29 +11,19 @@ pub struct SomeStruct {
     pub a: u64,
     pub b: u64,
 }
-// ANCHOR_END: dependencies
+// [!endregion dependencies]
 
 #[allow(dead_code, unused_variables)]
 fn read_elf() -> Result<(), Box<dyn std::error::Error>> {
-    // ANCHOR: read_elf
+    // [!region read_elf]
     // 2b. Load the ELF from a file
-    let elf_bytes = fs::read("your_path_to_elf")?;
-    let elf = Elf::decode(&elf_bytes, MEM_SIZE as u32)?;
-    // ANCHOR_END: read_elf
+    let elf: Vec<u8> = fs::read("your_path_to_elf")?;
+    // [!endregion read_elf]
     Ok(())
 }
 
 #[allow(unused_variables, unused_doc_comments)]
 fn main() -> Result<(), Box<dyn std::error::Error>> {
-    // ANCHOR: vm_config
-    let vm_config = SdkVmConfig::builder()
-        .system(Default::default())
-        .rv32i(Default::default())
-        .rv32m(Default::default())
-        .io(Default::default())
-        .build();
-    // ANCHOR_END: vm_config
-
     /// to import example guest code in crate replace `target_path` for:
     /// ```
     /// use std::path::PathBuf;
@@ -49,73 +32,36 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
     /// path.push("guest/fib");
     /// let target_path = path.to_str().unwrap();
     /// ```
-    // ANCHOR: build
+    // [!region build]
-    // 1. Build the VmConfig with the extensions needed.
+    // 1. Create the SDK (this example uses the `Sdk::riscv32()` constructor).
-    let sdk = Sdk::new();
+    let sdk = Sdk::riscv32();
 
     // 2a. Build the ELF with guest options and a target filter.
     let guest_opts = GuestOptions::default();
     let target_path = "your_path_project_root";
-    let elf = sdk.build(
-        guest_opts,
-        &vm_config,
-        target_path,
-        &Default::default(),
-        None,
-    )?;
-    // ANCHOR_END: build
-
-    // ANCHOR: transpilation
-    // 3. Transpile the ELF into a VmExe
-    let exe = sdk.transpile(elf, vm_config.transpiler())?;
-    // ANCHOR_END: transpilation
+    let elf = sdk.build(guest_opts, target_path, &None, None)?;
+    // [!endregion build]
 
-    // ANCHOR: execution
-    // 4. Format your input into StdIn
+    // [!region input]
+    // 3. Format your input into StdIn
     let my_input = SomeStruct { a: 1, b: 2 }; // anything that can be serialized
     let mut stdin = StdIn::default();
     stdin.write(&my_input);
-    // ANCHOR_END: execution
-
-    // ANCHOR: keygen
-    // 5. Set app configuration
-    let app_log_blowup = 2;
-    let app_fri_params = FriParameters::standard_with_100_bits_conjectured_security(app_log_blowup);
-    let app_config = AppConfig::new(app_fri_params, vm_config);
-
-    // 6. Commit the exe
-    let app_committed_exe = sdk.commit_app_exe(app_fri_params, exe)?;
-
-    // 7. Generate an AppProvingKey
-    let app_pk = Arc::new(sdk.app_keygen(app_config)?);
-    // ANCHOR_END: keygen
-
-    // ANCHOR: evm_verification
-    // 8. Generate the aggregation proving key
-    const DEFAULT_PARAMS_DIR: &str = concat!(env!("HOME"), "/.openvm/params/");
-    let halo2_params_reader = CacheHalo2ParamsReader::new(DEFAULT_PARAMS_DIR);
-    let agg_config = AggConfig::default();
-    let agg_pk = sdk.agg_keygen(
-        agg_config,
-        &halo2_params_reader,
-        &DefaultStaticVerifierPvHandler,
-    )?;
+    // [!endregion input]
 
-    // 9. Generate the SNARK verifier smart contract
-    let verifier = sdk.generate_halo2_verifier_solidity(&halo2_params_reader, &agg_pk)?;
+    // [!region evm_verification]
+    // 4. Generate the SNARK verifier smart contract
+    let verifier = sdk.generate_halo2_verifier_solidity()?;
 
-    // 10. Generate an EVM proof
-    let proof = sdk.generate_evm_proof(
-        &halo2_params_reader,
-        app_pk,
-        app_committed_exe,
-        agg_pk,
-        stdin,
-    )?;
+    // 5. Generate an EVM proof
+    // NOTE: this will do app_keygen, agg_keygen, halo2_keygen automatically if they have never been
+    // called before. As a consequence, the first call to `prove_evm` will take longer if you do not
+    // explicitly call `app_keygen`, `agg_keygen`, and `halo2_keygen` before calling `prove_evm`.
+    let proof = sdk.prove_evm(elf, stdin)?;
 
-    // 11. Verify the EVM proof
-    sdk.verify_evm_halo2_proof(&verifier, proof)?;
-    // ANCHOR_END: evm_verification
+    // 6. Verify the EVM proof
+    Sdk::verify_evm_halo2_proof(&verifier, proof)?;
+    // [!endregion evm_verification]
 
     Ok(())
 }
diff --git a/crates/sdk/examples/sdk_stark.rs b/crates/sdk/examples/sdk_stark.rs
new file mode 100644
index 0000000000..9fdeee1d3d
--- /dev/null
+++ b/crates/sdk/examples/sdk_stark.rs
@@ -0,0 +1,72 @@
+// [!region dependencies]
+use std::fs;
+
+use openvm_build::GuestOptions;
+use openvm_sdk::{Sdk, StdIn};
+use serde::{Deserialize, Serialize};
+
+#[derive(Serialize, Deserialize)]
+pub struct SomeStruct {
+    pub a: u64,
+    pub b: u64,
+}
+// [!endregion dependencies]
+
+#[allow(dead_code, unused_variables)]
+fn read_elf() -> eyre::Result<()> {
+    // [!region read_elf]
+    // 2b. Load the ELF from a file
+    let elf: Vec<u8> = fs::read("your_path_to_elf")?;
+    // [!endregion read_elf]
+    Ok(())
+}
+
+#[allow(unused_variables, unused_doc_comments)]
+fn main() -> eyre::Result<()> {
+    /// To use the example guest code in this crate, replace `target_path` with:
+    /// ```
+    /// use std::path::PathBuf;
+    ///
+    /// let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR")).to_path_buf();
+    /// path.push("guest/fib");
+    /// let target_path = path.to_str().unwrap();
+    /// ```
+    // [!region build]
+    // 1. Build the VmConfig with the extensions needed.
+    let sdk = Sdk::riscv32();
+
+    // 2a. Build the ELF with guest options and a target filter.
+    let guest_opts = GuestOptions::default();
+    let target_path = "your_path_project_root";
+    let elf = sdk.build(guest_opts, target_path, &None, None)?;
+    // [!endregion build]
+
+    // [!region execution]
+    // 3. Format your input into StdIn
+    let my_input = SomeStruct { a: 1, b: 2 }; // anything that can be serialized
+    let mut stdin = StdIn::default();
+    stdin.write(&my_input);
+
+    // 4. Run the program
+    let output = sdk.execute(elf.clone(), stdin.clone())?;
+    println!("public values output: {:?}", output);
+    // [!endregion execution]
+
+    // [!region proof_generation]
+    // 5a. Generate a proof
+    let (proof, app_commit) = sdk.prove(elf.clone(), stdin.clone())?;
+    // 5b. Generate a proof with a StarkProver with custom fields
+    let mut prover = sdk.prover(elf)?.with_program_name("test_program");
+    let app_commit = prover.app_commit();
+    let proof = prover.prove(stdin.clone())?;
+    // [!endregion proof_generation]
+
+    // [!region verification]
+    // 6. Do this once to save the agg_vk, independent of the proof.
+    let (_agg_pk, agg_vk) = sdk.agg_keygen()?;
+    // 7. Verify your program
+    Sdk::verify_proof(&agg_vk, app_commit, &proof)?;
+    // [!endregion verification]
+
+    Ok(())
+}
diff --git a/crates/sdk/guest/fib/src/main.rs b/crates/sdk/guest/fib/src/main.rs
index bc6d94cda8..7b65644496 100644
--- a/crates/sdk/guest/fib/src/main.rs
+++ b/crates/sdk/guest/fib/src/main.rs
@@ -3,15 +3,31 @@
 
 openvm::entry!(main);
 
-pub fn main() {
-    let n = core::hint::black_box(1 << 3);
+fn fibonacci(n: u32) -> (u32, u32) {
+    if n <= 1 {
+        return (0, n);
+    }
     let mut a: u32 = 0;
     let mut b: u32 = 1;
-    for _ in 1..n {
+    for _ in 2..=n {
         let sum = a + b;
         a = b;
         b = sum;
     }
+    (a, b)
+}
+
+pub fn main() {
+    // arbitrary n that results in more than 1 segment
+    let n = core::hint::black_box(1 << 5);
+
+    let mut a = 0;
+    let mut b = 0;
+    // calculate nth fibonacci number n times
+    for _ in 0..n {
+        (a, b) = fibonacci(n);
+    }
+
     if a == 0 {
         panic!();
     }
diff --git a/crates/sdk/src/codec.rs b/crates/sdk/src/codec.rs
index 9d0ab48a93..6ab86a90fb 100644
--- a/crates/sdk/src/codec.rs
+++ b/crates/sdk/src/codec.rs
@@ -1,7 +1,7 @@
 use std::io::{self, Cursor, Read, Result, Write};
 
 use openvm_circuit::{
-    arch::ContinuationVmProof, system::memory::tree::public_values::UserPublicValuesProof,
+    arch::ContinuationVmProof, system::memory::merkle::public_values::UserPublicValuesProof,
 };
 use openvm_continuations::verifier::{
     internal::types::VmStarkProof, root::types::RootVmVerifierInput,
@@ -63,7 +63,7 @@ impl Encode for ContinuationVmProof<SC> {
 
 impl Encode for VmStarkProof<SC> {
     fn encode<W: Write>(&self, writer: &mut W) -> Result<()> {
-        self.proof.encode(writer)?;
+        self.inner.encode(writer)?;
         encode_slice(&self.user_public_values, writer)
     }
 }
@@ -334,10 +334,10 @@ impl Decode for ContinuationVmProof<SC> {
 
 impl Decode for VmStarkProof<SC> {
     fn decode<R: Read>(reader: &mut R) -> Result<Self> {
-        let proof = Proof::decode(reader)?;
+        let inner = Proof::decode(reader)?;
         let user_public_values = decode_vec(reader)?;
         Ok(Self {
-            proof,
+            inner,
             user_public_values,
         })
     }
diff --git a/crates/sdk/src/commit.rs b/crates/sdk/src/commit.rs
index 53207d463b..70a66252be 100644
--- a/crates/sdk/src/commit.rs
+++ b/crates/sdk/src/commit.rs
@@ -1,12 +1,14 @@
 use std::{array::from_fn, sync::Arc};
 
 use num_bigint::BigUint;
-use openvm_circuit::{
-    arch::{instructions::exe::VmExe, VmConfig},
-    system::program::trace::VmCommittedExe,
-};
+use openvm_circuit::arch::{instructions::exe::VmExe, MemoryConfig};
+pub use openvm_circuit::system::program::trace::VmCommittedExe;
 use openvm_native_compiler::ir::DIGEST_SIZE;
-use openvm_stark_backend::{config::StarkGenericConfig, p3_field::PrimeField32};
+use openvm_stark_backend::{
+    config::{Com, StarkGenericConfig, Val},
+    engine::StarkEngine,
+    p3_field::PrimeField32,
+};
 use openvm_stark_sdk::{
     config::{baby_bear_poseidon2::BabyBearPoseidon2Engine, FriParameters},
     engine::StarkFriEngine,
@@ -17,13 +19,13 @@ use openvm_stark_sdk::{
 use serde::{Deserialize, Serialize};
 use serde_with::serde_as;
 
-use crate::{types::BN254_BYTES, NonRootCommittedExe, F, SC};
+use crate::{types::BN254_BYTES, F, SC};
 
 /// Wrapper for an array of big-endian bytes, representing an unsigned big integer. Each commit can
 /// be converted to a Bn254Fr using the trivial identification as natural numbers or into a `u32`
 /// digest by decomposing the big integer base-`F::MODULUS`.
 #[serde_as]
-#[derive(Copy, Clone, Debug, Serialize, Deserialize)]
+#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
 pub struct CommitBytes(#[serde_as(as = "serde_with::hex::Hex")] [u8; BN254_BYTES]);
 
 impl CommitBytes {
@@ -56,9 +58,15 @@ impl CommitBytes {
     }
 }
 
+impl std::fmt::Display for CommitBytes {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "{}", hex::encode(self.0))
+    }
+}
+
 /// `AppExecutionCommit` has all the commitments users should check against the final proof.
 #[serde_as]
-#[derive(Copy, Clone, Debug, Serialize, Deserialize)]
+#[derive(Copy, Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
 pub struct AppExecutionCommit {
     /// Commitment of the executable. In base-F::MODULUS, it's computed as
     /// compress(
@@ -72,26 +80,40 @@ pub struct AppExecutionCommit {
     pub app_exe_commit: CommitBytes,
 
     /// Commitment of the leaf VM verifier program which commits the VmConfig of App VM.
-    /// Internal verifier will verify `leaf_vm_verifier_commit`.
+    // Internal verifier will verify `app_vm_commit`.
+    // Internally this is also known as `leaf_verifier_program_commit`.
     pub app_vm_commit: CommitBytes,
 }
 
 impl AppExecutionCommit {
     /// Users should use this function to compute `AppExecutionCommit` and check it against the
     /// final proof.
-    pub fn compute<VC: VmConfig<F>>(
-        app_vm_config: &VC,
-        app_exe: &NonRootCommittedExe,
-        leaf_vm_verifier_exe: &NonRootCommittedExe,
-    ) -> Self {
-        let exe_commit: [F; DIGEST_SIZE] = app_exe
-            .compute_exe_commit(&app_vm_config.system().memory_config)
-            .into();
-        let vm_commit: [F; DIGEST_SIZE] = leaf_vm_verifier_exe.committed_program.commitment.into();
+    pub fn compute<SC: StarkGenericConfig>(
+        app_memory_config: &MemoryConfig,
+        app_exe: &VmExe<Val<SC>>,
+        app_program_commit: Com<SC>,
+        leaf_verifier_program_commit: Com<SC>,
+    ) -> Self
+    where
+        Com<SC>: AsRef<[Val<SC>; DIGEST_SIZE]>
+            + From<[Val<SC>; DIGEST_SIZE]>
+            + Into<[Val<SC>; DIGEST_SIZE]>,
+        Val<SC>: PrimeField32,
+    {
+        let exe_commit: [Val<SC>; DIGEST_SIZE] = VmCommittedExe::<SC>::compute_exe_commit(
+            &app_program_commit,
+            app_exe,
+            app_memory_config,
+        )
+        .into();
+        let vm_commit: [Val<SC>; DIGEST_SIZE] = leaf_verifier_program_commit.into();
         Self::from_field_commit(exe_commit, vm_commit)
     }
 
-    pub fn from_field_commit(exe_commit: [F; DIGEST_SIZE], vm_commit: [F; DIGEST_SIZE]) -> Self {
+    pub fn from_field_commit<F: PrimeField32>(
+        exe_commit: [F; DIGEST_SIZE],
+        vm_commit: [F; DIGEST_SIZE],
+    ) -> Self {
         Self {
             app_exe_commit: CommitBytes::from_u32_digest(&exe_commit.map(|x| x.as_canonical_u32())),
             app_vm_commit: CommitBytes::from_u32_digest(&vm_commit.map(|x| x.as_canonical_u32())),
@@ -102,10 +124,10 @@ impl AppExecutionCommit {
 pub fn commit_app_exe(
     app_fri_params: FriParameters,
     app_exe: impl Into<VmExe<F>>,
-) -> Arc<NonRootCommittedExe> {
+) -> Arc<VmCommittedExe<SC>> {
     let exe: VmExe<_> = app_exe.into();
     let app_engine = BabyBearPoseidon2Engine::new(app_fri_params);
-    Arc::new(VmCommittedExe::<SC>::commit(exe, app_engine.config.pcs()))
+    Arc::new(VmCommittedExe::<SC>::commit(exe, app_engine.config().pcs()))
 }
 
 pub(crate) fn babybear_digest_to_bn254(digest: &[F; DIGEST_SIZE]) -> Bn254Fr {
diff --git a/crates/sdk/src/config/global.rs b/crates/sdk/src/config/global.rs
index faf8182246..67597466f4 100644
--- a/crates/sdk/src/config/global.rs
+++ b/crates/sdk/src/config/global.rs
@@ -1,55 +1,65 @@
 use bon::Builder;
-use derive_more::derive::From;
 use openvm_algebra_circuit::{
-    Fp2Extension, Fp2ExtensionExecutor, Fp2ExtensionPeriphery, ModularExtension,
-    ModularExtensionExecutor, ModularExtensionPeriphery,
+    AlgebraCpuProverExt, Fp2Extension, Fp2ExtensionExecutor, ModularExtension,
+    ModularExtensionExecutor,
 };
 use openvm_algebra_transpiler::{Fp2TranspilerExtension, ModularTranspilerExtension};
-use openvm_bigint_circuit::{Int256, Int256Executor, Int256Periphery};
+use openvm_bigint_circuit::{Int256, Int256CpuProverExt, Int256Executor};
 use openvm_bigint_transpiler::Int256TranspilerExtension;
 use openvm_circuit::{
-    arch::{
-        InitFileGenerator, SystemConfig, SystemExecutor, SystemPeriphery, VmChipComplex, VmConfig,
-        VmInventoryError,
-    },
-    circuit_derive::{Chip, ChipUsageGetter},
-    derive::{AnyEnum, InstructionExecutor},
+    arch::{instructions::NATIVE_AS, *},
+    derive::VmConfig,
+    system::{SystemChipInventory, SystemCpuBuilder, SystemExecutor},
 };
 use openvm_ecc_circuit::{
-    WeierstrassExtension, WeierstrassExtensionExecutor, WeierstrassExtensionPeriphery,
+    EccCpuProverExt, WeierstrassExtension, WeierstrassExtensionExecutor, P256_CONFIG,
+    SECP256K1_CONFIG,
 };
 use openvm_ecc_transpiler::EccTranspilerExtension;
-use openvm_keccak256_circuit::{Keccak256, Keccak256Executor, Keccak256Periphery};
+use openvm_keccak256_circuit::{Keccak256, Keccak256CpuProverExt, Keccak256Executor};
 use openvm_keccak256_transpiler::Keccak256TranspilerExtension;
 use openvm_native_circuit::{
-    CastFExtension, CastFExtensionExecutor, CastFExtensionPeriphery, Native, NativeExecutor,
-    NativePeriphery,
+    CastFExtension, CastFExtensionExecutor, Native, NativeCpuProverExt, NativeExecutor,
 };
 use openvm_native_transpiler::LongFormTranspilerExtension;
 use openvm_pairing_circuit::{
-    PairingExtension, PairingExtensionExecutor, PairingExtensionPeriphery,
+    PairingCurve, PairingExtension, PairingExtensionExecutor, PairingProverExt,
+    BLS12_381_COMPLEX_STRUCT_NAME, BN254_COMPLEX_STRUCT_NAME,
 };
 use openvm_pairing_transpiler::PairingTranspilerExtension;
 use openvm_rv32im_circuit::{
-    Rv32I, Rv32IExecutor, Rv32IPeriphery, Rv32Io, Rv32IoExecutor, Rv32IoPeriphery, Rv32M,
-    Rv32MExecutor, Rv32MPeriphery,
+    Rv32I, Rv32IExecutor, Rv32ImCpuProverExt, Rv32Io, Rv32IoExecutor, Rv32M, Rv32MExecutor,
 };
 use openvm_rv32im_transpiler::{
     Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
 };
-use openvm_sha256_circuit::{Sha256, Sha256Executor, Sha256Periphery};
+use openvm_sha256_circuit::{Sha256, Sha256Executor, Sha2CpuProverExt};
 use openvm_sha256_transpiler::Sha256TranspilerExtension;
-use openvm_stark_backend::p3_field::PrimeField32;
+use openvm_stark_backend::{
+    config::{StarkGenericConfig, Val},
+    engine::StarkEngine,
+    p3_field::{Field, PrimeField32},
+    prover::cpu::{CpuBackend, CpuDevice},
+};
 use openvm_transpiler::transpiler::Transpiler;
 use serde::{Deserialize, Serialize};
 
-use crate::F;
+use super::AppFriParams;
+use crate::{
+    config::{AppConfig, TranspilerConfig},
+    F,
+};
 
+/// The recommended way to construct [SdkVmConfig] is using [SdkVmConfig::from_toml].
+///
+/// For construction without reliance on deserialization, you can use [SdkVmConfigBuilder], which
+/// follows a builder pattern. After calling [SdkVmConfigBuilder::build], call
+/// [SdkVmConfig::optimize] to apply some default optimizations to the built configuration for
+/// best performance.
 #[derive(Builder, Clone, Debug, Serialize, Deserialize)]
+#[serde(from = "SdkVmConfigWithDefaultDeser")]
 pub struct SdkVmConfig {
-    #[serde(default)]
     pub system: SdkSystemConfig,
-
     pub rv32i: Option<UnitStruct>,
     pub io: Option<UnitStruct>,
     pub keccak: Option<UnitStruct>,
@@ -57,7 +67,13 @@ pub struct SdkVmConfig {
     pub native: Option<UnitStruct>,
     pub castf: Option<UnitStruct>,
 
+    /// NOTE: if enabling this together with the [Int256] extension, you should set the `rv32m`
+    /// field to have the same `range_tuple_checker_sizes` as the `bigint` field for best
+    /// performance.
     pub rv32m: Option<Rv32M>,
+    /// NOTE: if enabling this together with the [Rv32M] extension, you should set the `rv32m`
+    /// field to have the same `range_tuple_checker_sizes` as the `bigint` field for best
+    /// performance.
     pub bigint: Option<Int256>,
     pub modular: Option<ModularExtension>,
     pub fp2: Option<Fp2Extension>,
@@ -65,68 +81,96 @@ pub struct SdkVmConfig {
     pub ecc: Option<WeierstrassExtension>,
 }
 
-#[derive(ChipUsageGetter, Chip, InstructionExecutor, From, AnyEnum)]
-pub enum SdkVmConfigExecutor<F: PrimeField32> {
-    #[any_enum]
-    System(SystemExecutor<F>),
-    #[any_enum]
-    Rv32i(Rv32IExecutor<F>),
-    #[any_enum]
-    Io(Rv32IoExecutor<F>),
-    #[any_enum]
-    Keccak(Keccak256Executor<F>),
-    #[any_enum]
-    Sha256(Sha256Executor<F>),
-    #[any_enum]
-    Native(NativeExecutor<F>),
-    #[any_enum]
-    Rv32m(Rv32MExecutor<F>),
-    #[any_enum]
-    BigInt(Int256Executor<F>),
-    #[any_enum]
-    Modular(ModularExtensionExecutor<F>),
-    #[any_enum]
-    Fp2(Fp2ExtensionExecutor<F>),
-    #[any_enum]
-    Pairing(PairingExtensionExecutor<F>),
-    #[any_enum]
-    Ecc(WeierstrassExtensionExecutor<F>),
-    #[any_enum]
-    CastF(CastFExtensionExecutor<F>),
+impl SdkVmConfig {
+    /// Standard configuration with a set of default VM extensions loaded.
+    ///
+    /// **Note**: To use this configuration, the `app_vm_config` field of your `openvm.toml` must
+    /// exactly match the following, including the order of the moduli and elliptic curve
+    /// parameters of the respective extensions:
+    ///
+    /// ```toml
+    #[doc = include_str!("openvm_standard.toml")]
+    /// ```
+    pub fn standard() -> SdkVmConfig {
+        let bn_config = PairingCurve::Bn254.curve_config();
+        let bls_config = PairingCurve::Bls12_381.curve_config();
+        SdkVmConfig::builder()
+            .system(Default::default())
+            .rv32i(Default::default())
+            .rv32m(Default::default())
+            .io(Default::default())
+            .keccak(Default::default())
+            .sha256(Default::default())
+            .bigint(Default::default())
+            .modular(ModularExtension::new(vec![
+                bn_config.modulus.clone(),
+                bn_config.scalar.clone(),
+                SECP256K1_CONFIG.modulus.clone(),
+                SECP256K1_CONFIG.scalar.clone(),
+                P256_CONFIG.modulus.clone(),
+                P256_CONFIG.scalar.clone(),
+                bls_config.modulus.clone(),
+                bls_config.scalar.clone(),
+            ]))
+            .fp2(Fp2Extension::new(vec![
+                (
+                    BN254_COMPLEX_STRUCT_NAME.to_string(),
+                    bn_config.modulus.clone(),
+                ),
+                (
+                    BLS12_381_COMPLEX_STRUCT_NAME.to_string(),
+                    bls_config.modulus.clone(),
+                ),
+            ]))
+            .ecc(WeierstrassExtension::new(vec![
+                bn_config.clone(),
+                SECP256K1_CONFIG.clone(),
+                P256_CONFIG.clone(),
+                bls_config.clone(),
+            ]))
+            .pairing(PairingExtension::new(vec![
+                PairingCurve::Bn254,
+                PairingCurve::Bls12_381,
+            ]))
+            .build()
+            .optimize()
+    }
+
+    /// Configuration with RISC-V RV32IM and IO VM extensions loaded.
+    ///
+    /// **Note**: To use this configuration, your `openvm.toml` must exactly match the following:
+    ///
+    /// ```toml
+    #[doc = include_str!("openvm_riscv32.toml")]
+    /// ```
+    pub fn riscv32() -> Self {
+        SdkVmConfig::builder()
+            .system(Default::default())
+            .rv32i(Default::default())
+            .rv32m(Default::default())
+            .io(Default::default())
+            .build()
+            .optimize()
+    }
+
+    /// `openvm_toml` should be the TOML string read from an openvm.toml file.
+    pub fn from_toml(openvm_toml: &str) -> Result<AppConfig<Self>, toml::de::Error> {
+        toml::from_str(openvm_toml)
+    }
 }
 
-#[derive(From, ChipUsageGetter, Chip, AnyEnum)]
-pub enum SdkVmConfigPeriphery<F: PrimeField32> {
-    #[any_enum]
-    System(SystemPeriphery<F>),
-    #[any_enum]
-    Rv32i(Rv32IPeriphery<F>),
-    #[any_enum]
-    Io(Rv32IoPeriphery<F>),
-    #[any_enum]
-    Keccak(Keccak256Periphery<F>),
-    #[any_enum]
-    Sha256(Sha256Periphery<F>),
-    #[any_enum]
-    Native(NativePeriphery<F>),
-    #[any_enum]
-    Rv32m(Rv32MPeriphery<F>),
-    #[any_enum]
-    BigInt(Int256Periphery<F>),
-    #[any_enum]
-    Modular(ModularExtensionPeriphery<F>),
-    #[any_enum]
-    Fp2(Fp2ExtensionPeriphery<F>),
-    #[any_enum]
-    Pairing(PairingExtensionPeriphery<F>),
-    #[any_enum]
-    Ecc(WeierstrassExtensionPeriphery<F>),
-    #[any_enum]
-    CastF(CastFExtensionPeriphery<F>),
+impl AppConfig<SdkVmConfig> {
+    pub fn standard() -> Self {
+        Self::new(AppFriParams::default().fri_params, SdkVmConfig::standard())
+    }
+
+    pub fn riscv32() -> Self {
+        Self::new(AppFriParams::default().fri_params, SdkVmConfig::riscv32())
+    }
 }
 
-impl SdkVmConfig {
-    pub fn transpiler(&self) -> Transpiler<F> {
+impl TranspilerConfig<F> for SdkVmConfig {
+    fn transpiler(&self) -> Transpiler<F> {
         let mut transpiler = Transpiler::default();
         if self.rv32i.is_some() {
             transpiler = transpiler.with_extension(Rv32ITranspilerExtension);
@@ -165,80 +209,211 @@ impl SdkVmConfig {
     }
 }
 
-impl<F: PrimeField32> VmConfig<F> for SdkVmConfig {
-    type Executor = SdkVmConfigExecutor<F>;
-    type Periphery = SdkVmConfigPeriphery<F>;
-
-    fn system(&self) -> &SystemConfig {
+impl AsRef<SystemConfig> for SdkVmConfig {
+    fn as_ref(&self) -> &SystemConfig {
         &self.system.config
     }
+}
 
-    fn system_mut(&mut self) -> &mut SystemConfig {
+impl AsMut<SystemConfig> for SdkVmConfig {
+    fn as_mut(&mut self) -> &mut SystemConfig {
         &mut self.system.config
     }
+}
 
-    fn create_chip_complex(
+impl SdkVmConfig {
+    pub fn optimize(mut self) -> Self {
+        self.apply_optimizations();
+        self
+    }
+
+    /// Zero the unused native address space; unify `rv32m`/`bigint` `range_tuple_checker_sizes`.
+    pub fn apply_optimizations(&mut self) {
+        if self.native.is_none() && self.castf.is_none() {
+            // There should be no need to write to native address space if Native extension and
+            // CastF extension are not enabled.
+            self.system.config.memory_config.addr_spaces[NATIVE_AS as usize].num_cells = 0;
+        }
+        let rv32m = self.rv32m.as_mut();
+        let bigint = self.bigint.as_mut();
+        if let (Some(bigint), Some(rv32m)) = (bigint, rv32m) {
+            rv32m.range_tuple_checker_sizes[0] =
+                rv32m.range_tuple_checker_sizes[0].max(bigint.range_tuple_checker_sizes[0]);
+            rv32m.range_tuple_checker_sizes[1] =
+                rv32m.range_tuple_checker_sizes[1].max(bigint.range_tuple_checker_sizes[1]);
+            bigint.range_tuple_checker_sizes = rv32m.range_tuple_checker_sizes;
+        }
+    }
+
+    pub fn to_inner(&self) -> SdkVmConfigInner {
+        let config = self.clone().optimize();
+        let system = config.system.config.clone();
+        let rv32i = config.rv32i.map(|_| Rv32I);
+        let io = config.io.map(|_| Rv32Io);
+        let keccak = config.keccak.map(|_| Keccak256);
+        let sha256 = config.sha256.map(|_| Sha256);
+        let native = config.native.map(|_| Native);
+        let castf = config.castf.map(|_| CastFExtension);
+        let rv32m = config.rv32m;
+        let bigint = config.bigint;
+        let modular = config.modular.clone();
+        let fp2 = config.fp2.clone();
+        let pairing = config.pairing.clone();
+        let ecc = config.ecc.clone();
+
+        SdkVmConfigInner {
+            system,
+            rv32i,
+            io,
+            keccak,
+            sha256,
+            native,
+            castf,
+            rv32m,
+            bigint,
+            modular,
+            fp2,
+            pairing,
+            ecc,
+        }
+    }
+}
+
+// ======================= Implementation of VmConfig and VmBuilder ====================
+
+/// SDK CPU VmBuilder
+#[derive(Copy, Clone, Default)]
+pub struct SdkVmCpuBuilder;
+
+/// Internal struct to use for the VmConfig derive macro.
+/// Can be obtained via [`SdkVmConfig::to_inner`].
+#[derive(Clone, Debug, VmConfig, Serialize, Deserialize)]
+pub struct SdkVmConfigInner {
+    #[config(executor = "SystemExecutor<F>")]
+    pub system: SystemConfig,
+    #[extension(executor = "Rv32IExecutor")]
+    pub rv32i: Option<Rv32I>,
+    #[extension(executor = "Rv32IoExecutor")]
+    pub io: Option<Rv32Io>,
+    #[extension(executor = "Keccak256Executor")]
+    pub keccak: Option<Keccak256>,
+    #[extension(executor = "Sha256Executor")]
+    pub sha256: Option<Sha256>,
+    #[extension(executor = "NativeExecutor<F>")]
+    pub native: Option<Native>,
+    #[extension(executor = "CastFExtensionExecutor")]
+    pub castf: Option<CastFExtension>,
+
+    #[extension(executor = "Rv32MExecutor")]
+    pub rv32m: Option<Rv32M>,
+    #[extension(executor = "Int256Executor")]
+    pub bigint: Option<Int256>,
+    #[extension(executor = "ModularExtensionExecutor")]
+    pub modular: Option<ModularExtension>,
+    #[extension(executor = "Fp2ExtensionExecutor")]
+    pub fp2: Option<Fp2Extension>,
+    #[extension(executor = "PairingExtensionExecutor<F>")]
+    pub pairing: Option<PairingExtension>,
+    #[extension(executor = "WeierstrassExtensionExecutor")]
+    pub ecc: Option<WeierstrassExtension>,
+}
+
+// `SdkVmConfigInnerExecutor` is generated by the `VmConfig` derive macro on `SdkVmConfigInner`.
+pub type SdkVmConfigExecutor<F> = SdkVmConfigInnerExecutor<F>;
+
+impl<F: Field> VmExecutionConfig<F> for SdkVmConfig
+where
+    SdkVmConfigInner: VmExecutionConfig<F>,
+{
+    type Executor = <SdkVmConfigInner as VmExecutionConfig<F>>::Executor;
+
+    fn create_executors(
         &self,
-    ) -> Result<VmChipComplex<F, Self::Executor, Self::Periphery>, VmInventoryError> {
-        let mut complex = self.system.config.create_chip_complex()?.transmute();
+    ) -> Result<ExecutorInventory<Self::Executor>, ExecutorInventoryError> {
+        self.to_inner().create_executors()
+    }
+}
 
-        if self.rv32i.is_some() {
-            complex = complex.extend(&Rv32I)?;
+impl<SC: StarkGenericConfig> VmCircuitConfig<SC> for SdkVmConfig
+where
+    SdkVmConfigInner: VmCircuitConfig<SC>,
+{
+    fn create_airs(&self) -> Result<AirInventory<SC>, AirInventoryError> {
+        self.to_inner().create_airs()
+    }
+}
+
+impl<E, SC> VmBuilder<E> for SdkVmCpuBuilder
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    Val<SC>: PrimeField32,
+{
+    type VmConfig = SdkVmConfig;
+    type SystemChipInventory = SystemChipInventory<SC>;
+    type RecordArena = MatrixRecordArena<Val<SC>>;
+
+    fn create_chip_complex(
+        &self,
+        config: &SdkVmConfig,
+        circuit: AirInventory<SC>,
+    ) -> Result<
+        VmChipComplex<SC, Self::RecordArena, E::PB, Self::SystemChipInventory>,
+        ChipInventoryError,
+    > {
+        let config = config.to_inner();
+        let mut chip_complex =
+            VmBuilder::<E>::create_chip_complex(&SystemCpuBuilder, &config.system, circuit)?;
+        let inventory = &mut chip_complex.inventory;
+        if let Some(rv32i) = &config.rv32i {
+            VmProverExtension::<E, _, _>::extend_prover(&Rv32ImCpuProverExt, rv32i, inventory)?;
         }
-        if self.io.is_some() {
-            complex = complex.extend(&Rv32Io)?;
+        if let Some(io) = &config.io {
+            VmProverExtension::<E, _, _>::extend_prover(&Rv32ImCpuProverExt, io, inventory)?;
         }
-        if self.keccak.is_some() {
-            complex = complex.extend(&Keccak256)?;
+        if let Some(keccak) = &config.keccak {
+            VmProverExtension::<E, _, _>::extend_prover(&Keccak256CpuProverExt, keccak, inventory)?;
         }
-        if self.sha256.is_some() {
-            complex = complex.extend(&Sha256)?;
+        if let Some(sha256) = &config.sha256 {
+            VmProverExtension::<E, _, _>::extend_prover(&Sha2CpuProverExt, sha256, inventory)?;
         }
-        if self.native.is_some() {
-            complex = complex.extend(&Native)?;
+        if let Some(native) = &config.native {
+            VmProverExtension::<E, _, _>::extend_prover(&NativeCpuProverExt, native, inventory)?;
         }
-        if self.castf.is_some() {
-            complex = complex.extend(&CastFExtension)?;
+        if let Some(castf) = &config.castf {
+            VmProverExtension::<E, _, _>::extend_prover(&NativeCpuProverExt, castf, inventory)?;
         }
 
-        if let Some(rv32m) = self.rv32m {
-            let mut rv32m = rv32m;
-            if let Some(ref bigint) = self.bigint {
-                rv32m.range_tuple_checker_sizes[0] =
-                    rv32m.range_tuple_checker_sizes[0].max(bigint.range_tuple_checker_sizes[0]);
-                rv32m.range_tuple_checker_sizes[1] =
-                    rv32m.range_tuple_checker_sizes[1].max(bigint.range_tuple_checker_sizes[1]);
-            }
-            complex = complex.extend(&rv32m)?;
+        if let Some(rv32m) = &config.rv32m {
+            VmProverExtension::<E, _, _>::extend_prover(&Rv32ImCpuProverExt, rv32m, inventory)?;
         }
-        if let Some(bigint) = self.bigint {
-            let mut bigint = bigint;
-            if let Some(ref rv32m) = self.rv32m {
-                bigint.range_tuple_checker_sizes[0] =
-                    rv32m.range_tuple_checker_sizes[0].max(bigint.range_tuple_checker_sizes[0]);
-                bigint.range_tuple_checker_sizes[1] =
-                    rv32m.range_tuple_checker_sizes[1].max(bigint.range_tuple_checker_sizes[1]);
-            }
-            complex = complex.extend(&bigint)?;
+        if let Some(bigint) = &config.bigint {
+            VmProverExtension::<E, _, _>::extend_prover(&Int256CpuProverExt, bigint, inventory)?;
         }
-        if let Some(ref modular) = self.modular {
-            complex = complex.extend(modular)?;
+        if let Some(modular) = &config.modular {
+            VmProverExtension::<E, _, _>::extend_prover(&AlgebraCpuProverExt, modular, inventory)?;
         }
-        if let Some(ref fp2) = self.fp2 {
-            complex = complex.extend(fp2)?;
+        if let Some(fp2) = &config.fp2 {
+            VmProverExtension::<E, _, _>::extend_prover(&AlgebraCpuProverExt, fp2, inventory)?;
         }
-        if let Some(ref pairing) = self.pairing {
-            complex = complex.extend(pairing)?;
+        if let Some(pairing) = &config.pairing {
+            VmProverExtension::<E, _, _>::extend_prover(&PairingProverExt, pairing, inventory)?;
         }
-        if let Some(ref ecc) = self.ecc {
-            complex = complex.extend(ecc)?;
+        if let Some(ecc) = &config.ecc {
+            VmProverExtension::<E, _, _>::extend_prover(&EccCpuProverExt, ecc, inventory)?;
         }
-
-        Ok(complex)
+        Ok(chip_complex)
     }
 }
 
+// ======================= Boilerplate ====================
+
 impl InitFileGenerator for SdkVmConfig {
+    fn generate_init_file_contents(&self) -> Option<String> {
+        self.to_inner().generate_init_file_contents()
+    }
+}
+impl InitFileGenerator for SdkVmConfigInner {
     fn generate_init_file_contents(&self) -> Option<String> {
         if self.modular.is_some() || self.fp2.is_some() || self.ecc.is_some() {
             let mut contents = String::new();
@@ -273,7 +448,7 @@ impl InitFileGenerator for SdkVmConfig {
     }
 }
 
-#[derive(Clone, Debug, Serialize, Deserialize)]
+#[derive(Clone, Debug, Default, Serialize, Deserialize)]
 pub struct SdkSystemConfig {
     pub config: SystemConfig,
 }
@@ -281,14 +456,6 @@ pub struct SdkSystemConfig {
 // Default implementation uses no init file
 impl InitFileGenerator for SdkSystemConfig {}
 
-impl Default for SdkSystemConfig {
-    fn default() -> Self {
-        Self {
-            config: SystemConfig::default().with_continuations(),
-        }
-    }
-}
-
 impl From<SystemConfig> for SdkSystemConfig {
     fn from(config: SystemConfig) -> Self {
         Self { config }
@@ -335,3 +502,74 @@ impl From<CastFExtension> for UnitStruct {
         UnitStruct {}
     }
 }
+
+#[derive(Deserialize)]
+struct SdkVmConfigWithDefaultDeser {
+    #[serde(default)]
+    pub system: SdkSystemConfig,
+
+    pub rv32i: Option<UnitStruct>,
+    pub io: Option<UnitStruct>,
+    pub keccak: Option<UnitStruct>,
+    pub sha256: Option<UnitStruct>,
+    pub native: Option<UnitStruct>,
+    pub castf: Option<UnitStruct>,
+
+    pub rv32m: Option<Rv32M>,
+    pub bigint: Option<Int256>,
+    pub modular: Option<ModularExtension>,
+    pub fp2: Option<Fp2Extension>,
+    pub pairing: Option<PairingExtension>,
+    pub ecc: Option<WeierstrassExtension>,
+}
+
+impl From<SdkVmConfigWithDefaultDeser> for SdkVmConfig {
+    fn from(config: SdkVmConfigWithDefaultDeser) -> Self {
+        let ret = Self {
+            system: config.system,
+            rv32i: config.rv32i,
+            io: config.io,
+            keccak: config.keccak,
+            sha256: config.sha256,
+            native: config.native,
+            castf: config.castf,
+            rv32m: config.rv32m,
+            bigint: config.bigint,
+            modular: config.modular,
+            fp2: config.fp2,
+            pairing: config.pairing,
+            ecc: config.ecc,
+        };
+        ret.optimize()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use itertools::zip_eq;
+
+    use super::*;
+
+    #[test]
+    fn test_app_config_consistency() {
+        let toml_config = SdkVmConfig::from_toml(include_str!("./openvm_standard.toml")).unwrap();
+        for (line1, line2) in zip_eq(
+            toml::to_string_pretty(&AppConfig::standard())
+                .unwrap()
+                .lines(),
+            toml::to_string_pretty(&toml_config).unwrap().lines(),
+        ) {
+            assert_eq!(line1, line2);
+        }
+
+        let toml_config = SdkVmConfig::from_toml(include_str!("./openvm_riscv32.toml")).unwrap();
+        for (line1, line2) in zip_eq(
+            toml::to_string_pretty(&AppConfig::riscv32())
+                .unwrap()
+                .lines(),
+            toml::to_string_pretty(&toml_config).unwrap().lines(),
+        ) {
+            assert_eq!(line1, line2);
+        }
+    }
+}
diff --git a/crates/sdk/src/config/mod.rs b/crates/sdk/src/config/mod.rs
index 3a231f180d..615189cf91 100644
--- a/crates/sdk/src/config/mod.rs
+++ b/crates/sdk/src/config/mod.rs
@@ -6,6 +6,7 @@ use openvm_continuations::verifier::{
 use openvm_native_circuit::NativeConfig;
 use openvm_native_compiler::{conversion::CompilerOptions, ir::DIGEST_SIZE};
 use openvm_stark_sdk::config::FriParameters;
+use openvm_transpiler::transpiler::Transpiler;
 use serde::{Deserialize, Serialize};
 
 mod global;
@@ -15,12 +16,17 @@ pub const DEFAULT_APP_LOG_BLOWUP: usize = 1;
 pub const DEFAULT_LEAF_LOG_BLOWUP: usize = 1;
 pub const DEFAULT_INTERNAL_LOG_BLOWUP: usize = 2;
 pub const DEFAULT_ROOT_LOG_BLOWUP: usize = 3;
+pub const DEFAULT_HALO2_VERIFIER_K: usize = 23;
 
 // Aggregation Tree Defaults
 const DEFAULT_NUM_CHILDREN_LEAF: usize = 1;
 const DEFAULT_NUM_CHILDREN_INTERNAL: usize = 3;
 const DEFAULT_MAX_INTERNAL_WRAPPER_LAYERS: usize = 4;
 
+pub trait TranspilerConfig<F> {
+    fn transpiler(&self) -> Transpiler<F>;
+}
+
 #[derive(Clone, Debug, Serialize, Deserialize)]
 pub struct AppConfig<VC> {
     #[serde(default)]
@@ -33,16 +39,8 @@ pub struct AppConfig<VC> {
     pub compiler_options: CompilerOptions,
 }
 
-#[derive(Clone, Serialize, Deserialize)]
-pub struct AggConfig {
-    /// STARK aggregation config
-    pub agg_stark_config: AggStarkConfig,
-    /// STARK-to-SNARK and SNARK-to-SNARK aggregation config
-    pub halo2_config: Halo2Config,
-}
-
 #[derive(Clone, Copy, Debug, Serialize, Deserialize)]
-pub struct AggStarkConfig {
+pub struct AggregationConfig {
     pub max_num_user_public_values: usize,
     pub leaf_fri_params: FriParameters,
     pub internal_fri_params: FriParameters,
@@ -55,7 +53,7 @@ pub struct AggStarkConfig {
     pub root_max_constraint_degree: usize,
 }
 
-#[derive(Clone, Serialize, Deserialize)]
+#[derive(Clone, Copy, Debug, Serialize, Deserialize)]
 pub struct Halo2Config {
     /// Log degree for the outer recursion verifier circuit.
     pub verifier_k: usize,
@@ -121,7 +119,7 @@ impl<VC> AppConfig<VC> {
     }
 }
 
-impl Default for AggStarkConfig {
+impl Default for AggregationConfig {
     fn default() -> Self {
         Self {
             max_num_user_public_values: DEFAULT_MAX_NUM_PUBLIC_VALUES,
@@ -141,20 +139,17 @@ impl Default for AggStarkConfig {
     }
 }
 
-impl Default for AggConfig {
+impl Default for Halo2Config {
     fn default() -> Self {
         Self {
-            agg_stark_config: AggStarkConfig::default(),
-            halo2_config: Halo2Config {
-                verifier_k: 24,
-                wrapper_k: None,
-                profiling: false,
-            },
+            verifier_k: DEFAULT_HALO2_VERIFIER_K,
+            wrapper_k: None,
+            profiling: false,
         }
     }
 }
 
-#[derive(Clone, Debug, Serialize, Deserialize)]
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
 pub struct AppFriParams {
     pub fri_params: FriParameters,
 }
@@ -175,7 +170,7 @@ impl From<FriParameters> for AppFriParams {
     }
 }
 
-#[derive(Clone, Debug, Serialize, Deserialize)]
+#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)]
 pub struct LeafFriParams {
     pub fri_params: FriParameters,
 }
@@ -196,9 +191,9 @@ impl From<FriParameters> for LeafFriParams {
     }
 }
 
-const SBOX_SIZE: usize = 7;
+pub const SBOX_SIZE: usize = 7;
 
-impl AggStarkConfig {
+impl AggregationConfig {
     pub fn leaf_vm_config(&self) -> NativeConfig {
         let mut config = NativeConfig::aggregation(
             VmVerifierPvs::<u8>::width(),
diff --git a/crates/sdk/src/config/openvm_riscv32.toml b/crates/sdk/src/config/openvm_riscv32.toml
new file mode 100644
index 0000000000..19a1e670e5
--- /dev/null
+++ b/crates/sdk/src/config/openvm_riscv32.toml
@@ -0,0 +1,3 @@
+[app_vm_config.rv32i]
+[app_vm_config.rv32m]
+[app_vm_config.io]
diff --git a/crates/sdk/src/config/openvm_standard.toml b/crates/sdk/src/config/openvm_standard.toml
new file mode 100644
index 0000000000..f1f9267191
--- /dev/null
+++ b/crates/sdk/src/config/openvm_standard.toml
@@ -0,0 +1,72 @@
+[app_vm_config.rv32i]
+[app_vm_config.rv32m]
+[app_vm_config.io]
+
+[app_vm_config.keccak]
+[app_vm_config.sha256]
+[app_vm_config.bigint]
+
+[app_vm_config.modular]
+supported_moduli = [
+    # bn254 (alt bn128)
+    "21888242871839275222246405745257275088696311157297823662689037894645226208583", # coordinate field
+    "21888242871839275222246405745257275088548364400416034343698204186575808495617", # scalar field
+    # secp256k1 (k256)
+    "115792089237316195423570985008687907853269984665640564039457584007908834671663", # coordinate field
+    "115792089237316195423570985008687907852837564279074904382605163141518161494337", # scalar field
+    # secp256r1 (p256)
+    "115792089210356248762697446949407573530086143415290314195533631308867097853951", # coordinate field
+    "115792089210356248762697446949407573529996955224135760342422259061068512044369", # scalar field
+    # bls12_381
+    "4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787", # coordinate field
+    "52435875175126190479447740508185965837690552500527637822603658699938581184513",                                       # scalar field
+]
+
+[app_vm_config.fp2]
+supported_moduli = [
+    [
+        "Bn254Fp2",
+        # bn254 (alt bn128)
+        "21888242871839275222246405745257275088696311157297823662689037894645226208583",
+    ],
+    # Bls12_381
+    [
+        "Bls12_381Fp2",
+        "4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787",
+    ],
+]
+
+# bn254 (alt bn128)
+[[app_vm_config.ecc.supported_curves]]
+struct_name = "Bn254G1Affine"
+modulus = "21888242871839275222246405745257275088696311157297823662689037894645226208583"
+scalar = "21888242871839275222246405745257275088548364400416034343698204186575808495617"
+a = "0"
+b = "3"
+
+# secp256k1 (k256)
+[[app_vm_config.ecc.supported_curves]]
+struct_name = "Secp256k1Point"
+modulus = "115792089237316195423570985008687907853269984665640564039457584007908834671663"
+scalar = "115792089237316195423570985008687907852837564279074904382605163141518161494337"
+a = "0"
+b = "7"
+
+# secp256r1 (p256)
+[[app_vm_config.ecc.supported_curves]]
+struct_name = "P256Point"
+modulus = "115792089210356248762697446949407573530086143415290314195533631308867097853951"
+scalar = "115792089210356248762697446949407573529996955224135760342422259061068512044369"
+a = "115792089210356248762697446949407573530086143415290314195533631308867097853948"
+b = "41058363725152142129326129780047268409114441015993725554835256314039467401291"
+
+# bls12_381
+[[app_vm_config.ecc.supported_curves]]
+struct_name = "Bls12_381G1Affine"
+modulus = "4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787"
+scalar = "52435875175126190479447740508185965837690552500527637822603658699938581184513"
+a = "0"
+b = "4"
+
+[app_vm_config.pairing]
+supported_curves = ["Bn254", "Bls12_381"]
diff --git a/crates/sdk/src/error.rs b/crates/sdk/src/error.rs
new file mode 100644
index 0000000000..05b690202f
--- /dev/null
+++ b/crates/sdk/src/error.rs
@@ -0,0 +1,39 @@
+use openvm_circuit::arch::{VirtualMachineError, VmVerificationError};
+use openvm_transpiler::transpiler::TranspilerError;
+use thiserror::Error;
+
+use crate::commit::CommitBytes;
+
+#[derive(Error, Debug)]
+pub enum SdkError {
+    #[error("I/O error: {0}")]
+    Io(#[from] std::io::Error),
+    #[error("Failed to build guest: code = {0}")]
+    BuildFailedWithCode(i32),
+    #[error("Failed to build guest (OPENVM_SKIP_BUILD is set)")]
+    BuildFailed,
+    #[error("SDK must set a transpiler")]
+    TranspilerNotAvailable,
+    #[error("Transpiler error: {0}")]
+    Transpiler(#[from] TranspilerError),
+    #[error("VM error: {0}")]
+    Vm(#[from] VirtualMachineError),
+    #[error("Invalid app exe commit: expected {expected}, actual {actual}")]
+    InvalidAppExeCommit {
+        expected: CommitBytes,
+        actual: CommitBytes,
+    },
+    #[error("Invalid app vm commit: expected {expected}, actual {actual}")]
+    InvalidAppVmCommit {
+        expected: CommitBytes,
+        actual: CommitBytes,
+    },
+    #[error("Other error: {0}")]
+    Other(eyre::Error),
+}
+
+impl From<VmVerificationError> for SdkError {
+    fn from(error: VmVerificationError) -> Self {
+        Self::Vm(VirtualMachineError::from(error))
+    }
+}
diff --git a/crates/sdk/src/fs.rs b/crates/sdk/src/fs.rs
index e795eebc86..83e72b3ca1 100644
--- a/crates/sdk/src/fs.rs
+++ b/crates/sdk/src/fs.rs
@@ -4,110 +4,19 @@ use std::{
 };
 
 use eyre::{Report, Result};
-use openvm_circuit::arch::{instructions::exe::VmExe, ContinuationVmProof, VmConfig};
-use openvm_continuations::verifier::root::types::RootVmVerifierInput;
 #[cfg(feature = "evm-prove")]
 use openvm_native_recursion::halo2::wrapper::EvmVerifierByteCode;
 use serde::{de::DeserializeOwned, Serialize};
 
-use crate::{
-    codec::{Decode, Encode},
-    keygen::{AggStarkProvingKey, AppProvingKey, AppVerifyingKey},
-    F, SC,
-};
+use crate::codec::{Decode, Encode};
 #[cfg(feature = "evm-prove")]
-use crate::{
-    keygen::Halo2ProvingKey,
-    types::{EvmHalo2Verifier, EvmProof},
-    OPENVM_VERSION,
-};
+use crate::{types::EvmHalo2Verifier, OPENVM_VERSION};
 
 pub const EVM_HALO2_VERIFIER_INTERFACE_NAME: &str = "IOpenVmHalo2Verifier.sol";
 pub const EVM_HALO2_VERIFIER_PARENT_NAME: &str = "Halo2Verifier.sol";
 pub const EVM_HALO2_VERIFIER_BASE_NAME: &str = "OpenVmHalo2Verifier.sol";
 pub const EVM_VERIFIER_ARTIFACT_FILENAME: &str = "verifier.bytecode.json";
 
-pub fn read_exe_from_file<P: AsRef<Path>>(path: P) -> Result<VmExe<F>> {
-    read_from_file_bitcode(&path)
-}
-
-pub fn write_exe_to_file<P: AsRef<Path>>(exe: VmExe<F>, path: P) -> Result<()> {
-    write_to_file_bitcode(&path, exe)
-}
-
-pub fn read_app_pk_from_file<VC: VmConfig<F>, P: AsRef<Path>>(
-    path: P,
-) -> Result<AppProvingKey<VC>> {
-    read_from_file_bitcode(&path)
-}
-
-pub fn write_app_pk_to_file<VC: VmConfig<F>, P: AsRef<Path>>(
-    app_pk: AppProvingKey<VC>,
-    path: P,
-) -> Result<()> {
-    write_to_file_bitcode(&path, app_pk)
-}
-
-pub fn read_app_vk_from_file<P: AsRef<Path>>(path: P) -> Result<AppVerifyingKey> {
-    read_from_file_bitcode(&path)
-}
-
-pub fn write_app_vk_to_file<P: AsRef<Path>>(app_vk: AppVerifyingKey, path: P) -> Result<()> {
-    write_to_file_bitcode(&path, app_vk)
-}
-
-pub fn read_app_proof_from_file<P: AsRef<Path>>(path: P) -> Result<ContinuationVmProof<SC>> {
-    decode_from_file(&path)
-}
-
-pub fn write_app_proof_to_file<P: AsRef<Path>>(
-    proof: ContinuationVmProof<SC>,
-    path: P,
-) -> Result<()> {
-    encode_to_file(&path, proof)
-}
-
-pub fn read_root_verifier_input_from_file<P: AsRef<Path>>(
-    path: P,
-) -> Result<RootVmVerifierInput<SC>> {
-    decode_from_file(&path)
-}
-
-pub fn write_root_verifier_input_to_file<P: AsRef<Path>>(
-    input: RootVmVerifierInput<SC>,
-    path: P,
-) -> Result<()> {
-    encode_to_file(&path, input)
-}
-
-pub fn read_agg_stark_pk_from_file<P: AsRef<Path>>(path: P) -> Result<AggStarkProvingKey> {
-    read_from_file_bitcode(&path)
-}
-
-pub fn write_agg_stark_pk_to_file<P: AsRef<Path>>(pk: &AggStarkProvingKey, path: P) -> Result<()> {
-    write_to_file_bitcode(&path, pk)
-}
-
-#[cfg(feature = "evm-prove")]
-pub fn read_agg_halo2_pk_from_file<P: AsRef<Path>>(path: P) -> Result<Halo2ProvingKey> {
-    read_from_file_bitcode(&path)
-}
-
-#[cfg(feature = "evm-prove")]
-pub fn write_agg_halo2_pk_to_file<P: AsRef<Path>>(pk: &Halo2ProvingKey, path: P) -> Result<()> {
-    write_to_file_bitcode(&path, pk)
-}
-
-#[cfg(feature = "evm-prove")]
-pub fn read_evm_proof_from_file<P: AsRef<Path>>(path: P) -> Result<EvmProof> {
-    read_from_file_json(&path)
-}
-
-#[cfg(feature = "evm-prove")]
-pub fn write_evm_proof_to_file<P: AsRef<Path>>(proof: EvmProof, path: P) -> Result<()> {
-    write_to_file_json(&path, proof)
-}
-
 #[cfg(feature = "evm-prove")]
 pub fn read_evm_halo2_verifier_from_folder<P: AsRef<Path>>(folder: P) -> Result<EvmHalo2Verifier> {
     use std::fs::read_to_string;
@@ -193,7 +102,7 @@ pub fn write_object_to_file<T: Serialize, P: AsRef<Path>>(path: P, data: T) -> R
     write_to_file_bitcode(path, data)
 }
 
-pub fn read_from_file_bitcode<T: DeserializeOwned, P: AsRef<Path>>(path: P) -> Result<T> {
+fn read_from_file_bitcode<T: DeserializeOwned, P: AsRef<Path>>(path: P) -> Result<T> {
     let ret = read(&path)
         .map_err(|e| read_error(&path, e.into()))
         .and_then(|data| {
@@ -202,7 +111,7 @@ pub fn read_from_file_bitcode<T: DeserializeOwned, P: AsRef<Path>>(path: P) -> R
     Ok(ret)
 }
 
-pub fn write_to_file_bitcode<T: Serialize, P: AsRef<Path>>(path: P, data: T) -> Result<()> {
+fn write_to_file_bitcode<T: Serialize, P: AsRef<Path>>(path: P, data: T) -> Result<()> {
     if let Some(parent) = path.as_ref().parent() {
         create_dir_all(parent).map_err(|e| write_error(&path, e.into()))?;
     }
diff --git a/crates/sdk/src/keygen/dummy.rs b/crates/sdk/src/keygen/dummy.rs
index 3fe2bcd300..4a9b20e4db 100644
--- a/crates/sdk/src/keygen/dummy.rs
+++ b/crates/sdk/src/keygen/dummy.rs
@@ -6,8 +6,9 @@ use openvm_circuit::{
             exe::VmExe, instruction::Instruction, program::Program, LocalOpcode,
             SystemOpcode::TERMINATE,
         },
-        ContinuationVmProof, SingleSegmentVmExecutor, VirtualMachine, VmComplexTraceHeights,
-        VmConfig, VmExecutor,
+        ContinuationVmProof, Executor, MatrixRecordArena, MeteredExecutor,
+        PreflightExecutionOutput, PreflightExecutor, SingleSegmentVmProver, SystemConfig,
+        VirtualMachine, VirtualMachineError, VmBuilder, VmExecutionConfig, PUBLIC_VALUES_AIR_ID,
     },
     system::program::trace::VmCommittedExe,
     utils::next_power_of_two_or_zero,
@@ -17,118 +18,134 @@ use openvm_continuations::verifier::{
     leaf::{types::LeafVmVerifierInput, LeafVmVerifierConfig},
     root::types::RootVmVerifierInput,
 };
-use openvm_native_circuit::NativeConfig;
+use openvm_native_circuit::{NativeConfig, NativeCpuBuilder, NATIVE_MAX_TRACE_HEIGHTS};
 use openvm_native_compiler::ir::DIGEST_SIZE;
 use openvm_native_recursion::hints::Hintable;
-use openvm_rv32im_circuit::Rv32ImConfig;
+use openvm_rv32im_circuit::{Rv32ImConfig, Rv32ImCpuBuilder};
+use openvm_stark_backend::{
+    p3_matrix::dense::RowMajorMatrix,
+    prover::{
+        cpu::CpuBackend,
+        types::{AirProvingContext, ProvingContext},
+    },
+};
 use openvm_stark_sdk::{
     config::{
         baby_bear_poseidon2::BabyBearPoseidon2Engine,
-        fri_params::standard_fri_params_with_100_bits_conjectured_security, FriParameters,
+        baby_bear_poseidon2_root::{BabyBearPoseidon2RootConfig, BabyBearPoseidon2RootEngine},
+        fri_params::standard_fri_params_with_100_bits_conjectured_security,
+        FriParameters,
     },
     engine::StarkFriEngine,
-    openvm_stark_backend::{
-        config::StarkGenericConfig, p3_field::FieldAlgebra, proof::Proof, Chip,
-    },
+    openvm_stark_backend::{p3_field::FieldAlgebra, proof::Proof},
 };
 
 use crate::{
-    prover::vm::{
-        local::VmLocalProver, types::VmProvingKey, ContinuationVmProver, SingleSegmentVmProver,
-    },
-    NonRootCommittedExe, F, SC,
+    prover::vm::{new_local_prover, types::VmProvingKey},
+    F, SC,
 };
 
+/// Given a dummy internal proof, which is the input to the root verifier circuit, we will run
+/// tracegen on the root verifier circuit to determine the trace heights. These trace heights will
+/// become the fixed trace heights that we **force** the root verifier circuit's trace matrices to
+/// have.
+///
 /// Returns:
 /// - trace heights ordered by AIR ID
-/// - internal ordering of trace heights.
 ///
 /// All trace heights are rounded to the next power of two (or 0 -> 0).
 pub(super) fn compute_root_proof_heights(
-    root_vm_config: NativeConfig,
-    root_exe: VmExe<F>,
+    root_vm: &mut VirtualMachine<BabyBearPoseidon2RootEngine, NativeCpuBuilder>,
+    root_committed_exe: &VmCommittedExe<BabyBearPoseidon2RootConfig>,
     dummy_internal_proof: &Proof<SC>,
-) -> (Vec<usize>, VmComplexTraceHeights) {
-    let num_user_public_values = root_vm_config.system.num_public_values - 2 * DIGEST_SIZE;
+) -> Result<Vec<u32>, VirtualMachineError> {
+    let num_public_values = root_vm.config().as_ref().num_public_values;
+    let num_user_public_values = num_public_values - 2 * DIGEST_SIZE;
     let root_input = RootVmVerifierInput {
         proofs: vec![dummy_internal_proof.clone()],
         public_values: vec![F::ZERO; num_user_public_values],
     };
-    let vm = SingleSegmentVmExecutor::new(root_vm_config);
-    let res = vm
-        .execute_and_compute_heights(root_exe, root_input.write())
-        .unwrap();
-    let air_heights: Vec<_> = res
-        .air_heights
+    // The following is the same as impl SingleSegmentVmProver for VmLocalProver except we stop
+    // after tracegen:
+    let mut trace_heights = NATIVE_MAX_TRACE_HEIGHTS.to_vec();
+    trace_heights[PUBLIC_VALUES_AIR_ID] = num_public_values as u32;
+    let state = root_vm.create_initial_state(&root_committed_exe.exe, root_input.write());
+    let cached_program_trace = root_vm.transport_committed_exe_to_device(root_committed_exe);
+    root_vm.load_program(cached_program_trace);
+    root_vm.transport_init_memory_to_device(&state.memory);
+    let mut preflight_interpreter = root_vm.preflight_interpreter(&root_committed_exe.exe)?;
+    let PreflightExecutionOutput {
+        system_records,
+        record_arenas,
+        ..
+    } = root_vm.execute_preflight(&mut preflight_interpreter, state, None, &trace_heights)?;
+    let ctx = root_vm.generate_proving_ctx(system_records, record_arenas)?;
+    let air_heights = ctx
         .into_iter()
-        .map(next_power_of_two_or_zero)
+        .map(|(_, air_ctx)| {
+            next_power_of_two_or_zero(air_ctx.main_trace_height())
+                .try_into()
+                .unwrap()
+        })
         .collect();
-    let mut vm_heights = res.vm_heights;
-    vm_heights.round_to_next_power_of_two_or_zero();
-    (air_heights, vm_heights)
+    Ok(air_heights)
 }
 
 pub(super) fn dummy_internal_proof(
     internal_vm_pk: Arc<VmProvingKey<SC, NativeConfig>>,
-    internal_exe: Arc<NonRootCommittedExe>,
+    internal_committed_exe: Arc<VmCommittedExe<SC>>,
     leaf_proof: Proof<SC>,
-) -> Proof<SC> {
+) -> Result<Proof<SC>, VirtualMachineError> {
     let mut internal_inputs = InternalVmVerifierInput::chunk_leaf_or_internal_proofs(
-        internal_exe.get_program_commit().into(),
+        internal_committed_exe.get_program_commit().into(),
         &[leaf_proof],
         1,
     );
     let internal_input = internal_inputs.pop().unwrap();
-    let internal_prover = VmLocalProver::<SC, NativeConfig, BabyBearPoseidon2Engine>::new(
-        internal_vm_pk,
-        internal_exe,
-    );
-    SingleSegmentVmProver::prove(&internal_prover, internal_input.write())
+    let mut internal_prover = new_local_prover::<BabyBearPoseidon2Engine, _>(
+        NativeCpuBuilder,
+        &internal_vm_pk,
+        internal_committed_exe.exe.clone(),
+    )?;
+    SingleSegmentVmProver::prove(
+        &mut internal_prover,
+        internal_input.write(),
+        NATIVE_MAX_TRACE_HEIGHTS,
+    )
 }
 
 pub(super) fn dummy_internal_proof_riscv_app_vm(
     leaf_vm_pk: Arc<VmProvingKey<SC, NativeConfig>>,
     internal_vm_pk: Arc<VmProvingKey<SC, NativeConfig>>,
-    internal_exe: Arc<NonRootCommittedExe>,
+    internal_exe: Arc<VmCommittedExe<SC>>,
     num_public_values: usize,
-) -> Proof<SC> {
+) -> Result<Proof<SC>, VirtualMachineError> {
     let fri_params = standard_fri_params_with_100_bits_conjectured_security(1);
-    let leaf_proof = dummy_leaf_proof_riscv_app_vm(leaf_vm_pk, num_public_values, fri_params);
+    let leaf_proof = dummy_leaf_proof_riscv_app_vm(leaf_vm_pk, num_public_values, fri_params)?;
     dummy_internal_proof(internal_vm_pk, internal_exe, leaf_proof)
 }
 
-#[allow(dead_code)]
-pub fn dummy_leaf_proof<VC: VmConfig<F>>(
-    leaf_vm_pk: Arc<VmProvingKey<SC, NativeConfig>>,
-    app_vm_pk: Arc<VmProvingKey<SC, VC>>,
-    overridden_heights: Option<VmComplexTraceHeights>,
-) -> Proof<SC>
-where
-    VC::Executor: Chip<SC>,
-    VC::Periphery: Chip<SC>,
-{
-    let app_proof = dummy_app_proof_impl(app_vm_pk.clone(), overridden_heights);
-    dummy_leaf_proof_impl(leaf_vm_pk, app_vm_pk, &app_proof)
-}
-
 pub(super) fn dummy_leaf_proof_riscv_app_vm(
     leaf_vm_pk: Arc<VmProvingKey<SC, NativeConfig>>,
     num_public_values: usize,
     app_fri_params: FriParameters,
-) -> Proof<SC> {
-    let app_vm_pk = Arc::new(dummy_riscv_app_vm_pk(num_public_values, app_fri_params));
-    let app_proof = dummy_app_proof_impl(app_vm_pk.clone(), None);
-    dummy_leaf_proof_impl(leaf_vm_pk, app_vm_pk, &app_proof)
+) -> Result<Proof<SC>, VirtualMachineError> {
+    let app_vm_pk = Arc::new(dummy_riscv_app_vm_pk(num_public_values, app_fri_params)?);
+    let app_proof = dummy_app_proof(Rv32ImCpuBuilder, app_vm_pk.clone())?;
+    dummy_leaf_proof(leaf_vm_pk, app_vm_pk, &app_proof)
 }
 
-fn dummy_leaf_proof_impl<VC: VmConfig<F>>(
+fn dummy_leaf_proof<VC>(
     leaf_vm_pk: Arc<VmProvingKey<SC, NativeConfig>>,
     app_vm_pk: Arc<VmProvingKey<SC, VC>>,
     app_proof: &ContinuationVmProof<SC>,
-) -> Proof<SC> {
+) -> Result<Proof<SC>, VirtualMachineError>
+where
+    VC: AsRef<SystemConfig>,
+{
     let leaf_program = LeafVmVerifierConfig {
         app_fri_params: app_vm_pk.fri_params,
-        app_system_config: app_vm_pk.vm_config.system().clone(),
+        app_system_config: app_vm_pk.vm_config.as_ref().clone(),
         compiler_options: Default::default(),
     }
     .build_program(&app_vm_pk.vm_pk.get_vk());
@@ -137,74 +154,60 @@ fn dummy_leaf_proof_impl<VC: VmConfig<F>>(
         1,
         "Dummy proof should only have 1 segment"
     );
-    let e = BabyBearPoseidon2Engine::new(leaf_vm_pk.fri_params);
-    let leaf_exe = Arc::new(VmCommittedExe::<SC>::commit(
-        leaf_program.into(),
-        e.config.pcs(),
-    ));
-    let leaf_prover =
-        VmLocalProver::<SC, NativeConfig, BabyBearPoseidon2Engine>::new(leaf_vm_pk, leaf_exe);
+    let leaf_exe = Arc::new(VmExe::new(leaf_program));
+    let mut leaf_prover =
+        new_local_prover::<BabyBearPoseidon2Engine, _>(NativeCpuBuilder, &leaf_vm_pk, leaf_exe)?;
     let mut leaf_inputs = LeafVmVerifierInput::chunk_continuation_vm_proof(app_proof, 1);
     let leaf_input = leaf_inputs.pop().unwrap();
-    SingleSegmentVmProver::prove(&leaf_prover, leaf_input.write_to_stream())
+    SingleSegmentVmProver::prove(
+        &mut leaf_prover,
+        leaf_input.write_to_stream(),
+        NATIVE_MAX_TRACE_HEIGHTS,
+    )
 }
 
 fn dummy_riscv_app_vm_pk(
     num_public_values: usize,
     fri_params: FriParameters,
-) -> VmProvingKey<SC, Rv32ImConfig> {
+) -> Result<VmProvingKey<SC, Rv32ImConfig>, VirtualMachineError> {
     let vm_config = Rv32ImConfig::with_public_values(num_public_values);
-    let vm = VirtualMachine::new(BabyBearPoseidon2Engine::new(fri_params), vm_config.clone());
-    let vm_pk = vm.keygen();
-    VmProvingKey {
+    let (_, vm_pk) = VirtualMachine::new_with_keygen(
+        BabyBearPoseidon2Engine::new(fri_params),
+        Rv32ImCpuBuilder,
+        vm_config.clone(),
+    )?;
+    Ok(VmProvingKey {
         fri_params,
         vm_config,
         vm_pk,
-    }
+    })
 }
 
-fn dummy_app_proof_impl<VC: VmConfig<F>>(
+fn dummy_app_proof<VB, VC>(
+    app_vm_builder: VB,
     app_vm_pk: Arc<VmProvingKey<SC, VC>>,
-    overridden_heights: Option<VmComplexTraceHeights>,
-) -> ContinuationVmProof<SC>
+) -> Result<ContinuationVmProof<SC>, VirtualMachineError>
 where
-    VC::Executor: Chip<SC>,
-    VC::Periphery: Chip<SC>,
+    VB: VmBuilder<BabyBearPoseidon2Engine, VmConfig = VC, RecordArena = MatrixRecordArena<F>>,
+    VC: VmExecutionConfig<F>,
+    <VC as VmExecutionConfig<F>>::Executor: Executor<F> + MeteredExecutor<F> + PreflightExecutor<F>,
 {
-    let fri_params = app_vm_pk.fri_params;
-    let dummy_exe = dummy_app_committed_exe(fri_params);
-    // Enforce each AIR to have at least 1 row.
-    let overridden_heights = if let Some(overridden_heights) = overridden_heights {
-        overridden_heights
-    } else {
-        // We first execute once to get the trace heights from dummy_exe, then pad to powers of 2
-        // (forcing trace height 0 to 1)
-        let executor = VmExecutor::new(app_vm_pk.vm_config.clone());
-        let mut results = executor
-            .execute_segments(dummy_exe.exe.clone(), vec![])
-            .unwrap();
-        // ASSUMPTION: the dummy exe has only 1 segment
-        assert_eq!(results.len(), 1, "dummy exe should have only 1 segment");
-        let mut result = results.pop().unwrap();
-        result.chip_complex.finalize_memory();
-        let mut vm_heights = result.chip_complex.get_internal_trace_heights();
-        vm_heights.round_to_next_power_of_two();
-        vm_heights
+    let dummy_exe = Arc::new(VmExe::new(dummy_app_program()));
+    let mut app_prover =
+        new_local_prover::<BabyBearPoseidon2Engine, VB>(app_vm_builder, &app_vm_pk, dummy_exe)?;
+    // Force all AIRs to have non-empty trace matrices (height 0 -> height 1)
+    let modify_ctx = |_seg_idx: usize, ctx: &mut ProvingContext<CpuBackend<SC>>| {
+        for (i, pk) in app_vm_pk.vm_pk.per_air.iter().enumerate() {
+            // Checked lookup: `i` may equal `ctx.per_air.len()` when trailing AIRs are absent.
+            if ctx.per_air.get(i).map(|(air_id, _)| *air_id) != Some(i) {
+                let row = RowMajorMatrix::new_row(F::zero_vec(pk.vk.params.width.common_main));
+                let dummy_ctx = AirProvingContext::simple_no_pis(Arc::new(row));
+                ctx.per_air.insert(i, (i, dummy_ctx));
+            }
+        }
+    };
-    // For the dummy proof, we must override the trace heights.
-    let app_prover =
-        VmLocalProver::<SC, VC, BabyBearPoseidon2Engine>::new_with_overridden_trace_heights(
-            app_vm_pk,
-            dummy_exe,
-            Some(overridden_heights),
-        );
-    ContinuationVmProver::prove(&app_prover, vec![])
-}
-
-fn dummy_app_committed_exe(fri_params: FriParameters) -> Arc<NonRootCommittedExe> {
-    let program = dummy_app_program();
-    let e = BabyBearPoseidon2Engine::new(fri_params);
-    Arc::new(VmCommittedExe::<SC>::commit(program.into(), e.config.pcs()))
+    let dummy_proof = app_prover.prove_continuations(vec![], modify_ctx)?;
+    Ok(dummy_proof)
 }
 
 fn dummy_app_program() -> Program<F> {
diff --git a/crates/sdk/src/keygen/mod.rs b/crates/sdk/src/keygen/mod.rs
index 0806cc6f3d..8b12782c51 100644
--- a/crates/sdk/src/keygen/mod.rs
+++ b/crates/sdk/src/keygen/mod.rs
@@ -1,18 +1,19 @@
 use std::sync::Arc;
 
 use derivative::Derivative;
-use dummy::{compute_root_proof_heights, dummy_internal_proof_riscv_app_vm};
+// use dummy::{compute_root_proof_heights, dummy_internal_proof_riscv_app_vm};
 use openvm_circuit::{
-    arch::{VirtualMachine, VmComplexTraceHeights, VmConfig},
-    system::{memory::dimensions::MemoryDimensions, program::trace::VmCommittedExe},
+    arch::{AirInventoryError, SystemConfig, VirtualMachine, VirtualMachineError, VmCircuitConfig},
+    system::memory::dimensions::MemoryDimensions,
 };
 use openvm_continuations::verifier::{
     internal::InternalVmVerifierConfig, leaf::LeafVmVerifierConfig, root::RootVmVerifierConfig,
 };
-use openvm_native_circuit::NativeConfig;
+use openvm_native_circuit::{NativeConfig, NativeCpuBuilder};
 use openvm_native_compiler::ir::DIGEST_SIZE;
 use openvm_stark_backend::{
     config::Val,
+    engine::StarkEngine,
     p3_field::{FieldExtensionAlgebra, PrimeField32, TwoAdicField},
 };
 use openvm_stark_sdk::{
@@ -25,15 +26,12 @@ use openvm_stark_sdk::{
         config::{Com, StarkGenericConfig},
         keygen::types::MultiStarkVerifyingKey,
         proof::Proof,
-        Chip,
     },
-    p3_bn254_fr::Bn254Fr,
 };
 use serde::{Deserialize, Serialize};
-use tracing::info_span;
+use tracing::{info_span, instrument};
 #[cfg(feature = "evm-prove")]
 use {
-    crate::config::AggConfig,
     openvm_continuations::static_verifier::StaticVerifierPvHandler,
     openvm_native_recursion::halo2::{
         utils::Halo2ParamsReader, verifier::Halo2VerifierProvingKey,
@@ -41,12 +39,18 @@ use {
     },
 };
 
+#[cfg(feature = "evm-prove")]
+use crate::config::Halo2Config;
 use crate::{
-    commit::babybear_digest_to_bn254,
-    config::{AggStarkConfig, AppConfig},
-    keygen::perm::AirIdPermutation,
+    commit::VmCommittedExe,
+    config::{AggregationConfig, AppConfig},
+    keygen::{
+        dummy::{compute_root_proof_heights, dummy_internal_proof_riscv_app_vm},
+        perm::AirIdPermutation,
+    },
     prover::vm::types::VmProvingKey,
-    NonRootCommittedExe, RootSC, F, SC,
+    util::check_max_constraint_degrees,
+    RootSC, SC,
 };
 
 pub mod asm;
@@ -55,62 +59,78 @@ pub mod perm;
 #[cfg(feature = "evm-prove")]
 pub mod static_verifier;
 
+/// This is lightweight to clone as it contains smart pointers to the proving keys.
 #[derive(Clone, Serialize, Deserialize)]
 pub struct AppProvingKey<VC> {
-    pub leaf_committed_exe: Arc<NonRootCommittedExe>,
+    /// The committed executable of the leaf verifier program that verifies proofs of the App VM
+    /// circuit. The App VM circuit constraints are statically compiled into this executable.
+    pub leaf_committed_exe: Arc<VmCommittedExe<SC>>,
     pub leaf_fri_params: FriParameters,
     pub app_vm_pk: Arc<VmProvingKey<SC, VC>>,
 }
 
 #[derive(Clone, Serialize, Deserialize)]
 pub struct AppVerifyingKey {
+    /// We store the FRI parameters used to generate the proof separately.
     pub fri_params: FriParameters,
-    pub app_vm_vk: MultiStarkVerifyingKey<SC>,
+    /// STARK backend verifying key
+    pub vk: MultiStarkVerifyingKey<SC>,
     pub memory_dimensions: MemoryDimensions,
 }
 
-#[cfg(feature = "evm-prove")]
+/// The STARK proving keys necessary for aggregation of app proofs into a single aggregate STARK
+/// proof.
+///
+/// This is lightweight to clone as it contains smart pointers to the proving keys.
 #[derive(Clone, Serialize, Deserialize)]
 pub struct AggProvingKey {
-    pub agg_stark_pk: AggStarkProvingKey,
-    pub halo2_pk: Halo2ProvingKey,
-}
-
-#[derive(Clone, Serialize, Deserialize)]
-pub struct AggStarkProvingKey {
     pub leaf_vm_pk: Arc<VmProvingKey<SC, NativeConfig>>,
     pub internal_vm_pk: Arc<VmProvingKey<SC, NativeConfig>>,
-    pub internal_committed_exe: Arc<NonRootCommittedExe>,
+    pub internal_committed_exe: Arc<VmCommittedExe<SC>>,
     pub root_verifier_pk: RootVerifierProvingKey,
 }
 
-/// Attention: the size of this struct is VERY large, usually >10GB.
+#[derive(Clone, Serialize, Deserialize)]
+pub struct AggVerifyingKey {
+    pub(super) leaf_fri_params: FriParameters,
+    pub(super) leaf_vk: MultiStarkVerifyingKey<SC>,
+    /// FRI parameters used to generate the last internal proof.
+    pub(super) internal_fri_params: FriParameters,
+    pub(super) internal_vk: MultiStarkVerifyingKey<SC>,
+    pub(super) internal_verifier_program_commit: Com<SC>,
+}
+
+/// Attention: the serialized size of this struct is VERY large, usually >10GB.
+///
+/// This is lightweight to clone as it contains smart pointers to the proving keys.
 #[cfg(feature = "evm-prove")]
 #[derive(Clone, Serialize, Deserialize)]
 pub struct Halo2ProvingKey {
     /// Static verifier to verify a stark proof of the root verifier.
-    pub verifier: Halo2VerifierProvingKey,
+    pub verifier: Arc<Halo2VerifierProvingKey>,
     /// Wrapper circuit to verify static verifier and reduce the verification costs in the final
     /// proof.
-    pub wrapper: Halo2WrapperProvingKey,
+    pub wrapper: Arc<Halo2WrapperProvingKey>,
     /// Whether to collect detailed profiling metrics
     pub profiling: bool,
 }
 
-impl<VC: VmConfig<F>> AppProvingKey<VC>
+impl<VC> AppProvingKey<VC>
 where
-    VC::Executor: Chip<SC>,
-    VC::Periphery: Chip<SC>,
+    VC: Clone + VmCircuitConfig<SC> + AsRef<SystemConfig>,
 {
-    pub fn keygen(config: AppConfig<VC>) -> Self {
+    pub fn keygen(config: AppConfig<VC>) -> Result<Self, AirInventoryError> {
         let app_engine = BabyBearPoseidon2Engine::new(config.app_fri_params.fri_params);
         let app_vm_pk = {
-            let vm = VirtualMachine::new(app_engine, config.app_vm_config.clone());
-            let vm_pk = vm.keygen();
+            let vm_pk = config.app_vm_config.create_airs()?.keygen(&app_engine);
             assert!(
                 vm_pk.max_constraint_degree
                     <= config.app_fri_params.fri_params.max_constraint_degree()
             );
+            check_max_constraint_degrees(
+                config.app_vm_config.as_ref(),
+                &config.app_fri_params.fri_params,
+            );
             VmProvingKey {
                 fri_params: config.app_fri_params.fri_params,
                 vm_config: config.app_vm_config.clone(),
@@ -126,49 +146,58 @@ where
             let leaf_engine = BabyBearPoseidon2Engine::new(config.leaf_fri_params.fri_params);
             let leaf_program = LeafVmVerifierConfig {
                 app_fri_params: config.app_fri_params.fri_params,
-                app_system_config: config.app_vm_config.system().clone(),
+                app_system_config: config.app_vm_config.as_ref().clone(),
                 compiler_options: config.compiler_options,
             }
             .build_program(&app_vm_pk.vm_pk.get_vk());
             Arc::new(VmCommittedExe::commit(
                 leaf_program.into(),
-                leaf_engine.config.pcs(),
+                leaf_engine.config().pcs(),
             ))
         };
-        Self {
+        Ok(Self {
             leaf_committed_exe,
             leaf_fri_params: config.leaf_fri_params.fri_params,
             app_vm_pk: Arc::new(app_vm_pk),
-        }
+        })
     }
 
     pub fn num_public_values(&self) -> usize {
-        self.app_vm_pk.vm_config.system().num_public_values
+        self.app_vm_pk.vm_config.as_ref().num_public_values
     }
 
     pub fn get_app_vk(&self) -> AppVerifyingKey {
         AppVerifyingKey {
             fri_params: self.app_vm_pk.fri_params,
-            app_vm_vk: self.app_vm_pk.vm_pk.get_vk(),
+            vk: self.app_vm_pk.vm_pk.get_vk(),
             memory_dimensions: self
                 .app_vm_pk
                 .vm_config
-                .system()
+                .as_ref()
                 .memory_config
                 .memory_dimensions(),
         }
     }
 
+    pub fn leaf_verifier_program_commit(&self) -> Com<SC> {
+        self.leaf_committed_exe.get_program_commit()
+    }
+
     pub fn app_fri_params(&self) -> FriParameters {
         self.app_vm_pk.fri_params
     }
 
-    pub fn commit_in_bn254(&self) -> Bn254Fr {
-        babybear_digest_to_bn254(&self.commit_in_babybear())
+    pub fn vm_config(&self) -> &VC {
+        &self.app_vm_pk.vm_config
     }
 
-    pub fn commit_in_babybear(&self) -> [F; DIGEST_SIZE] {
-        self.leaf_committed_exe.get_program_commit().into()
+    pub fn app_config(&self) -> AppConfig<VC> {
+        AppConfig {
+            app_fri_params: self.app_fri_params().into(),
+            app_vm_config: self.vm_config().clone(),
+            leaf_fri_params: self.leaf_fri_params.into(),
+            compiler_options: Default::default(),
+        }
     }
 }
 
@@ -255,28 +284,40 @@ fn check_recursive_verifier_size<SC: StarkGenericConfig>(
     }
 }
 
-impl AggStarkProvingKey {
-    pub fn keygen(config: AggStarkConfig) -> Self {
-        tracing::info_span!("agg_stark_keygen", group = "agg_stark_keygen")
-            .in_scope(|| Self::dummy_proof_and_keygen(config).0)
+impl AggProvingKey {
+    #[instrument(
+        name = "agg_stark_keygen",
+        fields(group = "agg_stark_keygen"),
+        skip_all
+    )]
+    pub fn keygen(config: AggregationConfig) -> Result<Self, VirtualMachineError> {
+        let (pk, _) = Self::dummy_proof_and_keygen(config)?;
+        Ok(pk)
     }
 
-    pub fn dummy_proof_and_keygen(config: AggStarkConfig) -> (Self, Proof<SC>) {
+    #[tracing::instrument(level = "info", fields(group = "agg_keygen"), skip_all)]
+    pub(crate) fn dummy_proof_and_keygen(
+        config: AggregationConfig,
+    ) -> Result<(Self, Proof<SC>), VirtualMachineError> {
         let leaf_vm_config = config.leaf_vm_config();
         let internal_vm_config = config.internal_vm_config();
         let root_vm_config = config.root_verifier_vm_config();
 
         let leaf_engine = BabyBearPoseidon2Engine::new(config.leaf_fri_params);
-        let leaf_vm_pk = Arc::new({
-            let vm = VirtualMachine::new(leaf_engine, leaf_vm_config.clone());
-            let vm_pk = vm.keygen();
+        let leaf_vm_pk = {
+            let (_, vm_pk) = VirtualMachine::new_with_keygen(
+                leaf_engine,
+                NativeCpuBuilder,
+                leaf_vm_config.clone(),
+            )?;
             assert!(vm_pk.max_constraint_degree <= config.leaf_fri_params.max_constraint_degree());
-            VmProvingKey {
+            check_max_constraint_degrees(&leaf_vm_config.system, &config.leaf_fri_params);
+            Arc::new(VmProvingKey {
                 fri_params: config.leaf_fri_params,
                 vm_config: leaf_vm_config,
                 vm_pk,
-            }
-        });
+            })
+        };
         let leaf_vm_vk = leaf_vm_pk.vm_pk.get_vk();
         check_recursive_verifier_size(
             &leaf_vm_vk,
@@ -285,17 +326,17 @@ impl AggStarkProvingKey {
         );
 
         let internal_engine = BabyBearPoseidon2Engine::new(config.internal_fri_params);
-        let internal_vm = VirtualMachine::new(internal_engine, internal_vm_config.clone());
-        let internal_vm_pk = Arc::new({
-            let vm_pk = internal_vm.keygen();
-            assert!(
-                vm_pk.max_constraint_degree <= config.internal_fri_params.max_constraint_degree()
-            );
-            VmProvingKey {
-                fri_params: config.internal_fri_params,
-                vm_config: internal_vm_config,
-                vm_pk,
-            }
+        let (internal_vm, vm_pk) = VirtualMachine::new_with_keygen(
+            internal_engine,
+            NativeCpuBuilder,
+            internal_vm_config.clone(),
+        )?;
+        check_max_constraint_degrees(&internal_vm_config.system, &config.internal_fri_params);
+        assert!(vm_pk.max_constraint_degree <= config.internal_fri_params.max_constraint_degree());
+        let internal_vm_pk = Arc::new(VmProvingKey {
+            fri_params: config.internal_fri_params,
+            vm_config: internal_vm_config,
+            vm_pk,
         });
         let internal_vm_vk = internal_vm_pk.vm_pk.get_vk();
         check_recursive_verifier_size(
@@ -310,9 +351,9 @@ impl AggStarkProvingKey {
             compiler_options: config.compiler_options,
         }
         .build_program(&leaf_vm_vk, &internal_vm_vk);
-        let internal_committed_exe = Arc::new(VmCommittedExe::<SC>::commit(
+        let internal_committed_exe = Arc::new(VmCommittedExe::commit(
             internal_program.into(),
-            internal_vm.engine.config.pcs(),
+            internal_vm.engine.config().pcs(),
         ));
 
         let internal_proof = dummy_internal_proof_riscv_app_vm(
@@ -320,7 +361,7 @@ impl AggStarkProvingKey {
             internal_vm_pk.clone(),
             internal_committed_exe.clone(),
             config.max_num_user_public_values,
-        );
+        )?;
 
         let root_verifier_pk = {
             let mut root_engine = BabyBearPoseidon2RootEngine::new(config.root_fri_params);
@@ -333,22 +374,27 @@ impl AggStarkProvingKey {
                 compiler_options: config.compiler_options,
             }
             .build_program(&leaf_vm_vk, &internal_vm_vk);
-            let root_committed_exe = Arc::new(VmCommittedExe::<RootSC>::commit(
+            let (mut vm, mut vm_pk) = VirtualMachine::new_with_keygen(
+                root_engine,
+                NativeCpuBuilder,
+                root_vm_config.clone(),
+            )?;
+            let root_committed_exe = Arc::new(VmCommittedExe::commit(
                 root_program.into(),
-                root_engine.config.pcs(),
+                vm.engine.config().pcs(),
             ));
 
-            let vm = VirtualMachine::new(root_engine, root_vm_config.clone());
-            let mut vm_pk = vm.keygen();
             assert!(vm_pk.max_constraint_degree <= config.root_fri_params.max_constraint_degree());
 
-            let (air_heights, vm_heights) = compute_root_proof_heights(
-                root_vm_config.clone(),
-                root_committed_exe.exe.clone(),
-                &internal_proof,
-            );
+            let air_heights =
+                compute_root_proof_heights(&mut vm, &root_committed_exe, &internal_proof)?;
             let root_air_perm = AirIdPermutation::compute(&air_heights);
+            // ATTENTION: make sure to permute everything in vm_pk that references the original AIR
+            // ID ordering:
             root_air_perm.permute(&mut vm_pk.per_air);
+            for thc in &mut vm_pk.trace_height_constraints {
+                root_air_perm.permute(&mut thc.coefficients);
+            }
 
             RootVerifierProvingKey {
                 vm_pk: Arc::new(VmProvingKey {
@@ -358,10 +404,9 @@ impl AggStarkProvingKey {
                 }),
                 root_committed_exe,
                 air_heights,
-                vm_heights,
             }
         };
-        (
+        Ok((
             Self {
                 leaf_vm_pk,
                 internal_vm_pk,
@@ -369,11 +414,22 @@ impl AggStarkProvingKey {
                 root_verifier_pk,
             },
             internal_proof,
-        )
+        ))
     }
 
-    pub fn internal_program_commit(&self) -> [F; DIGEST_SIZE] {
-        self.internal_committed_exe.get_program_commit().into()
+    pub fn get_agg_vk(&self) -> AggVerifyingKey {
+        let leaf_fri_params = self.leaf_vm_pk.fri_params;
+        let leaf_vk = self.leaf_vm_pk.vm_pk.get_vk();
+        let internal_fri_params = self.internal_vm_pk.fri_params;
+        let internal_vk = self.internal_vm_pk.vm_pk.get_vk();
+        let internal_verifier_program_commit = self.internal_committed_exe.get_program_commit();
+        AggVerifyingKey {
+            leaf_fri_params,
+            leaf_vk,
+            internal_fri_params,
+            internal_vk,
+            internal_verifier_program_commit,
+        }
     }
 
     pub fn num_user_public_values(&self) -> usize {
@@ -389,7 +445,7 @@ impl AggStarkProvingKey {
 /// Proving key for the root verifier.
 /// Properties:
 /// - Traces heights of each AIR is constant. This is required by the static verifier.
-/// - Instead of the AIR order specified by VC. AIRs are ordered by trace heights.
+/// - AIRs are ordered by trace heights instead of the AIR order specified by VmConfig.
 #[derive(Serialize, Deserialize, Derivative)]
 #[derivative(Clone(bound = "Com<SC>: Clone"))]
 pub struct RootVerifierProvingKey {
@@ -400,39 +456,34 @@ pub struct RootVerifierProvingKey {
     pub vm_pk: Arc<VmProvingKey<RootSC, NativeConfig>>,
     /// Committed executable for the root VM.
     pub root_committed_exe: Arc<VmCommittedExe<RootSC>>,
-    /// The constant trace heights, ordered by AIR ID.
-    pub air_heights: Vec<usize>,
-    /// The constant trace heights in a semantic way for VM.
-    pub vm_heights: VmComplexTraceHeights,
+    /// The constant trace heights, ordered by AIR ID (the original ordering from VmConfig).
+    pub air_heights: Vec<u32>,
 }
 
+#[cfg(feature = "evm-prove")]
 impl RootVerifierProvingKey {
-    pub fn air_id_permutation(&self) -> AirIdPermutation {
+    pub(crate) fn air_id_permutation(&self) -> AirIdPermutation {
         AirIdPermutation::compute(&self.air_heights)
     }
 }
 
 #[cfg(feature = "evm-prove")]
-impl AggProvingKey {
+impl Halo2ProvingKey {
     /// Attention:
     /// - This function is very expensive. Usually it requires >64GB memory and takes >10 minutes.
-    /// - Please make sure SRS(KZG parameters) is already downloaded.
-    #[tracing::instrument(level = "info", fields(group = "agg_keygen"), skip_all)]
+    /// - Please make sure SRS(KZG parameters) is already downloaded.
+    #[tracing::instrument(level = "info", fields(group = "halo2_keygen"), skip_all)]
     pub fn keygen(
-        config: AggConfig,
+        halo2_config: Halo2Config,
         reader: &impl Halo2ParamsReader,
         pv_handler: &impl StaticVerifierPvHandler,
-    ) -> Self {
-        let AggConfig {
-            agg_stark_config,
-            halo2_config,
-        } = config;
-        let (agg_stark_pk, dummy_internal_proof) =
-            AggStarkProvingKey::dummy_proof_and_keygen(agg_stark_config);
-        let dummy_root_proof = agg_stark_pk
+        agg_pk: &AggProvingKey,
+        dummy_internal_proof: Proof<SC>,
+    ) -> Result<Self, VirtualMachineError> {
+        let dummy_root_proof = agg_pk
             .root_verifier_pk
-            .generate_dummy_root_proof(dummy_internal_proof);
-        let verifier = agg_stark_pk.root_verifier_pk.keygen_static_verifier(
+            .generate_dummy_root_proof(dummy_internal_proof)?;
+        let verifier = agg_pk.root_verifier_pk.keygen_static_verifier(
             &reader.read_params(halo2_config.verifier_k),
             dummy_root_proof,
             pv_handler,
@@ -443,28 +494,28 @@ impl AggProvingKey {
         } else {
             Halo2WrapperProvingKey::keygen_auto_tune(reader, dummy_snark)
         };
-        let halo2_pk = Halo2ProvingKey {
-            verifier,
-            wrapper,
+        Ok(Halo2ProvingKey {
+            verifier: Arc::new(verifier),
+            wrapper: Arc::new(wrapper),
             profiling: halo2_config.profiling,
-        };
-        Self {
-            agg_stark_pk,
-            halo2_pk,
-        }
+        })
     }
 }
 
-pub fn leaf_keygen(
+/// For internal use only.
+pub fn _leaf_keygen(
     fri_params: FriParameters,
     leaf_vm_config: NativeConfig,
-) -> Arc<VmProvingKey<SC, NativeConfig>> {
+) -> Result<Arc<VmProvingKey<SC, NativeConfig>>, AirInventoryError> {
     let leaf_engine = BabyBearPoseidon2Engine::new(fri_params);
-    let leaf_vm_pk = info_span!("keygen", group = "leaf")
-        .in_scope(|| VirtualMachine::new(leaf_engine, leaf_vm_config.clone()).keygen());
-    Arc::new(VmProvingKey {
+    let leaf_vm_pk = info_span!("keygen", group = "leaf").in_scope(|| {
+        leaf_vm_config
+            .create_airs()
+            .map(|airs| airs.keygen(&leaf_engine))
+    })?;
+    Ok(Arc::new(VmProvingKey {
         fri_params,
         vm_config: leaf_vm_config,
         vm_pk: leaf_vm_pk,
-    })
+    }))
 }
diff --git a/crates/sdk/src/keygen/perm.rs b/crates/sdk/src/keygen/perm.rs
index 18d76b4958..f52d1ebd12 100644
--- a/crates/sdk/src/keygen/perm.rs
+++ b/crates/sdk/src/keygen/perm.rs
@@ -1,14 +1,15 @@
 use std::cmp::Reverse;
 
-use openvm_circuit::arch::{CONNECTOR_AIR_ID, PROGRAM_AIR_ID, PUBLIC_VALUES_AIR_ID};
+#[cfg(feature = "evm-prove")]
 use openvm_continuations::verifier::common::types::SpecialAirIds;
 
-pub struct AirIdPermutation {
+/// Permutation of the AIR IDs to order them by forced trace heights.
+pub(crate) struct AirIdPermutation {
     pub perm: Vec<usize>,
 }
 
 impl AirIdPermutation {
-    pub fn compute(heights: &[usize]) -> AirIdPermutation {
+    pub fn compute(heights: &[u32]) -> AirIdPermutation {
         let mut height_with_air_id: Vec<_> = heights.iter().copied().enumerate().collect();
         height_with_air_id.sort_by_key(|(_, h)| Reverse(*h));
         AirIdPermutation {
@@ -18,7 +19,10 @@ impl AirIdPermutation {
                 .collect(),
         }
     }
+    #[cfg(feature = "evm-prove")]
     pub fn get_special_air_ids(&self) -> SpecialAirIds {
+        use openvm_circuit::arch::{CONNECTOR_AIR_ID, PROGRAM_AIR_ID, PUBLIC_VALUES_AIR_ID};
+
         let perm_len = self.perm.len();
         let mut ret = SpecialAirIds {
             program_air_id: perm_len,
diff --git a/crates/sdk/src/keygen/static_verifier.rs b/crates/sdk/src/keygen/static_verifier.rs
index fd8e75a67d..51f86d059e 100644
--- a/crates/sdk/src/keygen/static_verifier.rs
+++ b/crates/sdk/src/keygen/static_verifier.rs
@@ -1,7 +1,9 @@
+use openvm_circuit::arch::{SingleSegmentVmProver, VirtualMachineError};
 use openvm_continuations::{
     static_verifier::{StaticVerifierConfig, StaticVerifierPvHandler},
     verifier::root::types::RootVmVerifierInput,
 };
+use openvm_native_circuit::NATIVE_MAX_TRACE_HEIGHTS;
 use openvm_native_compiler::prelude::*;
 use openvm_native_recursion::{
     halo2::{verifier::Halo2VerifierProvingKey, Halo2Params, Halo2Prover},
@@ -10,11 +12,7 @@ use openvm_native_recursion::{
 };
 use openvm_stark_sdk::openvm_stark_backend::{p3_field::FieldAlgebra, proof::Proof};
 
-use crate::{
-    keygen::RootVerifierProvingKey,
-    prover::{vm::SingleSegmentVmProver, RootVerifierLocalProver},
-    RootSC, F, SC,
-};
+use crate::{keygen::RootVerifierProvingKey, prover::RootVerifierLocalProver, RootSC, F, SC};
 
 impl RootVerifierProvingKey {
     /// Keygen the static verifier for this root verifier.
@@ -43,23 +41,21 @@ impl RootVerifierProvingKey {
         }
     }
 
-    pub fn generate_dummy_root_proof(&self, dummy_internal_proof: Proof<SC>) -> Proof<RootSC> {
-        let prover = RootVerifierLocalProver::new(self.clone());
+    pub fn generate_dummy_root_proof(
+        &self,
+        dummy_internal_proof: Proof<SC>,
+    ) -> Result<Proof<RootSC>, VirtualMachineError> {
+        let mut prover = RootVerifierLocalProver::new(self)?;
         // 2 * DIGEST_SIZE for exe_commit and leaf_commit
-        let num_public_values = prover
-            .root_verifier_pk
-            .vm_pk
-            .vm_config
-            .system
-            .num_public_values
-            - 2 * DIGEST_SIZE;
+        let num_public_values = prover.vm_config().as_ref().num_public_values - 2 * DIGEST_SIZE;
         SingleSegmentVmProver::prove(
-            &prover,
+            &mut prover,
             RootVmVerifierInput {
                 proofs: vec![dummy_internal_proof],
                 public_values: vec![F::ZERO; num_public_values],
             }
             .write(),
+            NATIVE_MAX_TRACE_HEIGHTS,
         )
     }
 }
diff --git a/crates/sdk/src/lib.rs b/crates/sdk/src/lib.rs
index c2c874d3f1..69ea7fc5d9 100644
--- a/crates/sdk/src/lib.rs
+++ b/crates/sdk/src/lib.rs
@@ -1,10 +1,16 @@
-use std::{borrow::Borrow, fs::read, marker::PhantomData, path::Path, sync::Arc};
+use std::{
+    borrow::Borrow,
+    fs::read,
+    marker::PhantomData,
+    path::Path,
+    sync::{Arc, OnceLock},
+};
 
 #[cfg(feature = "evm-verify")]
 use alloy_sol_types::sol;
-use commit::{commit_app_exe, AppExecutionCommit};
+use commit::AppExecutionCommit;
 use config::{AggregationTreeConfig, AppConfig};
-use eyre::Result;
+use getset::{Getters, MutGetters, WithSetters};
 use keygen::{AppProvingKey, AppVerifyingKey};
 use openvm_build::{
     build_guest_package, find_unique_executable, get_package, GuestOptions, TargetFilter,
@@ -13,66 +19,67 @@ use openvm_circuit::{
     arch::{
         hasher::{poseidon2::vm_poseidon2_hasher, Hasher},
         instructions::exe::VmExe,
-        verify_segments, ContinuationVmProof, ExecutionError, InitFileGenerator,
-        VerifiedExecutionPayload, VmConfig, VmExecutor, CONNECTOR_AIR_ID, PROGRAM_AIR_ID,
-        PROGRAM_CACHED_TRACE_INDEX, PUBLIC_VALUES_AIR_ID,
+        Executor, InitFileGenerator, MeteredExecutor, PreflightExecutor, VirtualMachineError,
+        VmBuilder, VmExecutionConfig, VmExecutor, VmVerificationError, CONNECTOR_AIR_ID,
+        PROGRAM_AIR_ID, PROGRAM_CACHED_TRACE_INDEX, PUBLIC_VALUES_AIR_ID,
     },
     system::{
-        memory::{tree::public_values::extract_public_values, CHUNK},
-        program::trace::{compute_exe_commit, VmCommittedExe},
+        memory::{
+            merkle::public_values::{extract_public_values, UserPublicValuesProofError},
+            CHUNK,
+        },
+        program::trace::compute_exe_commit,
     },
 };
 #[cfg(feature = "evm-prove")]
-pub use openvm_continuations::static_verifier::{
-    DefaultStaticVerifierPvHandler, StaticVerifierPvHandler,
-};
+pub use openvm_continuations::static_verifier::DefaultStaticVerifierPvHandler;
 use openvm_continuations::verifier::{
     common::types::VmVerifierPvs,
     internal::types::{InternalVmVerifierPvs, VmStarkProof},
-    root::{types::RootVmVerifierInput, RootVmVerifierConfig},
+    root::RootVmVerifierConfig,
 };
 // Re-exports:
 pub use openvm_continuations::{RootSC, C, F, SC};
+use openvm_native_circuit::{NativeConfig, NativeCpuBuilder};
+use openvm_native_compiler::conversion::CompilerOptions;
 #[cfg(feature = "evm-prove")]
-use openvm_native_recursion::halo2::utils::Halo2ParamsReader;
+use openvm_native_recursion::halo2::utils::{CacheHalo2ParamsReader, Halo2ParamsReader};
 use openvm_stark_backend::proof::Proof;
 use openvm_stark_sdk::{
-    config::{baby_bear_poseidon2::BabyBearPoseidon2Engine, FriParameters},
-    engine::StarkFriEngine,
-    openvm_stark_backend::Chip,
-    p3_bn254_fr::Bn254Fr,
+    config::baby_bear_poseidon2::BabyBearPoseidon2Engine,
+    engine::{StarkEngine, StarkFriEngine},
 };
 use openvm_transpiler::{
-    elf::Elf,
-    openvm_platform::memory::MEM_SIZE,
-    transpiler::{Transpiler, TranspilerError},
-    FromElf,
+    elf::Elf, openvm_platform::memory::MEM_SIZE, transpiler::Transpiler, FromElf,
 };
 #[cfg(feature = "evm-verify")]
 use snark_verifier_sdk::{evm::gen_evm_verifier_sol_code, halo2::aggregation::AggregationCircuit};
 
 #[cfg(feature = "evm-prove")]
-use crate::{config::AggConfig, keygen::AggProvingKey, prover::EvmHalo2Prover, types::EvmProof};
 use crate::{
-    config::{AggStarkConfig, SdkVmConfig},
-    keygen::{asm::program_to_asm, AggStarkProvingKey},
+    config::Halo2Config, keygen::Halo2ProvingKey, prover::EvmHalo2Prover, types::EvmProof,
+};
+use crate::{
+    config::{AggregationConfig, SdkVmCpuBuilder, TranspilerConfig},
+    keygen::{asm::program_to_asm, AggProvingKey, AggVerifyingKey},
     prover::{AppProver, StarkProver},
+    types::ExecutableFormat,
 };
 
 pub mod codec;
 pub mod commit;
 pub mod config;
+pub mod fs;
 pub mod keygen;
 pub mod prover;
+pub mod types;
+pub mod util;
 
+mod error;
 mod stdin;
+pub use error::SdkError;
 pub use stdin::*;
 
-pub mod fs;
-pub mod types;
-
-pub type NonRootCommittedExe = VmCommittedExe<SC>;
-
 pub const EVM_HALO2_VERIFIER_INTERFACE: &str =
     include_str!("../contracts/src/IOpenVmHalo2Verifier.sol");
 pub const EVM_HALO2_VERIFIER_TEMPLATE: &str =
@@ -89,371 +96,624 @@ sol! {
     concat!(env!("CARGO_MANIFEST_DIR"), "/contracts/abi/IOpenVmHalo2Verifier.json"),
 }
 
-/// The payload of a verified guest VM execution with user public values extracted and
-/// verified.
-pub struct VerifiedContinuationVmPayload {
-    /// The Merklelized hash of:
-    /// - Program code commitment (commitment of the cached trace)
-    /// - Merkle root of the initial memory
-    /// - Starting program counter (`pc_start`)
-    ///
-    /// The Merklelization uses Poseidon2 as a cryptographic hash function (for the leaves)
-    /// and a cryptographic compression function (for internal nodes).
-    pub exe_commit: [F; CHUNK],
-    pub user_public_values: Vec<F>,
-}
-
-pub struct GenericSdk<E: StarkFriEngine<SC>> {
+// The SDK is only generic in the engine for the non-root SC. The root SC is fixed to
+// BabyBearPoseidon2RootEngine right now.
+/// The SDK provides convenience methods and constructors for provers.
+///
+/// The SDK is stateful to cache results of computations that depend only on the App VM config and
+/// aggregation config. The SDK will not cache any state that depends on the program executable.
+///
+/// Some commonly used methods are:
+/// - [`execute`](Self::execute)
+/// - [`prove`](Self::prove)
+/// - [`verify_proof`](Self::verify_proof)
+#[derive(Getters, MutGetters, WithSetters)]
+pub struct GenericSdk<E, VB, NativeBuilder>
+where
+    E: StarkEngine<SC = SC>,
+    VB: VmBuilder<E>,
+    VB::VmConfig: VmExecutionConfig<F>,
+{
+    #[getset(get = "pub", get_mut = "pub", set_with = "pub")]
+    app_config: AppConfig<VB::VmConfig>,
+    #[getset(get = "pub", get_mut = "pub", set_with = "pub")]
+    agg_config: AggregationConfig,
+    #[getset(get = "pub", get_mut = "pub", set_with = "pub")]
     agg_tree_config: AggregationTreeConfig,
+    #[cfg(feature = "evm-prove")]
+    #[getset(get = "pub", get_mut = "pub", set_with = "pub")]
+    halo2_config: Halo2Config,
+
+    /// The `executor` may be used to construct different types of interpreters, given the program,
+    /// for more specific execution purposes. By default, it is recommended to use the
+    /// [`execute`](GenericSdk::execute) method.
+    #[getset(get = "pub")]
+    executor: VmExecutor<F, VB::VmConfig>,
+
+    app_pk: OnceLock<AppProvingKey<VB::VmConfig>>,
+    /// STARK aggregation proving key and dummy internal proof. Dummy internal proof is saved for
+    /// halo2 pkey generation usage.
+    agg_pk: OnceLock<AggProvingKey>,
+    dummy_internal_proof: OnceLock<Proof<SC>>,
+
+    #[cfg(feature = "evm-prove")]
+    #[getset(get = "pub", get_mut = "pub", set_with = "pub")]
+    halo2_params_reader: CacheHalo2ParamsReader,
+    #[cfg(feature = "evm-prove")]
+    halo2_pk: OnceLock<Halo2ProvingKey>,
+
+    #[getset(get = "pub")]
+    app_vm_builder: VB,
+    #[getset(get = "pub")]
+    native_builder: NativeBuilder,
+    transpiler: Option<Transpiler<F>>,
+
     _phantom: PhantomData<E>,
 }
 
-impl<E: StarkFriEngine<SC>> Default for GenericSdk<E> {
-    fn default() -> Self {
-        Self {
-            agg_tree_config: AggregationTreeConfig::default(),
-            _phantom: PhantomData,
-        }
-    }
-}
+pub type Sdk = GenericSdk<BabyBearPoseidon2Engine, SdkVmCpuBuilder, NativeCpuBuilder>;
 
-pub type Sdk = GenericSdk<BabyBearPoseidon2Engine>;
+impl Sdk {
+    /// Creates SDK with a standard configuration that includes a set of default VM extensions
+    /// loaded.
+    ///
+    /// **Note**: To use this configuration, the `app_vm_config` field of your `openvm.toml` must
+    /// exactly match the following, including the order of the moduli and elliptic curve
+    /// parameters of the respective extensions:
+    ///
+    /// ```toml
+    #[doc = include_str!("./config/openvm_standard.toml")]
+    /// ```
+    pub fn standard() -> Self {
+        GenericSdk::new(AppConfig::standard()).unwrap()
+    }
 
-impl<E: StarkFriEngine<SC>> GenericSdk<E> {
-    pub fn new() -> Self {
-        Self::default()
+    /// Creates SDK with a configuration with RISC-V RV32IM and IO VM extensions loaded.
+    ///
+    /// **Note**: To use this configuration, your `openvm.toml` must exactly match the following:
+    ///
+    /// ```toml
+    #[doc = include_str!("./config/openvm_riscv32.toml")]
+    /// ```
+    pub fn riscv32() -> Self {
+        GenericSdk::new(AppConfig::riscv32()).unwrap()
     }
+}
 
-    pub fn with_agg_tree_config(mut self, agg_tree_config: AggregationTreeConfig) -> Self {
-        self.agg_tree_config = agg_tree_config;
-        self
+// The SDK is only functional for SC = BabyBearPoseidon2Config because that is what recursive
+// aggregation supports.
+impl<E, VB, NativeBuilder> GenericSdk<E, VB, NativeBuilder>
+where
+    E: StarkFriEngine<SC = SC>,
+    VB: VmBuilder<E> + Clone,
+    <VB::VmConfig as VmExecutionConfig<F>>::Executor:
+        Executor<F> + MeteredExecutor<F> + PreflightExecutor<F, VB::RecordArena>,
+    NativeBuilder: VmBuilder<E, VmConfig = NativeConfig> + Clone,
+    <NativeConfig as VmExecutionConfig<F>>::Executor:
+        PreflightExecutor<F, <NativeBuilder as VmBuilder<E>>::RecordArena>,
+{
+    /// Creates SDK custom to the given [AppConfig], with a RISC-V transpiler.
+    pub fn new(app_config: AppConfig<VB::VmConfig>) -> Result<Self, SdkError>
+    where
+        VB: Default,
+        NativeBuilder: Default,
+        VB::VmConfig: TranspilerConfig<F>,
+    {
+        let transpiler = app_config.app_vm_config.transpiler();
+        let sdk = Self::new_without_transpiler(app_config)?.with_transpiler(transpiler);
+        Ok(sdk)
     }
 
-    pub fn agg_tree_config(&self) -> &AggregationTreeConfig {
-        &self.agg_tree_config
+    /// **Note**: This function does not set the transpiler, which must be done separately to
+    /// support RISC-V ELFs.
+    pub fn new_without_transpiler(app_config: AppConfig<VB::VmConfig>) -> Result<Self, SdkError>
+    where
+        VB: Default,
+        NativeBuilder: Default,
+    {
+        let system_config = app_config.app_vm_config.as_ref();
+        let profiling = system_config.profiling;
+        let compiler_options = CompilerOptions {
+            enable_cycle_tracker: profiling,
+            ..Default::default()
+        };
+        let executor = VmExecutor::new(app_config.app_vm_config.clone())
+            .map_err(|e| SdkError::Vm(e.into()))?;
+        let agg_config = AggregationConfig {
+            max_num_user_public_values: system_config.num_public_values,
+            leaf_fri_params: app_config.leaf_fri_params.fri_params,
+            profiling,
+            compiler_options,
+            ..Default::default()
+        };
+        #[cfg(feature = "evm-prove")]
+        let halo2_config = Halo2Config {
+            profiling,
+            ..Default::default()
+        };
+        Ok(Self {
+            app_config,
+            agg_config,
+            #[cfg(feature = "evm-prove")]
+            halo2_config,
+            agg_tree_config: Default::default(),
+            app_vm_builder: Default::default(),
+            native_builder: Default::default(),
+            transpiler: None,
+            executor,
+            app_pk: OnceLock::new(),
+            agg_pk: OnceLock::new(),
+            dummy_internal_proof: OnceLock::new(),
+            #[cfg(feature = "evm-prove")]
+            halo2_params_reader: CacheHalo2ParamsReader::new_with_default_params_dir(),
+            #[cfg(feature = "evm-prove")]
+            halo2_pk: OnceLock::new(),
+            _phantom: PhantomData,
+        })
     }
 
+    /// Builds the guest package located at `pkg_dir`. This function requires that the build target
+    /// is unique and errors otherwise. Returns the built ELF file decoded in the [Elf] type.
     pub fn build<P: AsRef<Path>>(
         &self,
         guest_opts: GuestOptions,
-        vm_config: &SdkVmConfig,
         pkg_dir: P,
         target_filter: &Option<TargetFilter>,
         init_file_name: Option<&str>, // If None, we use "openvm-init.rs"
-    ) -> Result<Elf> {
-        vm_config.write_to_init_file(pkg_dir.as_ref(), init_file_name)?;
+    ) -> Result<Elf, SdkError> {
+        self.app_config
+            .app_vm_config
+            .write_to_init_file(pkg_dir.as_ref(), init_file_name)?;
         let pkg = get_package(pkg_dir.as_ref());
         let target_dir = match build_guest_package(&pkg, &guest_opts, None, target_filter) {
             Ok(target_dir) => target_dir,
             Err(Some(code)) => {
-                return Err(eyre::eyre!("Failed to build guest: code = {}", code));
+                return Err(SdkError::BuildFailedWithCode(code));
             }
             Err(None) => {
-                return Err(eyre::eyre!(
-                    "Failed to build guest (OPENVM_SKIP_BUILD is set)"
-                ));
+                return Err(SdkError::BuildFailed);
             }
         };
 
-        let elf_path = find_unique_executable(pkg_dir, target_dir, target_filter)?;
+        let elf_path =
+            find_unique_executable(pkg_dir, target_dir, target_filter).map_err(SdkError::Other)?;
         let data = read(&elf_path)?;
-        Elf::decode(&data, MEM_SIZE as u32)
+        Elf::decode(&data, MEM_SIZE as u32).map_err(SdkError::Other)
+    }
+
+    /// Transpiler for transpiling RISC-V ELF to OpenVM executable.
+    pub fn transpiler(&self) -> Result<&Transpiler<F>, SdkError> {
+        self.transpiler
+            .as_ref()
+            .ok_or(SdkError::TranspilerNotAvailable)
+    }
+    pub fn set_transpiler(&mut self, transpiler: Transpiler<F>) {
+        self.transpiler = Some(transpiler);
+    }
+    pub fn with_transpiler(mut self, transpiler: Transpiler<F>) -> Self {
+        self.set_transpiler(transpiler);
+        self
     }
 
-    pub fn transpile(
+    pub fn convert_to_exe(
         &self,
-        elf: Elf,
-        transpiler: Transpiler<F>,
-    ) -> Result<VmExe<F>, TranspilerError> {
-        VmExe::from_elf(elf, transpiler)
+        executable: impl Into<ExecutableFormat>,
+    ) -> Result<Arc<VmExe<F>>, SdkError> {
+        let executable = executable.into();
+        let exe = match executable {
+            ExecutableFormat::Elf(elf) => {
+                let transpiler = self.transpiler()?.clone();
+                Arc::new(VmExe::from_elf(elf, transpiler)?)
+            }
+            ExecutableFormat::VmExe(exe) => Arc::new(exe),
+            ExecutableFormat::SharedVmExe(exe) => exe,
+        };
+        Ok(exe)
     }
 
-    pub fn execute<VC: VmConfig<F>>(
+    /// Executes `app_exe` on `inputs` and returns the user public values as bytes (`Vec<u8>`).
+    pub fn execute(
         &self,
-        exe: VmExe<F>,
-        vm_config: VC,
+        app_exe: impl Into<ExecutableFormat>,
         inputs: StdIn,
-    ) -> Result<Vec<F>, ExecutionError>
-    where
-        VC::Executor: Chip<SC>,
-        VC::Periphery: Chip<SC>,
-    {
-        let vm = VmExecutor::new(vm_config);
-        let final_memory = vm.execute(exe, inputs)?;
+    ) -> Result<Vec<u8>, SdkError> {
+        let exe = self.convert_to_exe(app_exe)?;
+        let instance = self
+            .executor
+            .instance(&exe)
+            .map_err(VirtualMachineError::from)?;
+        let final_memory = instance
+            .execute(inputs, None)
+            .map_err(VirtualMachineError::from)?
+            .memory;
         let public_values = extract_public_values(
-            &vm.config.system().memory_config.memory_dimensions(),
-            vm.config.system().num_public_values,
-            final_memory.as_ref().unwrap(),
+            self.executor.config.as_ref().num_public_values,
+            &final_memory.memory,
         );
         Ok(public_values)
     }
 
-    pub fn commit_app_exe(
-        &self,
-        app_fri_params: FriParameters,
-        exe: VmExe<F>,
-    ) -> Result<Arc<NonRootCommittedExe>> {
-        let committed_exe = commit_app_exe(app_fri_params, exe);
-        Ok(committed_exe)
-    }
+    // ======================== Proving Methods ============================
 
-    pub fn app_keygen<VC: VmConfig<F>>(&self, config: AppConfig<VC>) -> Result<AppProvingKey<VC>>
-    where
-        VC::Executor: Chip<SC>,
-        VC::Periphery: Chip<SC>,
-    {
-        let app_pk = AppProvingKey::keygen(config);
-        Ok(app_pk)
+    /// Generates a single aggregate STARK proof of the full program execution of the given
+    /// `app_exe` with program inputs `inputs`.
+    ///
+    /// The returned STARK proof is not intended for EVM verification. For EVM verification, use the
+    /// `prove_evm` method, which requires the `"evm-prove"` feature to be
+    /// enabled.
+    ///
+    /// For convenience, this function also returns the [AppExecutionCommit], which is a full
+    /// commitment to the App VM config and the App [VmExe]. It does **not** depend on the `inputs`.
+    /// It can be generated separately from the proof by creating a
+    /// [`prover`](Self::prover) and calling
+    /// [`app_commit`](StarkProver::app_commit).
+    ///
+    /// If STARK aggregation is not needed and a proof whose size may grow linearly with the length
+    /// of the program runtime is desired, create an [`app_prover`](Self::app_prover) and call
+    /// [`app_prover.prove(inputs)`](AppProver::prove).
+    pub fn prove(
+        &self,
+        app_exe: impl Into<ExecutableFormat>,
+        inputs: StdIn,
+    ) -> Result<(VmStarkProof<SC>, AppExecutionCommit), SdkError> {
+        let mut prover = self.prover(app_exe)?;
+        let app_commit = prover.app_prover.app_commit();
+        let proof = prover.prove(inputs)?;
+        Ok((proof, app_commit))
     }
 
-    pub fn generate_app_proof<VC: VmConfig<F>>(
+    #[cfg(feature = "evm-prove")]
+    pub fn prove_evm(
         &self,
-        app_pk: Arc<AppProvingKey<VC>>,
-        app_committed_exe: Arc<NonRootCommittedExe>,
+        app_exe: impl Into<ExecutableFormat>,
         inputs: StdIn,
-    ) -> Result<ContinuationVmProof<SC>>
-    where
-        VC::Executor: Chip<SC>,
-        VC::Periphery: Chip<SC>,
-    {
-        let app_prover = AppProver::<VC, E>::new(app_pk.app_vm_pk.clone(), app_committed_exe);
-        let proof = app_prover.generate_app_proof(inputs);
+    ) -> Result<EvmProof, SdkError> {
+        let app_exe = self.convert_to_exe(app_exe)?;
+        let mut evm_prover = self.evm_prover(app_exe)?;
+        let proof = evm_prover.prove_evm(inputs)?;
         Ok(proof)
     }
 
-    /// Verifies the [ContinuationVmProof], which is a collection of STARK proofs as well as
-    /// additional Merkle proof for user public values.
+    // ========================= Prover Constructors =========================
+
+    /// Constructs a new [StarkProver] instance for the given executable.
+    /// This function will generate the [AppProvingKey] and [AggProvingKey] if they do not already
+    /// exist.
+    pub fn prover(
+        &self,
+        app_exe: impl Into<ExecutableFormat>,
+    ) -> Result<StarkProver<E, VB, NativeBuilder>, SdkError> {
+        let app_exe = self.convert_to_exe(app_exe)?;
+        let app_pk = self.app_pk();
+        let agg_pk = self.agg_pk();
+        let stark_prover = StarkProver::<E, _, _>::new(
+            self.app_vm_builder.clone(),
+            self.native_builder.clone(),
+            app_pk,
+            app_exe,
+            agg_pk,
+            self.agg_tree_config,
+        )?;
+        Ok(stark_prover)
+    }
+
+    #[cfg(feature = "evm-prove")]
+    pub fn evm_prover(
+        &self,
+        app_exe: impl Into<ExecutableFormat>,
+    ) -> Result<EvmHalo2Prover<E, VB, NativeBuilder>, SdkError> {
+        let app_exe = self.convert_to_exe(app_exe)?;
+        let evm_prover = EvmHalo2Prover::<E, _, _>::new(
+            self.halo2_params_reader(),
+            self.app_vm_builder.clone(),
+            self.native_builder.clone(),
+            self.app_pk(),
+            app_exe,
+            self.agg_pk(),
+            self.halo2_pk().clone(),
+            self.agg_tree_config,
+        )?;
+        Ok(evm_prover)
+    }
+
+    /// This constructor is for generating app proofs that do not require a single aggregate STARK
+    /// proof of the full program execution. For a single STARK proof, use the
+    /// [`prove`](Self::prove) method instead.
     ///
-    /// This function verifies the STARK proofs and additional conditions to ensure that the
-    /// `proof` is a valid proof of guest VM execution that terminates successfully (exit code 0)
-    /// _with respect to_ a commitment to some VM executable.
-    /// It is the responsibility of the caller to check that the commitment matches the expected
-    /// VM executable.
-    pub fn verify_app_proof(
+    /// Creates an app prover instance specific to the provided exe.
+    /// This function will generate the [AppProvingKey] if it doesn't already exist and use it to
+    /// construct the [AppProver].
+    pub fn app_prover(
         &self,
-        app_vk: &AppVerifyingKey,
-        proof: &ContinuationVmProof<SC>,
-    ) -> Result<VerifiedContinuationVmPayload> {
-        let engine = E::new(app_vk.fri_params);
-        let VerifiedExecutionPayload {
-            exe_commit,
-            final_memory_root,
-        } = verify_segments(&engine, &app_vk.app_vm_vk, &proof.per_segment)?;
+        exe: impl Into<ExecutableFormat>,
+    ) -> Result<AppProver<E, VB>, SdkError> {
+        let exe = self.convert_to_exe(exe)?;
+        let app_pk = self.app_pk();
+        let prover = AppProver::<E, VB>::new(
+            self.app_vm_builder.clone(),
+            &app_pk.app_vm_pk,
+            exe,
+            app_pk.leaf_verifier_program_commit(),
+        )?;
+        Ok(prover)
+    }
 
-        let hasher = vm_poseidon2_hasher();
-        proof
-            .user_public_values
-            .verify(&hasher, app_vk.memory_dimensions, final_memory_root)?;
+    // ======================== Keygen Related Methods ========================
 
-        Ok(VerifiedContinuationVmPayload {
-            exe_commit,
-            user_public_values: proof.user_public_values.public_values.clone(),
-        })
+    /// Generates the app proving key once and caches it. Future calls will return the cached key.
+    ///
+    /// # Panics
+    /// This function will panic if the app keygen fails.
+    pub fn app_keygen(&self) -> (AppProvingKey<VB::VmConfig>, AppVerifyingKey) {
+        let pk = self.app_pk().clone();
+        let vk = pk.get_app_vk();
+        (pk, vk)
     }
 
-    pub fn verify_app_proof_without_continuations(
+    /// Generates the app proving key once and caches it. Future calls will return the cached key.
+    ///
+    /// # Panics
+    /// This function will panic if the app keygen fails.
+    pub fn app_pk(&self) -> &AppProvingKey<VB::VmConfig> {
+        // TODO[jpw]: use `get_or_try_init` once it is stable
+        self.app_pk.get_or_init(|| {
+            AppProvingKey::keygen(self.app_config.clone()).expect("app_keygen failed")
+        })
+    }
+    /// Sets the app proving key. Returns `Ok(())` if app keygen has not been called and
+    /// `Err(app_pk)` if keygen has already been called.
+    pub fn set_app_pk(
         &self,
-        app_vk: &AppVerifyingKey,
-        proof: &Proof<SC>,
-    ) -> Result<()> {
-        let e = E::new(app_vk.fri_params);
-        e.verify(&app_vk.app_vm_vk, proof)?;
-        Ok(())
+        app_pk: AppProvingKey<VB::VmConfig>,
+    ) -> Result<(), AppProvingKey<VB::VmConfig>> {
+        self.app_pk.set(app_pk)
+    }
+    /// See [`set_app_pk`](Self::set_app_pk). This should only be used in a constructor, and panics
+    /// if app keygen has already been called.
+    pub fn with_app_pk(self, app_pk: AppProvingKey<VB::VmConfig>) -> Self {
+        let _ = self
+            .set_app_pk(app_pk)
+            .map_err(|_| panic!("app_pk already set"));
+        self
     }
 
-    #[cfg(feature = "evm-prove")]
-    pub fn agg_keygen(
-        &self,
-        config: AggConfig,
-        reader: &impl Halo2ParamsReader,
-        pv_handler: &impl StaticVerifierPvHandler,
-    ) -> Result<AggProvingKey> {
-        let agg_pk = AggProvingKey::keygen(config, reader, pv_handler);
-        Ok(agg_pk)
+    /// Generates the proving keys necessary for STARK aggregation. Generates the proving keys once
+    /// and caches them. Future calls will return the cached key. This function does not include
+    /// [`app_keygen`](Self::app_keygen), which is specific to the App VM config. The proving keys
+    /// generated in this step are independent of the App VM config.
+    ///
+    /// # Panics
+    /// This function will panic if the keygen fails.
+    pub fn agg_keygen(&self) -> Result<(AggProvingKey, AggVerifyingKey), SdkError> {
+        let agg_pk = self.agg_pk().clone();
+        let agg_vk = agg_pk.get_agg_vk();
+        Ok((agg_pk, agg_vk))
     }
 
-    pub fn agg_stark_keygen(&self, config: AggStarkConfig) -> Result<AggStarkProvingKey> {
-        let agg_pk = AggStarkProvingKey::keygen(config);
-        Ok(agg_pk)
+    pub fn agg_pk(&self) -> &AggProvingKey {
+        // TODO[jpw]: use `get_or_try_init` once it is stable
+        self.agg_pk.get_or_init(|| {
+            let (agg_pk, dummy_proof) =
+                AggProvingKey::dummy_proof_and_keygen(self.agg_config).expect("agg_keygen failed");
+            let prev = self.dummy_internal_proof.set(dummy_proof);
+            if prev.is_err() {
+                tracing::debug!("dummy proof already exists, did not overwrite");
+            }
+            agg_pk
+        })
+    }
+    /// Sets the aggregation proving keys. Returns `Ok(())` if agg keygen has not been called and
+    /// `Err(agg_pk)` if keygen has already been called.
+    pub fn set_agg_pk(&self, agg_pk: AggProvingKey) -> Result<(), AggProvingKey> {
+        self.agg_pk.set(agg_pk)
+    }
+    /// See [`set_agg_pk`](Self::set_agg_pk). This should only be used in a constructor, and panics
+    /// if the aggregation proving key has already been set or generated.
+    pub fn with_agg_pk(self, agg_pk: AggProvingKey) -> Self {
+        let _ = self
+            .set_agg_pk(agg_pk)
+            .map_err(|_| panic!("agg_pk already set"));
+        self
+    }
+    // We have this function in case agg_pk is set externally without setting dummy proof.
+    fn dummy_internal_proof(&self) -> &Proof<SC> {
+        self.dummy_internal_proof.get_or_init(|| {
+            let (agg_pk, dummy_proof) =
+                AggProvingKey::dummy_proof_and_keygen(self.agg_config).expect("agg_keygen failed");
+            let prev = self.agg_pk.set(agg_pk);
+            if prev.is_err() {
+                tracing::debug!("agg_pk already exists, did not overwrite");
+            }
+            dummy_proof
+        })
+    }
+    pub fn agg_pk_and_dummy_internal_proof(&self) -> (&AggProvingKey, &Proof<SC>) {
+        (self.agg_pk(), self.dummy_internal_proof())
     }
 
-    pub fn generate_root_verifier_asm(&self, agg_stark_pk: &AggStarkProvingKey) -> String {
+    pub fn generate_root_verifier_asm(&self) -> String {
+        let agg_pk = self.agg_pk();
         let kernel_asm = RootVmVerifierConfig {
-            leaf_fri_params: agg_stark_pk.leaf_vm_pk.fri_params,
-            internal_fri_params: agg_stark_pk.internal_vm_pk.fri_params,
-            num_user_public_values: agg_stark_pk.num_user_public_values(),
-            internal_vm_verifier_commit: agg_stark_pk
-                .internal_committed_exe
-                .get_program_commit()
-                .into(),
+            leaf_fri_params: agg_pk.leaf_vm_pk.fri_params,
+            internal_fri_params: agg_pk.internal_vm_pk.fri_params,
+            num_user_public_values: agg_pk.num_user_public_values(),
+            internal_vm_verifier_commit: agg_pk.internal_committed_exe.get_program_commit().into(),
             compiler_options: Default::default(),
         }
         .build_kernel_asm(
-            &agg_stark_pk.leaf_vm_pk.vm_pk.get_vk(),
-            &agg_stark_pk.internal_vm_pk.vm_pk.get_vk(),
+            &agg_pk.leaf_vm_pk.vm_pk.get_vk(),
+            &agg_pk.internal_vm_pk.vm_pk.get_vk(),
         );
         program_to_asm(kernel_asm)
     }
 
-    pub fn generate_root_verifier_input<VC: VmConfig<F>>(
-        &self,
-        app_pk: Arc<AppProvingKey<VC>>,
-        app_exe: Arc<NonRootCommittedExe>,
-        agg_stark_pk: AggStarkProvingKey,
-        inputs: StdIn,
-    ) -> Result<RootVmVerifierInput<SC>>
-    where
-        VC::Executor: Chip<SC>,
-        VC::Periphery: Chip<SC>,
-    {
-        let stark_prover =
-            StarkProver::<VC, E>::new(app_pk, app_exe, agg_stark_pk, self.agg_tree_config);
-        let proof = stark_prover.generate_root_verifier_input(inputs);
-        Ok(proof)
+    #[cfg(feature = "evm-prove")]
+    pub fn halo2_keygen(&self) -> Halo2ProvingKey {
+        self.halo2_pk().clone()
     }
 
-    pub fn generate_e2e_stark_proof<VC: VmConfig<F>>(
-        &self,
-        app_pk: Arc<AppProvingKey<VC>>,
-        app_exe: Arc<NonRootCommittedExe>,
-        agg_stark_pk: AggStarkProvingKey,
-        inputs: StdIn,
-    ) -> Result<VmStarkProof<SC>>
-    where
-        VC::Executor: Chip<SC>,
-        VC::Periphery: Chip<SC>,
-    {
-        let stark_prover =
-            StarkProver::<VC, E>::new(app_pk, app_exe, agg_stark_pk, self.agg_tree_config);
-        let proof = stark_prover.generate_e2e_stark_proof(inputs);
-        Ok(proof)
+    #[cfg(feature = "evm-prove")]
+    pub fn halo2_pk(&self) -> &Halo2ProvingKey {
+        // TODO[jpw]: use `get_or_try_init` once it is stable
+        self.halo2_pk.get_or_init(|| {
+            let (agg_pk, dummy_internal_proof) = self.agg_pk_and_dummy_internal_proof();
+            Halo2ProvingKey::keygen(
+                self.halo2_config,
+                self.halo2_params_reader(),
+                &DefaultStaticVerifierPvHandler,
+                agg_pk,
+                dummy_internal_proof.clone(),
+            )
+            .expect("halo2_keygen failed")
+        })
+    }
+    /// Sets the halo2 proving keys. Returns `Ok(())` if halo2 keygen has not been called and
+    /// `Err(halo2_pk)` if keygen has already been called.
+    #[cfg(feature = "evm-prove")]
+    pub fn set_halo2_pk(&self, halo2_pk: Halo2ProvingKey) -> Result<(), Halo2ProvingKey> {
+        self.halo2_pk.set(halo2_pk)
+    }
+    /// See [`set_halo2_pk`](Self::set_halo2_pk). This should only be used in a constructor, and
+    /// panics if the halo2 proving key has already been set or generated.
+    #[cfg(feature = "evm-prove")]
+    pub fn with_halo2_pk(self, halo2_pk: Halo2ProvingKey) -> Self {
+        let _ = self
+            .set_halo2_pk(halo2_pk)
+            .map_err(|_| panic!("halo2_pk already set"));
+        self
+    }
 
-    pub fn verify_e2e_stark_proof(
-        &self,
-        agg_stark_pk: &AggStarkProvingKey,
+    #[cfg(feature = "evm-prove")]
+    pub fn with_halo2_params_dir(mut self, params_dir: impl AsRef<Path>) -> Self {
+        self.set_halo2_params_dir(params_dir);
+        self
+    }
+    #[cfg(feature = "evm-prove")]
+    pub fn set_halo2_params_dir(&mut self, params_dir: impl AsRef<Path>) {
+        self.halo2_params_reader = CacheHalo2ParamsReader::new(params_dir);
+    }
+
+    // ======================== Verification Methods ========================
+
+    /// Verifies aggregate STARK proof of VM execution.
+    ///
+    /// **Note**: This function does not have any reliance on `self` and does not depend on the app
+    /// config set in the [Sdk].
+    pub fn verify_proof(
+        agg_vk: &AggVerifyingKey,
+        expected_app_commit: AppExecutionCommit,
         proof: &VmStarkProof<SC>,
-        expected_exe_commit: &Bn254Fr,
-        expected_vm_commit: &Bn254Fr,
-    ) -> Result<AppExecutionCommit> {
-        if proof.proof.per_air.len() < 3 {
-            return Err(eyre::eyre!(
-                "Invalid number of AIRs: expected at least 3, got {}",
-                proof.proof.per_air.len()
-            ));
-        } else if proof.proof.per_air[0].air_id != PROGRAM_AIR_ID {
-            return Err(eyre::eyre!("Missing program AIR"));
-        } else if proof.proof.per_air[1].air_id != CONNECTOR_AIR_ID {
-            return Err(eyre::eyre!("Missing connector AIR"));
-        } else if proof.proof.per_air[2].air_id != PUBLIC_VALUES_AIR_ID {
-            return Err(eyre::eyre!("Missing public values AIR"));
+    ) -> Result<(), SdkError> {
+        if proof.inner.per_air.len() < 3 {
+            return Err(VmVerificationError::NotEnoughAirs(proof.inner.per_air.len()).into());
+        } else if proof.inner.per_air[0].air_id != PROGRAM_AIR_ID {
+            return Err(VmVerificationError::SystemAirMissing {
+                air_id: PROGRAM_AIR_ID,
+            }
+            .into());
+        } else if proof.inner.per_air[1].air_id != CONNECTOR_AIR_ID {
+            return Err(VmVerificationError::SystemAirMissing {
+                air_id: CONNECTOR_AIR_ID,
+            }
+            .into());
+        } else if proof.inner.per_air[2].air_id != PUBLIC_VALUES_AIR_ID {
+            return Err(VmVerificationError::SystemAirMissing {
+                air_id: PUBLIC_VALUES_AIR_ID,
+            }
+            .into());
         }
-        let public_values_air_proof_data = &proof.proof.per_air[2];
+        let public_values_air_proof_data = &proof.inner.per_air[2];
 
         let program_commit =
-            proof.proof.commitments.main_trace[PROGRAM_CACHED_TRACE_INDEX].as_ref();
-        let internal_commit: &[_; CHUNK] = &agg_stark_pk
-            .internal_committed_exe
-            .get_program_commit()
-            .into();
-
-        let (vm_pk, vm_commit) = if program_commit == internal_commit {
-            let internal_pvs: &InternalVmVerifierPvs<_> = public_values_air_proof_data
-                .public_values
-                .as_slice()
-                .borrow();
-            if internal_commit != &internal_pvs.extra_pvs.internal_program_commit {
-                return Err(eyre::eyre!(
-                    "Invalid internal program commit: expected {:?}, got {:?}",
-                    internal_commit,
-                    internal_pvs.extra_pvs.internal_program_commit
-                ));
-            }
-            (
-                &agg_stark_pk.internal_vm_pk,
-                internal_pvs.extra_pvs.leaf_verifier_commit,
-            )
-        } else {
-            (&agg_stark_pk.leaf_vm_pk, *program_commit)
-        };
-        let e = E::new(vm_pk.fri_params);
-        e.verify(&vm_pk.vm_pk.get_vk(), &proof.proof)?;
+            proof.inner.commitments.main_trace[PROGRAM_CACHED_TRACE_INDEX].as_ref();
+        let internal_commit: &[_; CHUNK] = &agg_vk.internal_verifier_program_commit.into();
+
+        let (fri_params_final, vk_final, claimed_app_vm_commit) =
+            if program_commit == internal_commit {
+                let internal_pvs: &InternalVmVerifierPvs<_> = public_values_air_proof_data
+                    .public_values
+                    .as_slice()
+                    .borrow();
+                if internal_commit != &internal_pvs.extra_pvs.internal_program_commit {
+                    tracing::debug!(
+                        "Invalid internal program commit: expected {:?}, got {:?}",
+                        internal_commit,
+                        internal_pvs.extra_pvs.internal_program_commit
+                    );
+                    return Err(VmVerificationError::ProgramCommitMismatch { index: 0 }.into());
+                }
+                (
+                    agg_vk.internal_fri_params,
+                    &agg_vk.internal_vk,
+                    internal_pvs.extra_pvs.leaf_verifier_commit,
+                )
+            } else {
+                (agg_vk.leaf_fri_params, &agg_vk.leaf_vk, *program_commit)
+            };
+        let e = E::new(fri_params_final);
+        e.verify(vk_final, &proof.inner)
+            .map_err(VmVerificationError::from)?;
 
         let pvs: &VmVerifierPvs<_> =
             public_values_air_proof_data.public_values[..VmVerifierPvs::<u8>::width()].borrow();
 
         if let Some(exit_code) = pvs.connector.exit_code() {
             if exit_code != 0 {
-                return Err(eyre::eyre!(
-                    "Invalid exit code: expected 0, got {}",
-                    exit_code
-                ));
+                return Err(VmVerificationError::ExitCodeMismatch {
+                    expected: 0,
+                    actual: exit_code,
+                }
+                .into());
             }
         } else {
-            return Err(eyre::eyre!("Program did not terminate"));
+            return Err(VmVerificationError::IsTerminateMismatch {
+                expected: true,
+                actual: false,
+            }
+            .into());
         }
 
         let hasher = vm_poseidon2_hasher();
         let public_values_root = hasher.merkle_root(&proof.user_public_values);
         if public_values_root != pvs.public_values_commit {
-            return Err(eyre::eyre!(
+            tracing::debug!(
                 "Invalid public values root: expected {:?}, got {:?}",
                 pvs.public_values_commit,
                 public_values_root
-            ));
+            );
+            return Err(VmVerificationError::UserPublicValuesError(
+                UserPublicValuesProofError::UserPublicValuesCommitMismatch,
+            )
+            .into());
         }
 
-        let exe_commit = compute_exe_commit(
+        let claimed_app_exe_commit = compute_exe_commit(
             &hasher,
             &pvs.app_commit,
             &pvs.memory.initial_root,
             pvs.connector.initial_pc,
         );
-        let app_commit = AppExecutionCommit::from_field_commit(exe_commit, vm_commit);
-        let exe_commit_bn254 = app_commit.app_exe_commit.to_bn254();
-        let vm_commit_bn254 = app_commit.app_vm_commit.to_bn254();
-
-        if exe_commit_bn254 != *expected_exe_commit {
-            return Err(eyre::eyre!(
-                "Invalid app exe commit: expected {:?}, got {:?}",
-                expected_exe_commit,
-                exe_commit_bn254
-            ));
-        } else if vm_commit_bn254 != *expected_vm_commit {
-            return Err(eyre::eyre!(
-                "Invalid app vm commit: expected {:?}, got {:?}",
-                expected_vm_commit,
-                vm_commit_bn254
-            ));
+        let claimed_app_commit =
+            AppExecutionCommit::from_field_commit(claimed_app_exe_commit, claimed_app_vm_commit);
+        let exe_commit_bn254 = claimed_app_commit.app_exe_commit.to_bn254();
+        let vm_commit_bn254 = claimed_app_commit.app_vm_commit.to_bn254();
+
+        if exe_commit_bn254 != expected_app_commit.app_exe_commit.to_bn254() {
+            return Err(SdkError::InvalidAppExeCommit {
+                expected: expected_app_commit.app_exe_commit,
+                actual: claimed_app_commit.app_exe_commit,
+            });
+        } else if vm_commit_bn254 != expected_app_commit.app_vm_commit.to_bn254() {
+            return Err(SdkError::InvalidAppVmCommit {
+                expected: expected_app_commit.app_vm_commit,
+                actual: claimed_app_commit.app_vm_commit,
+            });
         }
-        Ok(app_commit)
-    }
-
-    #[cfg(feature = "evm-prove")]
-    pub fn generate_evm_proof<VC: VmConfig<F>>(
-        &self,
-        reader: &impl Halo2ParamsReader,
-        app_pk: Arc<AppProvingKey<VC>>,
-        app_exe: Arc<NonRootCommittedExe>,
-        agg_pk: AggProvingKey,
-        inputs: StdIn,
-    ) -> Result<EvmProof>
-    where
-        VC::Executor: Chip<SC>,
-        VC::Periphery: Chip<SC>,
-    {
-        let e2e_prover =
-            EvmHalo2Prover::<VC, E>::new(reader, app_pk, app_exe, agg_pk, self.agg_tree_config);
-        let proof = e2e_prover.generate_proof_for_evm(inputs);
-        Ok(proof)
+        Ok(())
     }
 
     #[cfg(feature = "evm-verify")]
-    pub fn generate_halo2_verifier_solidity(
-        &self,
-        reader: &impl Halo2ParamsReader,
-        agg_pk: &AggProvingKey,
-    ) -> Result<types::EvmHalo2Verifier> {
+    pub fn generate_halo2_verifier_solidity(&self) -> Result<types::EvmHalo2Verifier, SdkError> {
         use std::{
             fs::{create_dir_all, write},
             io::Write,
@@ -477,8 +737,11 @@ impl<E: StarkFriEngine<SC>> GenericSdk<E> {
             EVM_HALO2_VERIFIER_PARENT_NAME,
         };
 
-        let params = reader.read_params(agg_pk.halo2_pk.wrapper.pinning.metadata.config_params.k);
-        let pinning = &agg_pk.halo2_pk.wrapper.pinning;
+        let reader = self.halo2_params_reader();
+        let halo2_pk = self.halo2_pk();
+
+        let params = reader.read_params(halo2_pk.wrapper.pinning.metadata.config_params.k);
+        let pinning = &halo2_pk.wrapper.pinning;
 
         assert_eq!(
             pinning.metadata.config_params.k as u32,
@@ -492,7 +755,7 @@ impl<E: StarkFriEngine<SC>> GenericSdk<E> {
             pinning.metadata.num_pvs.clone(),
         );
 
-        let wrapper_pvs = agg_pk.halo2_pk.wrapper.pinning.metadata.num_pvs.clone();
+        let wrapper_pvs = halo2_pk.wrapper.pinning.metadata.num_pvs.clone();
         let pvs_length = match wrapper_pvs.first() {
             // We subtract 14 to exclude the KZG accumulator and the app exe
             // and vm commits.
@@ -558,7 +821,9 @@ impl<E: StarkFriEngine<SC>> GenericSdk<E> {
         .expect("Failed to format openvm verifier code");
 
         // Create temp dir
-        let temp_dir = tempdir().wrap_err("Failed to create temp dir")?;
+        let temp_dir = tempdir()
+            .wrap_err("Failed to create temp dir")
+            .map_err(SdkError::Other)?;
         let temp_path = temp_dir.path();
         let root_path = Path::new("src").join(format!("v{}", OPENVM_VERSION));
 
@@ -638,14 +903,15 @@ impl<E: StarkFriEngine<SC>> GenericSdk<E> {
         let output = child.wait_with_output().expect("Failed to read output");
 
         if !output.status.success() {
-            eyre::bail!(
+            return Err(SdkError::Other(eyre::eyre!(
                 "solc exited with status {}: {}",
                 output.status,
                 String::from_utf8_lossy(&output.stderr)
-            );
+            )));
         }
 
-        let parsed: Value = serde_json::from_slice(&output.stdout)?;
+        let parsed: Value =
+            serde_json::from_slice(&output.stdout).map_err(|e| SdkError::Other(e.into()))?;
 
         let bytecode = parsed
             .get("contracts")
@@ -686,15 +952,16 @@ impl<E: StarkFriEngine<SC>> GenericSdk<E> {
     #[cfg(feature = "evm-verify")]
     /// Uses the `verify(..)` interface of the `OpenVmHalo2Verifier` contract.
     pub fn verify_evm_halo2_proof(
-        &self,
         openvm_verifier: &types::EvmHalo2Verifier,
         evm_proof: EvmProof,
-    ) -> Result<u64> {
+    ) -> Result<u64, SdkError> {
         let calldata = evm_proof.verifier_calldata();
         let deployment_code = openvm_verifier.artifact.bytecode.clone();
 
         let gas_cost = snark_verifier::loader::evm::deploy_and_call(deployment_code, calldata)
-            .map_err(|reason| eyre::eyre!("Sdk::verify_openvm_evm_proof: {reason:?}"))?;
+            .map_err(|reason| {
+                SdkError::Other(eyre::eyre!("Sdk::verify_openvm_evm_proof: {reason:?}"))
+            })?;
 
         Ok(gas_cost)
     }
diff --git a/crates/sdk/src/prover/agg.rs b/crates/sdk/src/prover/agg.rs
index aa8fc843cb..6def0d2e68 100644
--- a/crates/sdk/src/prover/agg.rs
+++ b/crates/sdk/src/prover/agg.rs
@@ -1,34 +1,38 @@
 use std::sync::Arc;
 
-use openvm_circuit::arch::ContinuationVmProof;
+use openvm_circuit::arch::{
+    instructions::exe::VmExe, ContinuationVmProof, PreflightExecutor, SingleSegmentVmProver,
+    VirtualMachineError, VmBuilder, VmExecutionConfig, VmInstance,
+};
+#[cfg(feature = "evm-prove")]
+use openvm_continuations::verifier::root::types::RootVmVerifierInput;
 use openvm_continuations::verifier::{
     internal::types::{InternalVmVerifierInput, VmStarkProof},
     leaf::types::LeafVmVerifierInput,
-    root::types::RootVmVerifierInput,
 };
-use openvm_native_circuit::NativeConfig;
-use openvm_native_compiler::ir::DIGEST_SIZE;
+use openvm_native_circuit::{NativeConfig, NATIVE_MAX_TRACE_HEIGHTS};
 use openvm_native_recursion::hints::Hintable;
 use openvm_stark_sdk::{engine::StarkFriEngine, openvm_stark_backend::proof::Proof};
-use tracing::info_span;
+use tracing::{info_span, instrument};
 
 use crate::{
-    config::AggregationTreeConfig,
-    keygen::AggStarkProvingKey,
-    prover::{
-        vm::{local::VmLocalProver, SingleSegmentVmProver},
-        RootVerifierLocalProver,
-    },
-    NonRootCommittedExe, RootSC, F, SC,
+    config::AggregationTreeConfig, keygen::AggProvingKey, prover::vm::new_local_prover,
+    util::check_max_constraint_degrees, F, SC,
 };
+#[cfg(feature = "evm-prove")]
+use crate::{prover::RootVerifierLocalProver, RootSC};
 
-pub struct AggStarkProver<E: StarkFriEngine<SC>> {
-    leaf_prover: VmLocalProver<SC, NativeConfig, E>,
+pub struct AggStarkProver<E, NativeBuilder>
+where
+    E: StarkFriEngine<SC = SC>,
+    NativeBuilder: VmBuilder<E, VmConfig = NativeConfig>,
+{
+    leaf_prover: VmInstance<E, NativeBuilder>,
     leaf_controller: LeafProvingController,
 
-    internal_prover: VmLocalProver<SC, NativeConfig, E>,
+    internal_prover: VmInstance<E, NativeBuilder>,
+    #[cfg(feature = "evm-prove")]
     root_prover: RootVerifierLocalProver,
-
     pub num_children_internal: usize,
     pub max_internal_wrapper_layers: usize,
 }
@@ -38,27 +42,55 @@ pub struct LeafProvingController {
     pub num_children: usize,
 }
 
-impl<E: StarkFriEngine<SC>> AggStarkProver<E> {
+impl<E, NativeBuilder> AggStarkProver<E, NativeBuilder>
+where
+    E: StarkFriEngine<SC = SC>,
+    NativeBuilder: VmBuilder<E, VmConfig = NativeConfig> + Clone,
+    <NativeConfig as VmExecutionConfig<F>>::Executor:
+        PreflightExecutor<F, <NativeBuilder as VmBuilder<E>>::RecordArena>,
+{
     pub fn new(
-        agg_stark_pk: AggStarkProvingKey,
-        leaf_committed_exe: Arc<NonRootCommittedExe>,
+        native_builder: NativeBuilder,
+        agg_pk: &AggProvingKey,
+        leaf_verifier_exe: Arc<VmExe<F>>,
+        tree_config: AggregationTreeConfig,
+    ) -> Result<Self, VirtualMachineError> {
+        let leaf_prover = new_local_prover(
+            native_builder.clone(),
+            &agg_pk.leaf_vm_pk,
+            leaf_verifier_exe,
+        )?;
+        let internal_prover = new_local_prover(
+            native_builder,
+            &agg_pk.internal_vm_pk,
+            agg_pk.internal_committed_exe.exe.clone(),
+        )?;
+        #[cfg(feature = "evm-prove")]
+        let root_prover = RootVerifierLocalProver::new(&agg_pk.root_verifier_pk)?;
+        Ok(Self::new_from_instances(
+            leaf_prover,
+            internal_prover,
+            #[cfg(feature = "evm-prove")]
+            root_prover,
+            tree_config,
+        ))
+    }
+
+    pub fn new_from_instances(
+        leaf_instance: VmInstance<E, NativeBuilder>,
+        internal_instance: VmInstance<E, NativeBuilder>,
+        #[cfg(feature = "evm-prove")] root_instance: RootVerifierLocalProver,
         tree_config: AggregationTreeConfig,
     ) -> Self {
-        let leaf_prover =
-            VmLocalProver::<SC, NativeConfig, E>::new(agg_stark_pk.leaf_vm_pk, leaf_committed_exe);
         let leaf_controller = LeafProvingController {
             num_children: tree_config.num_children_leaf,
         };
-        let internal_prover = VmLocalProver::<SC, NativeConfig, E>::new(
-            agg_stark_pk.internal_vm_pk,
-            agg_stark_pk.internal_committed_exe,
-        );
-        let root_prover = RootVerifierLocalProver::new(agg_stark_pk.root_verifier_pk);
         Self {
-            leaf_prover,
+            leaf_prover: leaf_instance,
             leaf_controller,
-            internal_prover,
-            root_prover,
+            internal_prover: internal_instance,
+            #[cfg(feature = "evm-prove")]
+            root_prover: root_instance,
             num_children_internal: tree_config.num_children_internal,
             max_internal_wrapper_layers: tree_config.max_internal_wrapper_layers,
         }
@@ -80,31 +112,50 @@ impl<E: StarkFriEngine<SC>> AggStarkProver<E> {
     }
 
     /// Generate the root proof for outer recursion.
-    pub fn generate_root_proof(&self, app_proofs: ContinuationVmProof<SC>) -> Proof<RootSC> {
-        let root_verifier_input = self.generate_root_verifier_input(app_proofs);
+    #[cfg(feature = "evm-prove")]
+    pub fn generate_root_proof(
+        &mut self,
+        app_proofs: ContinuationVmProof<SC>,
+    ) -> Result<Proof<RootSC>, VirtualMachineError> {
+        let root_verifier_input = self.generate_root_verifier_input(app_proofs)?;
         self.generate_root_proof_impl(root_verifier_input)
     }
 
-    pub fn generate_leaf_proofs(&self, app_proofs: &ContinuationVmProof<SC>) -> Vec<Proof<SC>> {
+    pub fn generate_leaf_proofs(
+        &mut self,
+        app_proofs: &ContinuationVmProof<SC>,
+    ) -> Result<Vec<Proof<SC>>, VirtualMachineError> {
+        check_max_constraint_degrees(
+            self.leaf_prover.vm.config().as_ref(),
+            &self.leaf_prover.vm.engine.fri_params(),
+        );
         self.leaf_controller
-            .generate_proof(&self.leaf_prover, app_proofs)
+            .generate_proof(&mut self.leaf_prover, app_proofs)
     }
 
+    /// This is typically only used for the halo2 verifier.
+    #[cfg(feature = "evm-prove")]
     pub fn generate_root_verifier_input(
-        &self,
+        &mut self,
         app_proofs: ContinuationVmProof<SC>,
-    ) -> RootVmVerifierInput<SC> {
-        let leaf_proofs = self.generate_leaf_proofs(&app_proofs);
+    ) -> Result<RootVmVerifierInput<SC>, VirtualMachineError> {
+        let leaf_proofs = self.generate_leaf_proofs(&app_proofs)?;
         let public_values = app_proofs.user_public_values.public_values;
-        let e2e_stark_proof = self.aggregate_leaf_proofs(leaf_proofs, public_values);
-        self.wrap_e2e_stark_proof(e2e_stark_proof)
+        let e2e_stark_proof = self.aggregate_leaf_proofs(leaf_proofs, public_values)?;
+        let wrapped_stark_proof = self.wrap_e2e_stark_proof(e2e_stark_proof)?;
+        Ok(wrapped_stark_proof)
     }
 
     pub fn aggregate_leaf_proofs(
-        &self,
+        &mut self,
         leaf_proofs: Vec<Proof<SC>>,
         public_values: Vec<F>,
-    ) -> VmStarkProof<SC> {
+    ) -> Result<VmStarkProof<SC>, VirtualMachineError> {
+        check_max_constraint_degrees(
+            self.internal_prover.vm.config().as_ref(),
+            &self.internal_prover.vm.engine.fri_params(),
+        );
+
         let mut internal_node_idx = -1;
         let mut internal_node_height = 0;
         let mut proofs = leaf_proofs;
@@ -112,10 +163,7 @@ impl<E: StarkFriEngine<SC>> AggStarkProver<E> {
         // proof, in order to shrink the proof size
         while proofs.len() > 1 || internal_node_height == 0 {
             let internal_inputs = InternalVmVerifierInput::chunk_leaf_or_internal_proofs(
-                self.internal_prover
-                    .committed_exe
-                    .get_program_commit()
-                    .into(),
+                (*self.internal_prover.program_commitment()).into(),
                 &proofs,
                 self.num_children_internal,
             );
@@ -124,10 +172,10 @@ impl<E: StarkFriEngine<SC>> AggStarkProver<E> {
                 group = format!("internal.{internal_node_height}")
             )
             .in_scope(|| {
-                #[cfg(feature = "bench-metrics")]
+                #[cfg(feature = "metrics")]
                 {
                     metrics::counter!("fri.log_blowup")
-                        .absolute(self.internal_prover.fri_params().log_blowup as u64);
+                        .absolute(self.internal_prover.vm.engine.fri_params().log_blowup as u64);
                     metrics::counter!("num_children").absolute(self.num_children_internal as u64);
                 }
                 internal_inputs
@@ -135,47 +183,101 @@ impl<E: StarkFriEngine<SC>> AggStarkProver<E> {
                     .map(|input| {
                         internal_node_idx += 1;
                         info_span!("single_internal_agg", idx = internal_node_idx,).in_scope(|| {
-                            SingleSegmentVmProver::prove(&self.internal_prover, input.write())
+                            SingleSegmentVmProver::prove(
+                                &mut self.internal_prover,
+                                input.write(),
+                                NATIVE_MAX_TRACE_HEIGHTS,
+                            )
                         })
                     })
-                    .collect()
-            });
+                    .collect::<Result<Vec<_>, _>>()
+            })?;
             internal_node_height += 1;
         }
-        VmStarkProof {
-            proof: proofs.pop().unwrap(),
+        let proof = proofs.pop().unwrap();
+        Ok(VmStarkProof {
+            inner: proof,
             user_public_values: public_values,
-        }
+        })
     }
 
     /// Wrap the e2e stark proof until its heights meet the requirements of the root verifier.
-    pub fn wrap_e2e_stark_proof(
-        &self,
+    #[cfg(feature = "evm-prove")]
+    fn wrap_e2e_stark_proof(
+        &mut self,
         e2e_stark_proof: VmStarkProof<SC>,
-    ) -> RootVmVerifierInput<SC> {
-        let internal_commit = self
-            .internal_prover
-            .committed_exe
-            .get_program_commit()
-            .into();
-        wrap_e2e_stark_proof(
-            &self.internal_prover,
-            &self.root_prover,
-            internal_commit,
-            self.max_internal_wrapper_layers,
-            e2e_stark_proof,
-        )
-    }
+    ) -> Result<RootVmVerifierInput<SC>, VirtualMachineError> {
+        let internal_commit = (*self.internal_prover.program_commitment()).into();
+        let internal_prover = &mut self.internal_prover;
+        let root_prover = &mut self.root_prover;
+        let max_internal_wrapper_layers = self.max_internal_wrapper_layers;
+        fn heights_le(a: &[u32], b: &[u32]) -> bool {
+            assert_eq!(a.len(), b.len());
+            a.iter().zip(b.iter()).all(|(a, b)| a <= b)
+        }
 
-    fn generate_root_proof_impl(&self, root_input: RootVmVerifierInput<SC>) -> Proof<RootSC> {
-        info_span!("agg_layer", group = "root", idx = 0).in_scope(|| {
-            let input = root_input.write();
-            #[cfg(feature = "bench-metrics")]
-            metrics::counter!("fri.log_blowup")
-                .absolute(self.root_prover.fri_params().log_blowup as u64);
-            SingleSegmentVmProver::prove(&self.root_prover, input)
+        let VmStarkProof {
+            inner: mut proof,
+            user_public_values,
+        } = e2e_stark_proof;
+        let mut wrapper_layers = 0;
+        loop {
+            let input = RootVmVerifierInput {
+                proofs: vec![proof.clone()],
+                public_values: user_public_values.clone(),
+            };
+            let actual_air_heights = root_prover.execute_for_air_heights(input)?;
+            // Root verifier can handle the internal proof. We can stop here.
+            if heights_le(&actual_air_heights, root_prover.fixed_air_heights()) {
+                break;
+            }
+            if wrapper_layers >= max_internal_wrapper_layers {
+                panic!("The heights of the root verifier still exceed the required heights after {} wrapper layers", max_internal_wrapper_layers);
+            }
+            wrapper_layers += 1;
+            let input = InternalVmVerifierInput {
+                self_program_commit: internal_commit,
+                proofs: vec![proof.clone()],
+            };
+            proof = info_span!(
+                "wrapper_layer",
+                group = format!("internal_wrapper.{wrapper_layers}")
+            )
+            .in_scope(|| {
+                #[cfg(feature = "metrics")]
+                {
+                    metrics::counter!("fri.log_blowup")
+                        .absolute(internal_prover.vm.engine.fri_params().log_blowup as u64);
+                }
+                SingleSegmentVmProver::prove(
+                    internal_prover,
+                    input.write(),
+                    NATIVE_MAX_TRACE_HEIGHTS,
+                )
+            })?;
+        }
+        Ok(RootVmVerifierInput {
+            proofs: vec![proof],
+            public_values: user_public_values,
         })
     }
+
+    #[cfg(feature = "evm-prove")]
+    #[instrument(name = "agg_layer", skip_all, fields(group = "root", idx = 0))]
+    fn generate_root_proof_impl(
+        &mut self,
+        root_input: RootVmVerifierInput<SC>,
+    ) -> Result<Proof<RootSC>, VirtualMachineError> {
+        check_max_constraint_degrees(
+            self.root_prover.vm_config().as_ref(),
+            self.root_prover.fri_params(),
+        );
+        let input = root_input.write();
+        #[cfg(feature = "metrics")]
+        metrics::counter!("fri.log_blowup")
+            .absolute(self.root_prover.fri_params().log_blowup as u64);
+        SingleSegmentVmProver::prove(&mut self.root_prover, input, NATIVE_MAX_TRACE_HEIGHTS)
+    }
 }
 
 impl LeafProvingController {
@@ -184,85 +286,39 @@ impl LeafProvingController {
         self
     }
 
-    pub fn generate_proof<E: StarkFriEngine<SC>>(
+    #[instrument(name = "agg_layer", skip_all, fields(group = "leaf"))]
+    pub fn generate_proof<E, NativeBuilder>(
         &self,
-        prover: &VmLocalProver<SC, NativeConfig, E>,
+        prover: &mut VmInstance<E, NativeBuilder>,
         app_proofs: &ContinuationVmProof<SC>,
-    ) -> Vec<Proof<SC>> {
-        info_span!("agg_layer", group = "leaf").in_scope(|| {
-            #[cfg(feature = "bench-metrics")]
-            {
-                metrics::counter!("fri.log_blowup").absolute(prover.fri_params().log_blowup as u64);
-                metrics::counter!("num_children").absolute(self.num_children as u64);
-            }
-            let leaf_inputs =
-                LeafVmVerifierInput::chunk_continuation_vm_proof(app_proofs, self.num_children);
-            tracing::info!("num_leaf_proofs={}", leaf_inputs.len());
-            leaf_inputs
-                .into_iter()
-                .enumerate()
-                .map(|(leaf_node_idx, input)| {
-                    info_span!("single_leaf_agg", idx = leaf_node_idx)
-                        .in_scope(|| SingleSegmentVmProver::prove(prover, input.write_to_stream()))
-                })
-                .collect::<Vec<_>>()
-        })
-    }
-}
-
-/// Wrap the e2e stark proof until its heights meet the requirements of the root verifier.
-pub fn wrap_e2e_stark_proof<E: StarkFriEngine<SC>>(
-    internal_prover: &VmLocalProver<SC, NativeConfig, E>,
-    root_prover: &RootVerifierLocalProver,
-    internal_commit: [F; DIGEST_SIZE],
-    max_internal_wrapper_layers: usize,
-    e2e_stark_proof: VmStarkProof<SC>,
-) -> RootVmVerifierInput<SC> {
-    let VmStarkProof {
-        mut proof,
-        user_public_values,
-    } = e2e_stark_proof;
-    let mut wrapper_layers = 0;
-    loop {
-        let actual_air_heights = root_prover.execute_for_air_heights(RootVmVerifierInput {
-            proofs: vec![proof.clone()],
-            public_values: user_public_values.clone(),
-        });
-        // Root verifier can handle the internal proof. We can stop here.
-        if heights_le(
-            &actual_air_heights,
-            &root_prover.root_verifier_pk.air_heights,
-        ) {
-            break;
-        }
-        if wrapper_layers >= max_internal_wrapper_layers {
-            panic!("The heights of the root verifier still exceed the required heights after {} wrapper layers", max_internal_wrapper_layers);
+    ) -> Result<Vec<Proof<SC>>, VirtualMachineError>
+    where
+        E: StarkFriEngine<SC = SC>,
+        NativeBuilder: VmBuilder<E, VmConfig = NativeConfig>,
+        <NativeConfig as VmExecutionConfig<F>>::Executor:
+            PreflightExecutor<F, <NativeBuilder as VmBuilder<E>>::RecordArena>,
+    {
+        #[cfg(feature = "metrics")]
+        {
+            metrics::counter!("fri.log_blowup")
+                .absolute(prover.vm.engine.fri_params().log_blowup as u64);
+            metrics::counter!("num_children").absolute(self.num_children as u64);
         }
-        wrapper_layers += 1;
-        let input = InternalVmVerifierInput {
-            self_program_commit: internal_commit,
-            proofs: vec![proof.clone()],
-        };
-        proof = info_span!(
-            "wrapper_layer",
-            group = format!("internal_wrapper.{wrapper_layers}")
-        )
-        .in_scope(|| {
-            #[cfg(feature = "bench-metrics")]
-            {
-                metrics::counter!("fri.log_blowup")
-                    .absolute(internal_prover.fri_params().log_blowup as u64);
-            }
-            SingleSegmentVmProver::prove(internal_prover, input.write())
-        });
-    }
-    RootVmVerifierInput {
-        proofs: vec![proof],
-        public_values: user_public_values,
+        let leaf_inputs =
+            LeafVmVerifierInput::chunk_continuation_vm_proof(app_proofs, self.num_children);
+        tracing::info!("num_leaf_proofs={}", leaf_inputs.len());
+        leaf_inputs
+            .into_iter()
+            .enumerate()
+            .map(|(leaf_node_idx, input)| {
+                info_span!("single_leaf_agg", idx = leaf_node_idx).in_scope(|| {
+                    SingleSegmentVmProver::prove(
+                        prover,
+                        input.write_to_stream(),
+                        NATIVE_MAX_TRACE_HEIGHTS,
+                    )
+                })
+            })
+            .collect()
     }
 }
-
-fn heights_le(a: &[usize], b: &[usize]) -> bool {
-    assert_eq!(a.len(), b.len());
-    a.iter().zip(b.iter()).all(|(a, b)| a <= b)
-}
diff --git a/crates/sdk/src/prover/app.rs b/crates/sdk/src/prover/app.rs
index 095351677e..87dd414fb2 100644
--- a/crates/sdk/src/prover/app.rs
+++ b/crates/sdk/src/prover/app.rs
@@ -1,37 +1,96 @@
-use std::sync::Arc;
+use std::sync::{Arc, OnceLock};
 
 use getset::Getters;
-use openvm_circuit::arch::{ContinuationVmProof, VmConfig};
-use openvm_stark_backend::{proof::Proof, Chip};
-use openvm_stark_sdk::engine::StarkFriEngine;
+use itertools::Itertools;
+use openvm_circuit::{
+    arch::{
+        hasher::poseidon2::{vm_poseidon2_hasher, Poseidon2Hasher},
+        instructions::exe::VmExe,
+        verify_segments, ContinuationVmProof, ContinuationVmProver, Executor, MeteredExecutor,
+        PreflightExecutor, VerifiedExecutionPayload, VirtualMachineError, VmBuilder,
+        VmExecutionConfig, VmInstance, VmVerificationError,
+    },
+    system::memory::CHUNK,
+};
+use openvm_stark_backend::{
+    config::{Com, Val},
+    keygen::types::MultiStarkVerifyingKey,
+    p3_field::PrimeField32,
+};
+use openvm_stark_sdk::{
+    config::baby_bear_poseidon2::BabyBearPoseidon2Engine,
+    engine::{StarkEngine, StarkFriEngine},
+};
 use tracing::info_span;
 
-use super::vm::SingleSegmentVmProver;
 use crate::{
-    prover::vm::{local::VmLocalProver, types::VmProvingKey, ContinuationVmProver},
-    NonRootCommittedExe, StdIn, F, SC,
+    commit::{AppExecutionCommit, CommitBytes},
+    keygen::AppVerifyingKey,
+    prover::vm::{new_local_prover, types::VmProvingKey},
+    util::check_max_constraint_degrees,
+    StdIn, F, SC,
 };
 
 #[derive(Getters)]
-pub struct AppProver<VC, E: StarkFriEngine<SC>> {
+pub struct AppProver<E, VB>
+where
+    E: StarkEngine,
+    VB: VmBuilder<E>,
+{
     pub program_name: Option<String>,
     #[getset(get = "pub")]
-    app_prover: VmLocalProver<SC, VC, E>,
+    instance: VmInstance<E, VB>,
+    #[getset(get = "pub")]
+    app_vm_vk: MultiStarkVerifyingKey<E::SC>,
+    #[getset(get = "pub")]
+    leaf_verifier_program_commit: Com<E::SC>,
+
+    app_execution_commit: OnceLock<AppExecutionCommit>,
 }
 
-impl<VC, E: StarkFriEngine<SC>> AppProver<VC, E> {
+impl<E, VB> AppProver<E, VB>
+where
+    E: StarkFriEngine,
+    VB: VmBuilder<E>,
+    Val<E::SC>: PrimeField32,
+    Com<E::SC>: AsRef<[Val<E::SC>; CHUNK]> + From<[Val<E::SC>; CHUNK]> + Into<[Val<E::SC>; CHUNK]>,
+{
+    /// Creates a new [AppProver] instance. This method will re-commit the `exe` program on device.
+    /// If a cached version of the program already exists on device, then directly use the
+    /// [`Self::new_from_instance`] constructor.
+    ///
+    /// The `leaf_verifier_program_commit` is the commitment to the program of the leaf verifier
+    /// that verifies the App VM circuit. It can be found in the `AppProvingKey`.
     pub fn new(
-        app_vm_pk: Arc<VmProvingKey<SC, VC>>,
-        app_committed_exe: Arc<NonRootCommittedExe>,
-    ) -> Self
-    where
-        VC: VmConfig<F>,
-    {
+        vm_builder: VB,
+        app_vm_pk: &VmProvingKey<E::SC, VB::VmConfig>,
+        app_exe: Arc<VmExe<Val<E::SC>>>,
+        leaf_verifier_program_commit: Com<E::SC>,
+    ) -> Result<Self, VirtualMachineError> {
+        let instance = new_local_prover(vm_builder, app_vm_pk, app_exe)?;
+        let app_vm_vk = app_vm_pk.vm_pk.get_vk();
+
+        Ok(Self::new_from_instance(
+            instance,
+            app_vm_vk,
+            leaf_verifier_program_commit,
+        ))
+    }
+
+    pub fn new_from_instance(
+        instance: VmInstance<E, VB>,
+        app_vm_vk: MultiStarkVerifyingKey<E::SC>,
+        leaf_verifier_program_commit: Com<E::SC>,
+    ) -> Self {
         Self {
             program_name: None,
-            app_prover: VmLocalProver::<SC, VC, E>::new(app_vm_pk, app_committed_exe),
+            instance,
+            app_vm_vk,
+            leaf_verifier_program_commit,
+            app_execution_commit: OnceLock::new(),
         }
     }
+
     pub fn set_program_name(&mut self, program_name: impl AsRef<str>) -> &mut Self {
         self.program_name = Some(program_name.as_ref().to_string());
         self
@@ -41,43 +100,45 @@ impl<VC, E: StarkFriEngine<SC>> AppProver<VC, E> {
         self
     }
 
-    /// Generates proof for every continuation segment
-    pub fn generate_app_proof(&self, input: StdIn) -> ContinuationVmProof<SC>
-    where
-        VC: VmConfig<F>,
-        VC::Executor: Chip<SC>,
-        VC::Periphery: Chip<SC>,
-    {
-        assert!(
-            self.vm_config().system().continuation_enabled,
-            "Use generate_app_proof_without_continuations instead."
-        );
-        info_span!(
-            "app proof",
-            group = self
-                .program_name
-                .as_ref()
-                .unwrap_or(&"app_proof".to_string())
-        )
-        .in_scope(|| {
-            #[cfg(feature = "bench-metrics")]
-            metrics::counter!("fri.log_blowup")
-                .absolute(self.app_prover.pk.fri_params.log_blowup as u64);
-            ContinuationVmProver::prove(&self.app_prover, input)
+    /// Returns [AppExecutionCommit], which is a commitment to **both** the App VM and the App
+    /// VmExe.
+    pub fn app_commit(&self) -> AppExecutionCommit {
+        *self.app_execution_commit.get_or_init(|| {
+            AppExecutionCommit::compute::<E::SC>(
+                &self.instance().vm.config().as_ref().memory_config,
+                self.instance().exe(),
+                self.instance().program_commitment().clone(),
+                self.leaf_verifier_program_commit.clone(),
+            )
         })
     }
 
-    pub fn generate_app_proof_without_continuations(&self, input: StdIn) -> Proof<SC>
+    pub fn app_program_commit(&self) -> Com<E::SC> {
+        self.instance().program_commitment().clone()
+    }
+
+    /// Generates proof for every continuation segment
+    ///
+    /// Before returning, the segment proofs are checked with [verify_segments] and the resulting
+    /// exe commit is compared against [Self::app_commit]; a mismatch is returned as an error.
+    pub fn prove(
+        &mut self,
+        input: StdIn<Val<E::SC>>,
+    ) -> Result<ContinuationVmProof<E::SC>, VirtualMachineError>
     where
-        VC: VmConfig<F>,
-        VC::Executor: Chip<SC>,
-        VC::Periphery: Chip<SC>,
+        <VB::VmConfig as VmExecutionConfig<Val<E::SC>>>::Executor: Executor<Val<E::SC>>
+            + MeteredExecutor<Val<E::SC>>
+            + PreflightExecutor<Val<E::SC>, VB::RecordArena>,
     {
         assert!(
-            !self.vm_config().system().continuation_enabled,
-            "Use generate_app_proof instead."
+            self.vm_config().as_ref().continuation_enabled,
+            "Use generate_app_proof_without_continuations instead."
         );
-        info_span!(
+        check_max_constraint_degrees(
+            self.vm_config().as_ref(),
+            &self.instance.vm.engine.fri_params(),
+        );
+        let proofs = info_span!(
             "app proof",
             group = self
                 .program_name
@@ -85,15 +146,85 @@ impl<VC, E: StarkFriEngine<SC>> AppProver<VC, E> {
                 .unwrap_or(&"app_proof".to_string())
         )
         .in_scope(|| {
-            #[cfg(feature = "bench-metrics")]
+            #[cfg(feature = "metrics")]
             metrics::counter!("fri.log_blowup")
-                .absolute(self.app_prover.pk.fri_params.log_blowup as u64);
-            SingleSegmentVmProver::prove(&self.app_prover, input)
-        })
+                .absolute(self.instance.vm.engine.fri_params().log_blowup as u64);
+            ContinuationVmProver::prove(&mut self.instance, input)
+        })?;
+        // We skip verification of the user public values proof here because it is directly computed
+        // from the merkle tree above
+        let res = verify_segments(
+            &self.instance.vm.engine,
+            &self.app_vm_vk,
+            &proofs.per_segment,
+        )?;
+        let app_exe_commit_u32s = self.app_commit().app_exe_commit.to_u32_digest();
+        let exe_commit_u32s = res.exe_commit.map(|x| x.as_canonical_u32());
+        if exe_commit_u32s != app_exe_commit_u32s {
+            return Err(VmVerificationError::ExeCommitMismatch {
+                expected: app_exe_commit_u32s,
+                actual: exe_commit_u32s,
+            }
+            .into());
+        }
+        Ok(proofs)
     }
 
     /// App VM config
-    pub fn vm_config(&self) -> &VC {
-        self.app_prover.vm_config()
+    pub fn vm_config(&self) -> &VB::VmConfig {
+        self.instance.vm.config()
     }
 }
+
+/// Payload of a verified guest VM execution, with user public values extracted and verified.
+pub struct VerifiedAppArtifacts {
+    /// The Merkleized hash of:
+    /// - Program code commitment (commitment of the cached trace)
+    /// - Merkle root of the initial memory
+    /// - Starting program counter (`pc_start`)
+    ///
+    /// The Merkleization uses Poseidon2 as a cryptographic hash function (for the leaves)
+    /// and a cryptographic compression function (for internal nodes).
+    pub app_exe_commit: CommitBytes,
+    /// The user public values of the execution, one byte per value.
+    pub user_public_values: Vec<u8>,
+}
+
+/// Verifies the [ContinuationVmProof], which is a collection of STARK proofs as well as
+/// an additional Merkle proof for user public values.
+///
+/// This function verifies the STARK proofs and additional conditions to ensure that the
+/// `proof` is a valid proof of guest VM execution that terminates successfully (exit code 0)
+/// _with respect to_ a commitment to some VM executable.
+/// It is the responsibility of the caller to check that the commitment matches the expected
+/// VM executable.
+pub fn verify_app_proof(
+    app_vk: &AppVerifyingKey,
+    proof: &ContinuationVmProof<SC>,
+) -> Result<VerifiedAppArtifacts, VmVerificationError> {
+    static POSEIDON2_HASHER: OnceLock<Poseidon2Hasher<F>> = OnceLock::new();
+    let engine = BabyBearPoseidon2Engine::new(app_vk.fri_params);
+    let VerifiedExecutionPayload {
+        exe_commit,
+        final_memory_root,
+    } = verify_segments(&engine, &app_vk.vk, &proof.per_segment)?;
+
+    proof.user_public_values.verify(
+        POSEIDON2_HASHER.get_or_init(vm_poseidon2_hasher),
+        app_vk.memory_dimensions,
+        final_memory_root,
+    )?;
+
+    let app_exe_commit = CommitBytes::from_u32_digest(&exe_commit.map(|x| x.as_canonical_u32()));
+    // Cells in the user public values address space are u8; `unwrap` panics on larger values
+    let user_public_values = proof
+        .user_public_values
+        .public_values
+        .iter()
+        .map(|x| x.as_canonical_u32().try_into().unwrap())
+        .collect_vec();
+    Ok(VerifiedAppArtifacts {
+        app_exe_commit,
+        user_public_values,
+    })
+}
diff --git a/crates/sdk/src/prover/mod.rs b/crates/sdk/src/prover/mod.rs
index 67ccfe1eb8..e0010c4d97 100644
--- a/crates/sdk/src/prover/mod.rs
+++ b/crates/sdk/src/prover/mod.rs
@@ -2,6 +2,7 @@ mod agg;
 mod app;
 #[cfg(feature = "evm-prove")]
 mod halo2;
+#[cfg(feature = "evm-prove")]
 mod root;
 mod stark;
 pub mod vm;
@@ -12,6 +13,7 @@ pub use app::*;
 pub use evm::*;
 #[cfg(feature = "evm-prove")]
 pub use halo2::*;
+#[cfg(feature = "evm-prove")]
 pub use root::*;
 pub use stark::*;
 
@@ -19,60 +21,84 @@ pub use stark::*;
 mod evm {
     use std::sync::Arc;
 
-    use openvm_circuit::arch::VmConfig;
+    use openvm_circuit::arch::{
+        instructions::exe::VmExe, Executor, MeteredExecutor, PreflightExecutor,
+        VirtualMachineError, VmBuilder, VmExecutionConfig,
+    };
+    use openvm_native_circuit::NativeConfig;
     use openvm_native_recursion::halo2::utils::Halo2ParamsReader;
-    use openvm_stark_sdk::{engine::StarkFriEngine, openvm_stark_backend::Chip};
+    use openvm_stark_sdk::engine::StarkFriEngine;
 
     use super::{Halo2Prover, StarkProver};
     use crate::{
         config::AggregationTreeConfig,
-        keygen::{AggProvingKey, AppProvingKey},
+        keygen::{AggProvingKey, AppProvingKey, Halo2ProvingKey},
         stdin::StdIn,
         types::EvmProof,
-        NonRootCommittedExe, F, SC,
+        F, SC,
     };
 
-    pub struct EvmHalo2Prover<VC, E: StarkFriEngine<SC>> {
-        pub stark_prover: StarkProver<VC, E>,
+    pub struct EvmHalo2Prover<E, VB, NativeBuilder>
+    where
+        E: StarkFriEngine<SC = SC>,
+        VB: VmBuilder<E>,
+        NativeBuilder: VmBuilder<E, VmConfig = NativeConfig>,
+    {
+        pub stark_prover: StarkProver<E, VB, NativeBuilder>,
         pub halo2_prover: Halo2Prover,
     }
 
-    impl<VC, E: StarkFriEngine<SC>> EvmHalo2Prover<VC, E> {
+    impl<E, VB, NativeBuilder> EvmHalo2Prover<E, VB, NativeBuilder>
+    where
+        E: StarkFriEngine<SC = SC>,
+        VB: VmBuilder<E>,
+        <VB::VmConfig as VmExecutionConfig<F>>::Executor: Executor<F>
+            + MeteredExecutor<F>
+            + PreflightExecutor<F, <VB as VmBuilder<E>>::RecordArena>,
+        NativeBuilder: VmBuilder<E, VmConfig = NativeConfig> + Clone,
+        <NativeConfig as VmExecutionConfig<F>>::Executor:
+            PreflightExecutor<F, <NativeBuilder as VmBuilder<E>>::RecordArena>,
+    {
+        #[allow(clippy::too_many_arguments)]
         pub fn new(
             reader: &impl Halo2ParamsReader,
-            app_pk: Arc<AppProvingKey<VC>>,
-            app_committed_exe: Arc<NonRootCommittedExe>,
-            agg_pk: AggProvingKey,
+            app_vm_builder: VB,
+            native_builder: NativeBuilder,
+            app_pk: &AppProvingKey<VB::VmConfig>,
+            app_exe: Arc<VmExe<F>>,
+            agg_pk: &AggProvingKey,
+            halo2_pk: Halo2ProvingKey,
             agg_tree_config: AggregationTreeConfig,
-        ) -> Self
-        where
-            VC: VmConfig<F>,
-        {
-            let AggProvingKey {
-                agg_stark_pk,
-                halo2_pk,
-            } = agg_pk;
-            let stark_prover =
-                StarkProver::new(app_pk, app_committed_exe, agg_stark_pk, agg_tree_config);
-            Self {
+        ) -> Result<Self, VirtualMachineError> {
+            let stark_prover = StarkProver::new(
+                app_vm_builder,
+                native_builder,
+                app_pk,
+                app_exe,
+                agg_pk,
+                agg_tree_config,
+            )?;
+            Ok(Self {
                 stark_prover,
                 halo2_prover: Halo2Prover::new(reader, halo2_pk),
-            }
+            })
         }
 
+        pub fn with_program_name(mut self, program_name: impl AsRef<str>) -> Self {
+            self.set_program_name(program_name);
+            self
+        }
         pub fn set_program_name(&mut self, program_name: impl AsRef<str>) -> &mut Self {
             self.stark_prover.set_program_name(program_name);
             self
         }
 
-        pub fn generate_proof_for_evm(&self, input: StdIn) -> EvmProof
-        where
-            VC: VmConfig<F>,
-            VC::Executor: Chip<SC>,
-            VC::Periphery: Chip<SC>,
-        {
-            let root_proof = self.stark_prover.generate_proof_for_outer_recursion(input);
-            self.halo2_prover.prove_for_evm(&root_proof)
+        pub fn prove_evm(&mut self, input: StdIn) -> Result<EvmProof, VirtualMachineError> {
+            let root_proof = self
+                .stark_prover
+                .generate_proof_for_outer_recursion(input)?;
+            let evm_proof = self.halo2_prover.prove_for_evm(&root_proof);
+            Ok(evm_proof)
         }
     }
 }
diff --git a/crates/sdk/src/prover/root.rs b/crates/sdk/src/prover/root.rs
index 6e69aa0f13..e0bf91624c 100644
--- a/crates/sdk/src/prover/root.rs
+++ b/crates/sdk/src/prover/root.rs
@@ -1,89 +1,194 @@
-use async_trait::async_trait;
-use openvm_circuit::arch::{SingleSegmentVmExecutor, Streams};
+use getset::Getters;
+use itertools::zip_eq;
+use openvm_circuit::arch::{
+    GenerationError, PreflightExecutionOutput, SingleSegmentVmProver, Streams, VirtualMachine,
+    VirtualMachineError, VmInstance,
+};
 use openvm_continuations::verifier::root::types::RootVmVerifierInput;
-use openvm_native_circuit::NativeConfig;
+use openvm_native_circuit::{NativeConfig, NativeCpuBuilder, NATIVE_MAX_TRACE_HEIGHTS};
 use openvm_native_recursion::hints::Hintable;
 use openvm_stark_sdk::{
     config::{baby_bear_poseidon2_root::BabyBearPoseidon2RootEngine, FriParameters},
-    engine::{StarkEngine, StarkFriEngine},
+    engine::StarkEngine,
     openvm_stark_backend::proof::Proof,
 };
 
 use crate::{
-    keygen::RootVerifierProvingKey,
-    prover::vm::{AsyncSingleSegmentVmProver, SingleSegmentVmProver},
+    keygen::{perm::AirIdPermutation, RootVerifierProvingKey},
+    prover::vm::new_local_prover,
     RootSC, F, SC,
 };
 
 /// Local prover for a root verifier.
+#[derive(Getters)]
 pub struct RootVerifierLocalProver {
-    pub root_verifier_pk: RootVerifierProvingKey,
-    executor_for_heights: SingleSegmentVmExecutor<F, NativeConfig>,
+    /// Outside of `prove` and `execute_for_air_heights`, the proving key in `inner` must always
+    /// have its AIRs ordered by sorted fixed trace heights (both temporarily undo this ordering).
+    // This is CPU-only for now because it uses RootSC
+    inner: VmInstance<BabyBearPoseidon2RootEngine, NativeCpuBuilder>,
+    /// The constant trace heights, ordered by AIR ID (the original ordering from VmConfig).
+    #[getset(get = "pub")]
+    fixed_air_heights: Vec<u32>,
+    air_id_perm: AirIdPermutation,
+    air_id_inv_perm: AirIdPermutation,
 }
 
 impl RootVerifierLocalProver {
-    pub fn new(root_verifier_pk: RootVerifierProvingKey) -> Self {
-        let executor_for_heights =
-            SingleSegmentVmExecutor::<F, _>::new(root_verifier_pk.vm_pk.vm_config.clone());
-        Self {
-            root_verifier_pk,
-            executor_for_heights,
+    pub fn new(root_verifier_pk: &RootVerifierProvingKey) -> Result<Self, VirtualMachineError> {
+        let inner = new_local_prover(
+            NativeCpuBuilder,
+            &root_verifier_pk.vm_pk,
+            root_verifier_pk.root_committed_exe.exe.clone(),
+        )?;
+        let fixed_air_heights = root_verifier_pk.air_heights.clone();
+        let air_id_perm = AirIdPermutation::compute(&fixed_air_heights);
+        let mut inverse_perm = vec![0usize; air_id_perm.perm.len()];
+        for (i, &perm_i) in air_id_perm.perm.iter().enumerate() {
+            inverse_perm[perm_i] = i;
         }
-    }
-    pub fn execute_for_air_heights(&self, input: RootVmVerifierInput<SC>) -> Vec<usize> {
-        let result = self
-            .executor_for_heights
-            .execute_and_compute_heights(
-                self.root_verifier_pk.root_committed_exe.exe.clone(),
-                input.write(),
-            )
-            .unwrap();
-        result.air_heights
+        let air_id_inv_perm = AirIdPermutation { perm: inverse_perm };
+
+        Ok(Self {
+            inner,
+            fixed_air_heights,
+            air_id_perm,
+            air_id_inv_perm,
+        })
     }
     pub fn vm_config(&self) -> &NativeConfig {
-        &self.root_verifier_pk.vm_pk.vm_config
+        self.inner.vm.config()
     }
     #[allow(dead_code)]
     pub(crate) fn fri_params(&self) -> &FriParameters {
-        &self.root_verifier_pk.vm_pk.fri_params
+        &self.inner.vm.engine.fri_params
     }
-}
 
-impl SingleSegmentVmProver<RootSC> for RootVerifierLocalProver {
-    fn prove(&self, input: impl Into<Streams<F>>) -> Proof<RootSC> {
-        let input = input.into();
-        let mut vm = SingleSegmentVmExecutor::new(self.vm_config().clone());
-        vm.set_override_trace_heights(self.root_verifier_pk.vm_heights.clone());
-        let mut proof_input = vm
-            .execute_and_generate(self.root_verifier_pk.root_committed_exe.clone(), input)
-            .unwrap();
-        assert_eq!(
-            proof_input.per_air.len(),
-            self.root_verifier_pk.air_heights.len(),
-            "All AIRs of root verifier should present"
-        );
-        proof_input.per_air.iter().for_each(|(air_id, input)| {
-            assert_eq!(
-                input.main_trace_height(),
-                self.root_verifier_pk.air_heights[*air_id],
-                "Trace height doesn't match"
-            );
-        });
-        // Reorder the AIRs by heights.
-        let air_id_perm = self.root_verifier_pk.air_id_permutation();
-        air_id_perm.permute(&mut proof_input.per_air);
-        for i in 0..proof_input.per_air.len() {
-            // Overwrite the AIR ID.
-            proof_input.per_air[i].0 = i;
+    /// Runs preflight execution and trace generation for `input` and returns the main trace
+    /// height of every entry of the generated proving context's `per_air`, in that order.
+    pub fn execute_for_air_heights(
+        &mut self,
+        input: RootVmVerifierInput<SC>,
+    ) -> Result<Vec<u32>, VirtualMachineError> {
+        let exe = self.inner.exe().clone();
+        let vm = &mut self.inner.vm;
+        assert!(!vm.config().as_ref().continuation_enabled);
+        // See `SingleSegmentVmProver::prove` for why the pk is permuted. No `?` may be used until
+        // the permutation is undone below, otherwise an error would leave the pk mis-ordered.
+        Self::permute_pk(vm, &self.air_id_inv_perm);
+        let input = input.write();
+        let state = vm.create_initial_state(&exe, input);
+        vm.transport_init_memory_to_device(&state.memory);
+        let interpreter = &mut self.inner.interpreter;
+        let preflight = vm.execute_preflight(interpreter, state, None, NATIVE_MAX_TRACE_HEIGHTS);
+        // Note[jpw]: we could in theory extract trace heights from just preflight execution, but
+        // that requires special logic in the chips so we will just generate the traces for now
+        let ctx = match preflight {
+            Ok(out) => vm
+                .generate_proving_ctx(out.system_records, out.record_arenas)
+                .map_err(VirtualMachineError::from),
+            Err(err) => Err(VirtualMachineError::from(err)),
+        };
+        Self::permute_pk(vm, &self.air_id_perm);
+        let air_heights = ctx?
+            .per_air
+            .iter()
+            .map(|(_, air_ctx)| air_ctx.main_trace_height() as u32)
+            .collect();
+        Ok(air_heights)
+    }
+
+    // ATTENTION: this must exactly match the permutation done in
+    // `AggStarkProvingKey::dummy_proof_and_keygen` except on DeviceMultiStarkProvingKey.
+    fn permute_pk(
+        vm: &mut VirtualMachine<BabyBearPoseidon2RootEngine, NativeCpuBuilder>,
+        perm: &AirIdPermutation,
+    ) {
+        perm.permute(&mut vm.pk_mut().per_air);
+        for thc in &mut vm.pk_mut().trace_height_constraints {
+            perm.permute(&mut thc.coefficients);
         }
-        let e = BabyBearPoseidon2RootEngine::new(*self.fri_params());
-        e.prove(&self.root_verifier_pk.vm_pk.vm_pk, proof_input)
     }
 }
 
-#[async_trait]
-impl AsyncSingleSegmentVmProver<RootSC> for RootVerifierLocalProver {
-    async fn prove(&self, input: impl Into<Streams<F>> + Send + Sync) -> Proof<RootSC> {
-        SingleSegmentVmProver::prove(self, input)
+impl SingleSegmentVmProver<RootSC> for RootVerifierLocalProver {
+    // @dev: If this implementation is generalized to prover backends not using MatrixRecordArena,
+    // then it must be ensured that:
+    // - the Native extension chips can ensure that, if the record arenas have
+    //   `force_matrix_dimensions()` set, then the record arena capacity heights must equal the
+    //   trace matrix heights.
+    // - any chips that do not use record arenas (currently system memory chips) have a way to force
+    //   trace heights as well. We currently use the fact that all non-system periphery chips have
+    //   fixed height (in particular, there is no Poseidon2PeripheryChip).
+    fn prove(
+        &mut self,
+        input: impl Into<Streams<F>>,
+        _: &[u32],
+    ) -> Result<Proof<RootSC>, VirtualMachineError> {
+        assert!(!self.vm_config().as_ref().continuation_enabled);
+        // The following is unrolled from SingleSegmentVmProver for VmLocalProver and
+        // VirtualMachine::prove to add special logic around ensuring trace heights are fixed and
+        // then reordering the trace matrices so the heights are sorted.
+        self.inner.reset_state(input);
+        let state = self
+            .inner
+            .state_mut()
+            .take()
+            .expect("State should always be present");
+        let vm = &mut self.inner.vm;
+        let interpreter = &mut self.inner.interpreter;
+        let trace_heights = &self.fixed_air_heights;
+        // The root_verifier_pk has the AIRs ordered by the fixed AIR height sorted ordering, but
+        // execute_preflight and generate_proving_ctx still expect the original AIR ID ordering from
+        // VmConfig, so we apply the inverse permutation here, and then undo it after tracegen. This
+        // could maybe be replaced by only changing `executor_idx_to_air_idx`, but applying the
+        // permutation is conceptually simpler to track.
+        Self::permute_pk(vm, &self.air_id_inv_perm);
+        vm.transport_init_memory_to_device(&state.memory);
+        // All fallible steps run inside this closure so that an early error return cannot skip
+        // the un-permutation of the pk below and leave `inner` with the wrong AIR ordering.
+        let tracegen_result = (|| -> Result<_, VirtualMachineError> {
+            let PreflightExecutionOutput {
+                system_records,
+                mut record_arenas,
+                to_state,
+            } = vm.execute_preflight(interpreter, state, None, trace_heights)?;
+            // record_arenas are created with capacity specified by trace_heights. We must ensure
+            // `generate_proving_ctx` does not resize the trace matrices to make them smaller:
+            for ra in &mut record_arenas {
+                ra.force_matrix_dimensions();
+            }
+            vm.override_system_trace_heights(trace_heights);
+            let ctx = vm.generate_proving_ctx(system_records, record_arenas)?;
+            // Sanity check: ensure all generated trace matrices actually match the fixed heights.
+            // An AIR found at the wrong index is reported with `actual: 0`.
+            for (air_idx, (fixed_height, (idx, air_ctx))) in
+                zip_eq(trace_heights, &ctx.per_air).enumerate()
+            {
+                let expected = *fixed_height as usize;
+                let height = air_ctx.main_trace_height();
+                let misplaced = air_idx != *idx;
+                if misplaced || expected != height {
+                    return Err(GenerationError::ForceTraceHeightIncorrect {
+                        air_idx,
+                        actual: if misplaced { 0 } else { height },
+                        expected,
+                    }
+                    .into());
+                }
+            }
+            Ok((ctx, to_state))
+        })();
+        // Always undo the pk permutation: `prove` needs pk and ctx ordering to match.
+        Self::permute_pk(vm, &self.air_id_perm);
+        let (mut ctx, to_state) = tracegen_result?;
+        // Reorder the AIRs by heights.
+        self.air_id_perm.permute(&mut ctx.per_air);
+        for (i, (air_idx, _)) in ctx.per_air.iter_mut().enumerate() {
+            *air_idx = i;
+        }
+        let proof = vm.engine.prove(vm.pk(), ctx);
+        *self.inner.state_mut() = Some(to_state);
+        Ok(proof)
+    }
     }
 }
diff --git a/crates/sdk/src/prover/stark.rs b/crates/sdk/src/prover/stark.rs
index fdec583f0f..49e234c604 100644
--- a/crates/sdk/src/prover/stark.rs
+++ b/crates/sdk/src/prover/stark.rs
@@ -1,85 +1,123 @@
 use std::sync::Arc;
 
-use openvm_circuit::arch::VmConfig;
-use openvm_continuations::verifier::{
-    internal::types::VmStarkProof, root::types::RootVmVerifierInput,
+use openvm_circuit::arch::{
+    instructions::exe::VmExe, Executor, MeteredExecutor, PreflightExecutor, VirtualMachineError,
+    VmBuilder, VmExecutionConfig,
 };
-use openvm_stark_backend::{proof::Proof, Chip};
+use openvm_continuations::verifier::internal::types::VmStarkProof;
+#[cfg(feature = "evm-prove")]
+use openvm_continuations::{verifier::root::types::RootVmVerifierInput, RootSC};
+use openvm_native_circuit::NativeConfig;
+#[cfg(feature = "evm-prove")]
+use openvm_stark_backend::proof::Proof;
 use openvm_stark_sdk::engine::StarkFriEngine;
 
 use crate::{
+    commit::AppExecutionCommit,
     config::AggregationTreeConfig,
-    keygen::{AggStarkProvingKey, AppProvingKey},
+    keygen::{AggProvingKey, AppProvingKey},
     prover::{agg::AggStarkProver, app::AppProver},
-    NonRootCommittedExe, RootSC, StdIn, F, SC,
+    StdIn, F, SC,
 };
 
-pub struct StarkProver<VC, E: StarkFriEngine<SC>> {
-    pub app_prover: AppProver<VC, E>,
-    pub agg_prover: AggStarkProver<E>,
+/// This prover contains an [`app_prover`](StarkProver::app_prover) internally.
+pub struct StarkProver<E, VB, NativeBuilder>
+where
+    E: StarkFriEngine<SC = SC>,
+    VB: VmBuilder<E>,
+    NativeBuilder: VmBuilder<E, VmConfig = NativeConfig>,
+{
+    pub app_prover: AppProver<E, VB>,
+    pub agg_prover: AggStarkProver<E, NativeBuilder>,
 }
-impl<VC, E: StarkFriEngine<SC>> StarkProver<VC, E> {
+impl<E, VB, NativeBuilder> StarkProver<E, VB, NativeBuilder>
+where
+    E: StarkFriEngine<SC = SC>,
+    VB: VmBuilder<E>,
+    <VB::VmConfig as VmExecutionConfig<F>>::Executor:
+        Executor<F> + MeteredExecutor<F> + PreflightExecutor<F, <VB as VmBuilder<E>>::RecordArena>,
+    NativeBuilder: VmBuilder<E, VmConfig = NativeConfig> + Clone,
+    <NativeConfig as VmExecutionConfig<F>>::Executor:
+        PreflightExecutor<F, <NativeBuilder as VmBuilder<E>>::RecordArena>,
+{
     pub fn new(
-        app_pk: Arc<AppProvingKey<VC>>,
-        app_committed_exe: Arc<NonRootCommittedExe>,
-        agg_stark_pk: AggStarkProvingKey,
+        app_vm_builder: VB,
+        native_builder: NativeBuilder,
+        app_pk: &AppProvingKey<VB::VmConfig>,
+        app_exe: Arc<VmExe<F>>,
+        agg_pk: &AggProvingKey,
         agg_tree_config: AggregationTreeConfig,
-    ) -> Self
-    where
-        VC: VmConfig<F>,
-    {
+    ) -> Result<Self, VirtualMachineError> {
         assert_eq!(
-            app_pk.leaf_fri_params, agg_stark_pk.leaf_vm_pk.fri_params,
+            app_pk.leaf_fri_params, agg_pk.leaf_vm_pk.fri_params,
             "App VM is incompatible with Agg VM because of leaf FRI parameters"
         );
         assert_eq!(
-            app_pk.app_vm_pk.vm_config.system().num_public_values,
-            agg_stark_pk.num_user_public_values(),
+            app_pk.app_vm_pk.vm_config.as_ref().num_public_values,
+            agg_pk.num_user_public_values(),
             "App VM is incompatible with Agg VM  because of the number of public values"
         );
 
-        Self {
-            app_prover: AppProver::new(app_pk.app_vm_pk.clone(), app_committed_exe),
+        Ok(Self {
+            app_prover: AppProver::new(
+                app_vm_builder,
+                &app_pk.app_vm_pk,
+                app_exe,
+                app_pk.leaf_committed_exe.get_program_commit(),
+            )?,
             agg_prover: AggStarkProver::new(
-                agg_stark_pk,
-                app_pk.leaf_committed_exe.clone(),
+                native_builder,
+                agg_pk,
+                app_pk.leaf_committed_exe.exe.clone(),
                 agg_tree_config,
-            ),
-        }
+            )?,
+        })
+    }
+
+    pub fn from_parts(
+        app_prover: AppProver<E, VB>,
+        agg_prover: AggStarkProver<E, NativeBuilder>,
+    ) -> Result<Self, VirtualMachineError> {
+        Ok(Self {
+            app_prover,
+            agg_prover,
+        })
+    }
+
+    pub fn with_program_name(mut self, program_name: impl AsRef<str>) -> Self {
+        self.set_program_name(program_name);
+        self
     }
     pub fn set_program_name(&mut self, program_name: impl AsRef<str>) -> &mut Self {
         self.app_prover.set_program_name(program_name);
         self
     }
-    pub fn generate_proof_for_outer_recursion(&self, input: StdIn) -> Proof<RootSC>
-    where
-        VC: VmConfig<F>,
-        VC::Executor: Chip<SC>,
-        VC::Periphery: Chip<SC>,
-    {
-        let app_proof = self.app_prover.generate_app_proof(input);
-        self.agg_prover.generate_root_proof(app_proof)
-    }
 
-    pub fn generate_root_verifier_input(&self, input: StdIn) -> RootVmVerifierInput<SC>
-    where
-        VC: VmConfig<F>,
-        VC::Executor: Chip<SC>,
-        VC::Periphery: Chip<SC>,
-    {
-        let app_proof = self.app_prover.generate_app_proof(input);
-        self.agg_prover.generate_root_verifier_input(app_proof)
+    pub fn app_commit(&self) -> AppExecutionCommit {
+        self.app_prover.app_commit()
     }
 
-    pub fn generate_e2e_stark_proof(&self, input: StdIn) -> VmStarkProof<SC>
-    where
-        VC: VmConfig<F>,
-        VC::Executor: Chip<SC>,
-        VC::Periphery: Chip<SC>,
-    {
-        let app_proof = self.app_prover.generate_app_proof(input);
-        let leaf_proofs = self.agg_prover.generate_leaf_proofs(&app_proof);
+    pub fn prove(&mut self, input: StdIn) -> Result<VmStarkProof<SC>, VirtualMachineError> {
+        let app_proof = self.app_prover.prove(input)?;
+        let leaf_proofs = self.agg_prover.generate_leaf_proofs(&app_proof)?;
         self.agg_prover
             .aggregate_leaf_proofs(leaf_proofs, app_proof.user_public_values.public_values)
     }
+
+    #[cfg(feature = "evm-prove")]
+    pub fn generate_proof_for_outer_recursion(
+        &mut self,
+        input: StdIn,
+    ) -> Result<Proof<RootSC>, VirtualMachineError> {
+        let app_proof = self.app_prover.prove(input)?;
+        self.agg_prover.generate_root_proof(app_proof)
+    }
+    #[cfg(feature = "evm-prove")]
+    pub fn generate_root_verifier_input(
+        &mut self,
+        input: StdIn,
+    ) -> Result<RootVmVerifierInput<SC>, VirtualMachineError> {
+        let app_proof = self.app_prover.prove(input)?;
+        self.agg_prover.generate_root_verifier_input(app_proof)
+    }
 }
diff --git a/crates/sdk/src/prover/vm/local.rs b/crates/sdk/src/prover/vm/local.rs
deleted file mode 100644
index b56c6a1ad3..0000000000
--- a/crates/sdk/src/prover/vm/local.rs
+++ /dev/null
@@ -1,196 +0,0 @@
-use std::{marker::PhantomData, mem, sync::Arc};
-
-use async_trait::async_trait;
-use openvm_circuit::{
-    arch::{
-        hasher::poseidon2::vm_poseidon2_hasher, GenerationError, SingleSegmentVmExecutor, Streams,
-        VirtualMachine, VmComplexTraceHeights, VmConfig,
-    },
-    system::{memory::tree::public_values::UserPublicValuesProof, program::trace::VmCommittedExe},
-};
-use openvm_stark_backend::{
-    config::{StarkGenericConfig, Val},
-    p3_field::PrimeField32,
-    proof::Proof,
-    Chip,
-};
-use openvm_stark_sdk::{config::FriParameters, engine::StarkFriEngine};
-use tracing::info_span;
-
-use crate::prover::vm::{
-    types::VmProvingKey, AsyncContinuationVmProver, AsyncSingleSegmentVmProver,
-    ContinuationVmProof, ContinuationVmProver, SingleSegmentVmProver,
-};
-
-pub struct VmLocalProver<SC: StarkGenericConfig, VC, E: StarkFriEngine<SC>> {
-    pub pk: Arc<VmProvingKey<SC, VC>>,
-    pub committed_exe: Arc<VmCommittedExe<SC>>,
-    overridden_heights: Option<VmComplexTraceHeights>,
-    _marker: PhantomData<E>,
-}
-
-impl<SC: StarkGenericConfig, VC, E: StarkFriEngine<SC>> VmLocalProver<SC, VC, E> {
-    pub fn new(pk: Arc<VmProvingKey<SC, VC>>, committed_exe: Arc<VmCommittedExe<SC>>) -> Self {
-        Self {
-            pk,
-            committed_exe,
-            overridden_heights: None,
-            _marker: PhantomData,
-        }
-    }
-
-    pub fn new_with_overridden_trace_heights(
-        pk: Arc<VmProvingKey<SC, VC>>,
-        committed_exe: Arc<VmCommittedExe<SC>>,
-        overridden_heights: Option<VmComplexTraceHeights>,
-    ) -> Self {
-        Self {
-            pk,
-            committed_exe,
-            overridden_heights,
-            _marker: PhantomData,
-        }
-    }
-
-    pub fn set_override_trace_heights(&mut self, overridden_heights: VmComplexTraceHeights) {
-        self.overridden_heights = Some(overridden_heights);
-    }
-
-    pub fn vm_config(&self) -> &VC {
-        &self.pk.vm_config
-    }
-    #[allow(dead_code)]
-    pub(crate) fn fri_params(&self) -> &FriParameters {
-        &self.pk.fri_params
-    }
-}
-
-const MAX_SEGMENTATION_RETRIES: usize = 4;
-
-impl<SC: StarkGenericConfig, VC: VmConfig<Val<SC>>, E: StarkFriEngine<SC>> ContinuationVmProver<SC>
-    for VmLocalProver<SC, VC, E>
-where
-    Val<SC>: PrimeField32,
-    VC::Executor: Chip<SC>,
-    VC::Periphery: Chip<SC>,
-{
-    fn prove(&self, input: impl Into<Streams<Val<SC>>>) -> ContinuationVmProof<SC> {
-        assert!(self.pk.vm_config.system().continuation_enabled);
-        let e = E::new(self.pk.fri_params);
-        let trace_height_constraints = self.pk.vm_pk.trace_height_constraints.clone();
-        let mut vm = VirtualMachine::new_with_overridden_trace_heights(
-            e,
-            self.pk.vm_config.clone(),
-            self.overridden_heights.clone(),
-        );
-        vm.set_trace_height_constraints(trace_height_constraints.clone());
-        let mut final_memory = None;
-        let VmCommittedExe {
-            exe,
-            committed_program,
-        } = self.committed_exe.as_ref();
-        let input = input.into();
-
-        // This loop should typically iterate exactly once. Only in exceptional cases will the
-        // segmentation produce an invalid segment and we will have to retry.
-        let mut retries = 0;
-        let per_segment = loop {
-            match vm.executor.execute_and_then(
-                exe.clone(),
-                input.clone(),
-                |seg_idx, mut seg| {
-                    final_memory = mem::take(&mut seg.final_memory);
-                    let proof_input = info_span!("trace_gen", segment = seg_idx)
-                        .in_scope(|| seg.generate_proof_input(Some(committed_program.clone())))?;
-                    info_span!("prove_segment", segment = seg_idx)
-                        .in_scope(|| Ok(vm.engine.prove(&self.pk.vm_pk, proof_input)))
-                },
-                GenerationError::Execution,
-            ) {
-                Ok(per_segment) => break per_segment,
-                Err(GenerationError::Execution(err)) => panic!("execution error: {err}"),
-                Err(GenerationError::TraceHeightsLimitExceeded) => {
-                    if retries >= MAX_SEGMENTATION_RETRIES {
-                        panic!(
-                            "trace heights limit exceeded after {MAX_SEGMENTATION_RETRIES} retries"
-                        );
-                    }
-                    retries += 1;
-                    tracing::info!(
-                        "trace heights limit exceeded; retrying execution (attempt {retries})"
-                    );
-                    let sys_config = vm.executor.config.system_mut();
-                    let new_seg_strat = sys_config.segmentation_strategy.stricter_strategy();
-                    sys_config.set_segmentation_strategy(new_seg_strat);
-                    // continue
-                }
-            };
-        };
-
-        let user_public_values = UserPublicValuesProof::compute(
-            self.pk.vm_config.system().memory_config.memory_dimensions(),
-            self.pk.vm_config.system().num_public_values,
-            &vm_poseidon2_hasher(),
-            final_memory.as_ref().unwrap(),
-        );
-        ContinuationVmProof {
-            per_segment,
-            user_public_values,
-        }
-    }
-}
-
-#[async_trait]
-impl<SC: StarkGenericConfig, VC: VmConfig<Val<SC>>, E: StarkFriEngine<SC>>
-    AsyncContinuationVmProver<SC> for VmLocalProver<SC, VC, E>
-where
-    VmLocalProver<SC, VC, E>: Send + Sync,
-    Val<SC>: PrimeField32,
-    VC::Executor: Chip<SC>,
-    VC::Periphery: Chip<SC>,
-{
-    async fn prove(
-        &self,
-        input: impl Into<Streams<Val<SC>>> + Send + Sync,
-    ) -> ContinuationVmProof<SC> {
-        ContinuationVmProver::prove(self, input)
-    }
-}
-
-impl<SC: StarkGenericConfig, VC: VmConfig<Val<SC>>, E: StarkFriEngine<SC>> SingleSegmentVmProver<SC>
-    for VmLocalProver<SC, VC, E>
-where
-    Val<SC>: PrimeField32,
-    VC::Executor: Chip<SC>,
-    VC::Periphery: Chip<SC>,
-{
-    fn prove(&self, input: impl Into<Streams<Val<SC>>>) -> Proof<SC> {
-        assert!(!self.pk.vm_config.system().continuation_enabled);
-        let e = E::new(self.pk.fri_params);
-        // note: use SingleSegmentVmExecutor so there's not a "segment" label in metrics
-        let executor = {
-            let mut executor = SingleSegmentVmExecutor::new(self.pk.vm_config.clone());
-            executor.set_trace_height_constraints(self.pk.vm_pk.trace_height_constraints.clone());
-            executor
-        };
-        let proof_input = executor
-            .execute_and_generate(self.committed_exe.clone(), input)
-            .unwrap();
-        let vm = VirtualMachine::new(e, executor.config);
-        vm.prove_single(&self.pk.vm_pk, proof_input)
-    }
-}
-
-#[async_trait]
-impl<SC: StarkGenericConfig, VC: VmConfig<Val<SC>>, E: StarkFriEngine<SC>>
-    AsyncSingleSegmentVmProver<SC> for VmLocalProver<SC, VC, E>
-where
-    VmLocalProver<SC, VC, E>: Send + Sync,
-    Val<SC>: PrimeField32,
-    VC::Executor: Chip<SC>,
-    VC::Periphery: Chip<SC>,
-{
-    async fn prove(&self, input: impl Into<Streams<Val<SC>>> + Send + Sync) -> Proof<SC> {
-        SingleSegmentVmProver::prove(self, input)
-    }
-}
diff --git a/crates/sdk/src/prover/vm/mod.rs b/crates/sdk/src/prover/vm/mod.rs
index bc79d7b30c..2eca5c6281 100644
--- a/crates/sdk/src/prover/vm/mod.rs
+++ b/crates/sdk/src/prover/vm/mod.rs
@@ -1,34 +1,28 @@
-use async_trait::async_trait;
-use openvm_circuit::arch::{ContinuationVmProof, Streams};
-use openvm_stark_backend::{
-    config::{StarkGenericConfig, Val},
-    proof::Proof,
-};
-
-pub mod local;
-pub mod types;
+use std::sync::Arc;
 
-/// Prover for a specific exe in a specific continuation VM using a specific Stark config.
-pub trait ContinuationVmProver<SC: StarkGenericConfig> {
-    fn prove(&self, input: impl Into<Streams<Val<SC>>>) -> ContinuationVmProof<SC>;
-}
+use openvm_circuit::arch::{
+    instructions::exe::VmExe, VirtualMachine, VirtualMachineError, VmBuilder, VmInstance,
+};
+use openvm_stark_backend::{config::Val, prover::hal::DeviceDataTransporter};
+use openvm_stark_sdk::engine::StarkFriEngine;
 
-/// Async prover for a specific exe in a specific continuation VM using a specific Stark config.
-#[async_trait]
-pub trait AsyncContinuationVmProver<SC: StarkGenericConfig> {
-    async fn prove(
-        &self,
-        input: impl Into<Streams<Val<SC>>> + Send + Sync,
-    ) -> ContinuationVmProof<SC>;
-}
+use crate::prover::vm::types::VmProvingKey;
 
-/// Prover for a specific exe in a specific single-segment VM using a specific Stark config.
-pub trait SingleSegmentVmProver<SC: StarkGenericConfig> {
-    fn prove(&self, input: impl Into<Streams<Val<SC>>>) -> Proof<SC>;
-}
+pub mod types;
 
-/// Async prover for a specific exe in a specific single-segment VM using a specific Stark config.
-#[async_trait]
-pub trait AsyncSingleSegmentVmProver<SC: StarkGenericConfig> {
-    async fn prove(&self, input: impl Into<Streams<Val<SC>>> + Send + Sync) -> Proof<SC>;
+pub fn new_local_prover<E, VB>(
+    vm_builder: VB,
+    vm_pk: &VmProvingKey<E::SC, VB::VmConfig>,
+    exe: Arc<VmExe<Val<E::SC>>>,
+) -> Result<VmInstance<E, VB>, VirtualMachineError>
+where
+    E: StarkFriEngine,
+    VB: VmBuilder<E>,
+{
+    let engine = E::new(vm_pk.fri_params);
+    let d_pk = engine.device().transport_pk_to_device(&vm_pk.vm_pk);
+    let vm = VirtualMachine::new(engine, vm_builder, vm_pk.vm_config.clone(), d_pk)?;
+    let cached_program_trace = vm.commit_program_on_device(&exe.program);
+    let instance = VmInstance::new(vm, exe, cached_program_trace)?;
+    Ok(instance)
 }
diff --git a/crates/sdk/src/prover/vm/types.rs b/crates/sdk/src/prover/vm/types.rs
index 2e4ec41f11..c4ed753a7c 100644
--- a/crates/sdk/src/prover/vm/types.rs
+++ b/crates/sdk/src/prover/vm/types.rs
@@ -6,7 +6,7 @@ use openvm_stark_backend::{
 use openvm_stark_sdk::config::FriParameters;
 use serde::{Deserialize, Serialize};
 
-///Proving key for a specific VM.
+/// Proving key for a specific VM.
 #[derive(Serialize, Deserialize, Derivative)]
 #[serde(bound(
     serialize = "MultiStarkProvingKey<SC>: Serialize, VC: Serialize",
diff --git a/crates/sdk/src/stdin.rs b/crates/sdk/src/stdin.rs
index 9101e8d4de..db5bfbb52e 100644
--- a/crates/sdk/src/stdin.rs
+++ b/crates/sdk/src/stdin.rs
@@ -4,18 +4,16 @@ use std::{
 };
 
 use openvm_circuit::arch::Streams;
-use openvm_stark_backend::p3_field::FieldAlgebra;
+use openvm_stark_backend::p3_field::Field;
 use serde::{Deserialize, Serialize};
 
-use crate::F;
-
 #[derive(Clone, Default, Serialize, Deserialize)]
-pub struct StdIn {
+pub struct StdIn<F = crate::F> {
     pub buffer: VecDeque<Vec<F>>,
     pub kv_store: HashMap<Vec<u8>, Vec<u8>>,
 }
 
-impl StdIn {
+impl<F: Field> StdIn<F> {
     pub fn from_bytes(data: &[u8]) -> Self {
         let mut ret = Self::default();
         ret.write_bytes(data);
@@ -45,8 +43,8 @@ impl StdIn {
     }
 }
 
-impl From<StdIn> for Streams<F> {
-    fn from(mut std_in: StdIn) -> Self {
+impl<F: Field> From<StdIn<F>> for Streams<F> {
+    fn from(mut std_in: StdIn<F>) -> Self {
         let mut data = Vec::<Vec<F>>::new();
         while let Some(input) = std_in.read() {
             data.push(input);
@@ -57,9 +55,9 @@ impl From<StdIn> for Streams<F> {
     }
 }
 
-impl From<Vec<Vec<F>>> for StdIn {
+impl<F: Field> From<Vec<Vec<F>>> for StdIn<F> {
     fn from(inputs: Vec<Vec<F>>) -> Self {
-        let mut ret = StdIn::default();
+        let mut ret = StdIn::<F>::default();
         for input in inputs {
             ret.write_field(&input);
         }
diff --git a/crates/sdk/src/types.rs b/crates/sdk/src/types.rs
index d83140a5ae..3f4e39d7f4 100644
--- a/crates/sdk/src/types.rs
+++ b/crates/sdk/src/types.rs
@@ -1,22 +1,26 @@
-use std::io::Cursor;
+use std::{io::Cursor, sync::Arc};
 
+use derive_more::derive::From;
 use eyre::Result;
+use openvm::platform::memory::MEM_SIZE;
+use openvm_circuit::arch::instructions::exe::VmExe;
 use openvm_continuations::{verifier::internal::types::VmStarkProof, SC};
 use openvm_stark_backend::proof::Proof;
+use openvm_transpiler::elf::Elf;
 use serde::{Deserialize, Serialize};
 use serde_with::serde_as;
 #[cfg(feature = "evm-prove")]
 use {
-    crate::commit::CommitBytes,
+    crate::commit::{AppExecutionCommit, CommitBytes},
     itertools::Itertools,
     openvm_native_recursion::halo2::{wrapper::EvmVerifierByteCode, Fr, RawEvmProof},
-    std::iter::{once, repeat},
+    std::iter::{once, repeat_n},
     thiserror::Error,
 };
 
 use crate::{
     codec::{decode_vec, encode_slice, Decode, Encode},
-    commit::AppExecutionCommit,
+    OPENVM_VERSION,
 };
 
 /// Number of bytes in a Bn254Fr.
@@ -27,6 +31,25 @@ pub const NUM_BN254_ACCUMULATOR: usize = 12;
 #[cfg(feature = "evm-prove")]
 const NUM_BN254_PROOF: usize = 43;
 
+#[derive(From)]
+pub enum ExecutableFormat {
+    Elf(Elf),
+    VmExe(VmExe<crate::F>),
+    SharedVmExe(Arc<VmExe<crate::F>>),
+}
+
+impl<'a> From<&'a [u8]> for ExecutableFormat {
+    fn from(bytes: &'a [u8]) -> Self {
+        let elf = Elf::decode(bytes, MEM_SIZE.try_into().unwrap()).expect("Invalid ELF bytes");
+        ExecutableFormat::Elf(elf)
+    }
+}
+impl From<Vec<u8>> for ExecutableFormat {
+    fn from(bytes: Vec<u8>) -> Self {
+        ExecutableFormat::from(&bytes[..])
+    }
+}
+
 #[cfg(feature = "evm-prove")]
 #[derive(Clone, Debug, Serialize, Deserialize)]
 pub struct EvmHalo2Verifier {
@@ -52,6 +75,9 @@ pub struct ProofData {
 #[serde_as]
 #[derive(Clone, Debug, Deserialize, Serialize)]
 pub struct EvmProof {
+    /// The openvm major and minor version v{}.{}. The proof format will not change on patch
+    /// versions.
+    pub version: String,
     #[serde(flatten)]
     /// Bn254Fr public value app commits.
     pub app_commit: AppExecutionCommit,
@@ -88,6 +114,7 @@ impl EvmProof {
             user_public_values,
             app_commit,
             proof_data,
+            version: _,
         } = self;
 
         let ProofData { accumulator, proof } = proof_data;
@@ -151,6 +178,7 @@ impl TryFrom<RawEvmProof> for EvmProof {
         };
 
         Ok(Self {
+            version: format!("v{}", OPENVM_VERSION),
             app_commit,
             user_public_values,
             proof_data: ProofData {
@@ -169,6 +197,7 @@ impl TryFrom<EvmProof> for RawEvmProof {
             mut app_commit,
             user_public_values,
             proof_data,
+            version: _,
         } = evm_openvm_proof;
 
         app_commit.app_exe_commit.reverse();
@@ -195,7 +224,7 @@ impl TryFrom<EvmProof> for RawEvmProof {
 
             let user_public_values = user_public_values
                 .into_iter()
-                .flat_map(|byte| once(byte).chain(repeat(0).take(31)))
+                .flat_map(|byte| once(byte).chain(repeat_n(0, 31)))
                 .collect::<Vec<_>>();
 
             let mut ret = Vec::new();
@@ -215,33 +244,35 @@ impl TryFrom<EvmProof> for RawEvmProof {
     }
 }
 
+/// Struct purely for encoding and decoding of [VmStarkProof].
 #[serde_as]
 #[derive(Clone, Debug, Deserialize, Serialize)]
-pub struct VmStarkProofBytes {
-    #[serde(flatten)]
-    pub app_commit: AppExecutionCommit,
+pub struct VersionedVmStarkProof {
+    /// OpenVM version tag of the form `v{major}.{minor}`. The proof format does not change
+    /// between patch versions.
+    pub version: String,
     #[serde_as(as = "serde_with::hex::Hex")]
     pub user_public_values: Vec<u8>,
     #[serde_as(as = "serde_with::hex::Hex")]
     pub proof: Vec<u8>,
 }
 
-impl VmStarkProofBytes {
-    pub fn new(app_commit: AppExecutionCommit, proof: VmStarkProof<SC>) -> Result<Self> {
+impl VersionedVmStarkProof {
+    pub fn new(proof: VmStarkProof<SC>) -> Result<Self> {
         let mut user_public_values = Vec::new();
         encode_slice(&proof.user_public_values, &mut user_public_values)?;
         Ok(Self {
-            app_commit,
+            version: format!("v{}", OPENVM_VERSION),
             user_public_values,
-            proof: proof.proof.encode_to_vec()?,
+            proof: proof.inner.encode_to_vec()?,
         })
     }
 }
 
-impl TryFrom<VmStarkProofBytes> for VmStarkProof<SC> {
+impl TryFrom<VersionedVmStarkProof> for VmStarkProof<SC> {
     type Error = std::io::Error;
-    fn try_from(proof: VmStarkProofBytes) -> Result<Self, std::io::Error> {
-        let VmStarkProofBytes {
+    fn try_from(proof: VersionedVmStarkProof) -> Result<Self, std::io::Error> {
+        let VersionedVmStarkProof {
             proof,
             user_public_values,
             ..
@@ -250,7 +281,7 @@ impl TryFrom<VmStarkProofBytes> for VmStarkProof<SC> {
         let user_public_values = decode_vec(&mut reader)?;
         Ok(Self {
             user_public_values,
-            proof: Proof::decode_from_bytes(&proof)?,
+            inner: Proof::decode_from_bytes(&proof)?,
         })
     }
 }
diff --git a/crates/sdk/src/util.rs b/crates/sdk/src/util.rs
new file mode 100644
index 0000000000..77f0cdcdf2
--- /dev/null
+++ b/crates/sdk/src/util.rs
@@ -0,0 +1,19 @@
+use openvm_circuit::arch::SystemConfig;
+use openvm_stark_sdk::config::FriParameters;
+
+/// Emits a `tracing` warning when the system config and the FRI parameters disagree on the
+/// maximum constraint degree.
+///
+/// A mismatch is only logged; the function returns nothing.
+pub fn check_max_constraint_degrees(config: &SystemConfig, fri_params: &FriParameters) {
+    let config_degree = config.max_constraint_degree;
+    let fri_degree = fri_params.max_constraint_degree();
+    if config_degree == fri_degree {
+        return;
+    }
+    tracing::warn!(
+        "config.max_constraint_degree ({}) != fri_params.max_constraint_degree() ({})",
+        config_degree,
+        fri_degree
+    );
+}
diff --git a/crates/sdk/tests/integration_test.rs b/crates/sdk/tests/integration_test.rs
index 9248fc5445..8714250ef6 100644
--- a/crates/sdk/tests/integration_test.rs
+++ b/crates/sdk/tests/integration_test.rs
@@ -1,41 +1,36 @@
-use std::{borrow::Borrow, path::PathBuf, sync::Arc};
+use std::{
+    borrow::Borrow,
+    path::PathBuf,
+    sync::{Arc, OnceLock},
+};
 
 use eyre::Result;
 use openvm_build::GuestOptions;
 use openvm_circuit::{
-    arch::{
-        hasher::poseidon2::vm_poseidon2_hasher, ContinuationVmProof, ExecutionError,
-        GenerationError, SingleSegmentVmExecutor, SystemConfig, VmConfig, VmExecutor,
-    },
-    system::{memory::tree::public_values::UserPublicValuesProof, program::trace::VmCommittedExe},
+    self,
+    arch::{instructions::exe::VmExe, ContinuationVmProof, ExecutionError, VirtualMachineError},
+    utils::test_system_config,
 };
 use openvm_continuations::verifier::{
     common::types::VmVerifierPvs,
     leaf::types::{LeafVmVerifierInput, UserPublicValuesRootProof},
 };
-use openvm_native_circuit::{Native, NativeConfig};
+use openvm_native_circuit::{execute_program_with_config, NativeConfig, NativeCpuBuilder};
 use openvm_native_compiler::{conversion::CompilerOptions, prelude::*};
-use openvm_native_recursion::types::InnerConfig;
-use openvm_rv32im_transpiler::{
-    Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
-};
 use openvm_sdk::{
     codec::{Decode, Encode},
-    config::{AggStarkConfig, AppConfig, SdkSystemConfig, SdkVmConfig},
-    keygen::AppProvingKey,
+    config::{AggregationConfig, AppConfig, SdkSystemConfig, SdkVmConfig},
+    prover::verify_app_proof,
     Sdk, StdIn,
 };
-use openvm_stark_backend::{keygen::types::LinearConstraint, p3_matrix::Matrix};
 use openvm_stark_sdk::{
     config::{
         baby_bear_poseidon2::{BabyBearPoseidon2Config, BabyBearPoseidon2Engine},
         setup_tracing, FriParameters,
     },
-    engine::{StarkEngine, StarkFriEngine},
-    openvm_stark_backend::{p3_field::FieldAlgebra, Chip},
+    openvm_stark_backend::p3_field::FieldAlgebra,
     p3_baby_bear::BabyBear,
 };
-use openvm_transpiler::transpiler::Transpiler;
 #[cfg(feature = "evm-verify")]
 use {
     openvm_continuations::{
@@ -47,25 +42,15 @@ use {
     },
     openvm_native_recursion::{
         config::outer::OuterConfig,
-        halo2::{
-            utils::{CacheHalo2ParamsReader, Halo2ParamsReader},
-            wrapper::Halo2WrapperProvingKey,
-            RawEvmProof,
-        },
+        halo2::{utils::Halo2ParamsReader, wrapper::Halo2WrapperProvingKey, RawEvmProof},
         vars::StarkProofVariable,
     },
-    openvm_sdk::{
-        commit::AppExecutionCommit,
-        config::{AggConfig, Halo2Config},
-        types::{EvmHalo2Verifier, EvmProof},
-        DefaultStaticVerifierPvHandler,
-    },
+    openvm_sdk::types::{EvmHalo2Verifier, EvmProof},
     openvm_stark_sdk::p3_bn254_fr::Bn254Fr,
     snark_verifier_sdk::evm::evm_verify,
 };
 
 type SC = BabyBearPoseidon2Config;
-type C = InnerConfig;
 type F = BabyBear;
 
 const NUM_PUB_VALUES: usize = 16;
@@ -91,64 +76,37 @@ fn verify_evm_halo2_proof_with_fallback(
     Ok(gas_cost)
 }
 
-fn run_leaf_verifier<VC: VmConfig<F>>(
-    leaf_vm: &SingleSegmentVmExecutor<F, VC>,
-    leaf_committed_exe: Arc<VmCommittedExe<SC>>,
+fn run_leaf_verifier(
+    leaf_vm_config: &NativeConfig,
+    leaf_exe: &VmExe<F>,
     verifier_input: LeafVmVerifierInput<SC>,
-) -> Result<Vec<F>, ExecutionError>
-where
-    VC::Executor: Chip<SC>,
-    VC::Periphery: Chip<SC>,
-{
-    let exe_result = leaf_vm.execute_and_compute_heights(
-        leaf_committed_exe.exe.clone(),
+) -> Result<Vec<F>, VirtualMachineError> {
+    assert!(leaf_vm_config.system.has_public_values_chip());
+    let (output, _vm) = execute_program_with_config::<BabyBearPoseidon2Engine, _>(
+        leaf_exe.program.clone(),
         verifier_input.write_to_stream(),
+        NativeCpuBuilder,
+        leaf_vm_config.clone(),
     )?;
-    let runtime_pvs: Vec<_> = exe_result
-        .public_values
-        .iter()
-        .map(|v| v.unwrap())
-        .collect();
-    Ok(runtime_pvs)
-}
-
-fn app_committed_exe_for_test(app_log_blowup: usize) -> Arc<VmCommittedExe<SC>> {
-    let program = {
-        let n = 200;
-        let mut builder = Builder::<C>::default();
-        let a: Felt<F> = builder.eval(F::ZERO);
-        let b: Felt<F> = builder.eval(F::ONE);
-        let c: Felt<F> = builder.uninit();
-        builder.range(0, n).for_each(|_, builder| {
-            builder.assign(&c, a + b);
-            builder.assign(&a, b);
-            builder.assign(&b, c);
-        });
-        builder.halt();
-        builder.compile_isa()
-    };
-    Sdk::new()
-        .commit_app_exe(
-            FriParameters::new_for_testing(app_log_blowup),
-            program.into(),
-        )
-        .unwrap()
+    Ok(output.system_records.public_values)
 }
 
-#[cfg(feature = "evm-verify")]
-fn agg_config_for_test() -> AggConfig {
-    AggConfig {
-        agg_stark_config: agg_stark_config_for_test(),
-        halo2_config: Halo2Config {
-            verifier_k: 24,
-            wrapper_k: None,
-            profiling: false,
-        },
-    }
+/// Builds the `guest/fib` program once and returns a shared handle to the resulting exe.
+fn app_exe_for_test() -> Arc<VmExe<F>> {
+    static EXE: OnceLock<Arc<VmExe<F>>> = OnceLock::new();
+    EXE.get_or_init(|| {
+        let sdk = Sdk::new(small_test_app_config(1)).unwrap();
+        let pkg_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("guest/fib");
+        let elf = sdk
+            .build(Default::default(), pkg_dir, &Default::default(), None)
+            .unwrap();
+        sdk.convert_to_exe(elf).unwrap()
+    })
+    .clone()
 }
 
-fn agg_stark_config_for_test() -> AggStarkConfig {
-    AggStarkConfig {
+fn agg_config_for_test() -> AggregationConfig {
+    AggregationConfig {
         max_num_user_public_values: NUM_PUB_VALUES,
         leaf_fri_params: FriParameters::new_for_testing(LEAF_LOG_BLOWUP),
         internal_fri_params: FriParameters::new_for_testing(INTERNAL_LOG_BLOWUP),
@@ -162,16 +120,22 @@ fn agg_stark_config_for_test() -> AggStarkConfig {
     }
 }
 
-fn small_test_app_config(app_log_blowup: usize) -> AppConfig<NativeConfig> {
+fn app_vm_config_for_test() -> SdkVmConfig {
+    let config = test_system_config()
+        .with_max_segment_len(200)
+        .with_public_values(NUM_PUB_VALUES);
+    SdkVmConfig::builder()
+        .system(SdkSystemConfig { config })
+        .rv32i(Default::default())
+        .rv32m(Default::default())
+        .io(Default::default())
+        .build()
+}
+
+fn small_test_app_config(app_log_blowup: usize) -> AppConfig<SdkVmConfig> {
     AppConfig {
         app_fri_params: FriParameters::new_for_testing(app_log_blowup).into(),
-        app_vm_config: NativeConfig::new(
-            SystemConfig::default()
-                .with_max_segment_len(200)
-                .with_continuations()
-                .with_public_values(NUM_PUB_VALUES),
-            Native,
-        ),
+        app_vm_config: app_vm_config_for_test(),
         leaf_fri_params: FriParameters::new_for_testing(LEAF_LOG_BLOWUP).into(),
         compiler_options: CompilerOptions {
             enable_cycle_tracker: true,
@@ -181,40 +145,34 @@ fn small_test_app_config(app_log_blowup: usize) -> AppConfig<NativeConfig> {
 }
 
 #[test]
-fn test_public_values_and_leaf_verification() {
-    let app_log_blowup = 3;
+fn test_public_values_and_leaf_verification() -> eyre::Result<()> {
+    setup_tracing();
+    let app_log_blowup = 1;
     let app_config = small_test_app_config(app_log_blowup);
-    let app_pk = AppProvingKey::keygen(app_config);
-    let app_committed_exe = app_committed_exe_for_test(app_log_blowup);
-
-    let agg_stark_config = agg_stark_config_for_test();
-    let leaf_vm_config = agg_stark_config.leaf_vm_config();
-    let leaf_vm = SingleSegmentVmExecutor::new(leaf_vm_config);
-    let leaf_committed_exe = app_pk.leaf_committed_exe.clone();
-
-    let app_engine = BabyBearPoseidon2Engine::new(app_pk.app_vm_pk.fri_params);
-    let app_vm = VmExecutor::new(app_pk.app_vm_pk.vm_config.clone());
-    let app_vm_result = app_vm
-        .execute_and_generate_with_cached_program(app_committed_exe.clone(), vec![])
-        .unwrap();
-    assert!(app_vm_result.per_segment.len() > 2);
-
-    let mut app_vm_seg_proofs: Vec<_> = app_vm_result
-        .per_segment
-        .into_iter()
-        .map(|proof_input| app_engine.prove(&app_pk.app_vm_pk.vm_pk, proof_input))
-        .collect();
-    let app_last_proof = app_vm_seg_proofs.pop().unwrap();
-
-    let expected_app_commit: [F; DIGEST_SIZE] = app_committed_exe.get_program_commit().into();
+    let exe = app_exe_for_test();
+    let pc_start = exe.pc_start;
+
+    let agg_config = agg_config_for_test();
+    let leaf_vm_config = agg_config.leaf_vm_config();
+
+    let sdk = Sdk::new(app_config)?;
+    let app_pk = sdk.app_pk();
+    let leaf_exe = &app_pk.leaf_committed_exe.exe;
+    let mut app_prover = sdk.app_prover(exe)?;
+    let mut app_proof = app_prover.prove(StdIn::default())?;
+
+    assert!(app_proof.per_segment.len() > 2);
+    let app_last_proof = app_proof.per_segment.pop().unwrap();
+
+    let expected_app_commit: [F; DIGEST_SIZE] = app_prover.app_program_commit().into();
 
     // Verify all segments except the last one.
     let (first_seg_final_pc, first_seg_final_mem_root) = {
         let runtime_pvs = run_leaf_verifier(
-            &leaf_vm,
-            leaf_committed_exe.clone(),
+            &leaf_vm_config,
+            leaf_exe,
             LeafVmVerifierInput {
-                proofs: app_vm_seg_proofs.clone(),
+                proofs: app_proof.per_segment.clone(),
                 public_values_root_proof: None,
             },
         )
@@ -224,26 +182,24 @@ fn test_public_values_and_leaf_verification() {
 
         assert_eq!(leaf_vm_pvs.app_commit, expected_app_commit);
         assert_eq!(leaf_vm_pvs.connector.is_terminate, F::ZERO);
-        assert_eq!(leaf_vm_pvs.connector.initial_pc, F::ZERO);
+        assert_eq!(
+            leaf_vm_pvs.connector.initial_pc,
+            F::from_canonical_u32(pc_start)
+        );
         (
             leaf_vm_pvs.connector.final_pc,
             leaf_vm_pvs.memory.final_root,
         )
     };
 
-    let pv_proof = UserPublicValuesProof::compute(
-        app_vm.config.system.memory_config.memory_dimensions(),
-        NUM_PUB_VALUES,
-        &vm_poseidon2_hasher(),
-        app_vm_result.final_memory.as_ref().unwrap(),
-    );
+    let pv_proof = app_proof.user_public_values;
     let pv_root_proof = UserPublicValuesRootProof::extract(&pv_proof);
 
     // Verify the last segment with the correct public values root proof.
     {
         let runtime_pvs = run_leaf_verifier(
-            &leaf_vm,
-            leaf_committed_exe.clone(),
+            &leaf_vm_config,
+            leaf_exe,
             LeafVmVerifierInput {
                 proofs: vec![app_last_proof.clone()],
                 public_values_root_proof: Some(pv_root_proof.clone()),
@@ -268,15 +224,18 @@ fn test_public_values_and_leaf_verification() {
         let mut wrong_pv_root_proof = pv_root_proof.clone();
         wrong_pv_root_proof.public_values_commit[0] += F::ONE;
         let execution_result = run_leaf_verifier(
-            &leaf_vm,
-            leaf_committed_exe.clone(),
+            &leaf_vm_config,
+            leaf_exe,
             LeafVmVerifierInput {
                 proofs: vec![app_last_proof.clone()],
                 public_values_root_proof: Some(wrong_pv_root_proof),
             },
         );
         assert!(
-            matches!(execution_result, Err(ExecutionError::Fail { .. })),
+            matches!(
+                execution_result,
+                Err(VirtualMachineError::Execution(ExecutionError::Fail { .. }))
+            ),
             "Expected failure: the public value root proof has a wrong pv commit: {:?}",
             execution_result
         );
@@ -287,24 +246,31 @@ fn test_public_values_and_leaf_verification() {
         let mut wrong_pv_root_proof = pv_root_proof.clone();
         wrong_pv_root_proof.sibling_hashes[0][0] += F::ONE;
         let execution_result = run_leaf_verifier(
-            &leaf_vm,
-            leaf_committed_exe.clone(),
+            &leaf_vm_config,
+            leaf_exe,
             LeafVmVerifierInput {
                 proofs: vec![app_last_proof.clone()],
                 public_values_root_proof: Some(wrong_pv_root_proof),
             },
         );
         assert!(
-            matches!(execution_result, Err(ExecutionError::Fail { .. })),
+            matches!(
+                execution_result,
+                Err(VirtualMachineError::Execution(ExecutionError::Fail { .. }))
+            ),
             "Expected failure: the public value root proof has a wrong path proof: {:?}",
             execution_result
         );
     }
+    Ok(())
 }
 
 #[cfg(feature = "evm-verify")]
 #[test]
-fn test_static_verifier_custom_pv_handler() {
+#[ignore = "slow"]
+fn test_static_verifier_custom_pv_handler() -> eyre::Result<()> {
+    use openvm_sdk::keygen::Halo2ProvingKey;
+
     // Define custom public values handler and implement StaticVerifierPvHandler trait on it
     pub struct CustomPvHandler {
         pub exe_commit: Bn254Fr,
@@ -348,213 +314,90 @@ fn test_static_verifier_custom_pv_handler() {
     println!("test setup");
     let app_log_blowup = 1;
     let app_config = small_test_app_config(app_log_blowup);
-    let sdk = Sdk::new();
-    let app_pk = sdk.app_keygen(app_config.clone()).unwrap();
-    let app_committed_exe = app_committed_exe_for_test(app_log_blowup);
     println!("app_config: {:?}", app_config.app_vm_config);
-    let params_reader = CacheHalo2ParamsReader::new_with_default_params_dir();
+    let sdk = Sdk::new(app_config)?;
+    let app_exe = app_exe_for_test();
 
     // Generate PK using custom PV handler
     println!("generate PK using custom PV handler");
-    let commits = AppExecutionCommit::compute(
-        &app_config.app_vm_config,
-        &app_committed_exe,
-        &app_pk.leaf_committed_exe,
-    );
-    let exe_commit = commits.app_exe_commit.to_bn254();
-    let leaf_verifier_commit = commits.app_vm_commit.to_bn254();
+    let app_commit = sdk.app_prover(app_exe.clone())?.app_commit();
+    let exe_commit = app_commit.app_exe_commit.to_bn254();
+    let leaf_verifier_commit = app_commit.app_vm_commit.to_bn254();
 
     let pv_handler = CustomPvHandler {
         exe_commit,
         leaf_verifier_commit,
     };
-    let agg_pk = sdk
-        .agg_keygen(agg_config_for_test(), &params_reader, &pv_handler)
-        .unwrap();
+    let (agg_pk, dummy_internal_proof) = sdk.agg_pk_and_dummy_internal_proof();
+    // SDK does not support CustomPvHandler, so we must use constructor directly
+    let params_reader = sdk.halo2_params_reader();
+    let halo2_pk = Halo2ProvingKey::keygen(
+        *sdk.halo2_config(),
+        params_reader,
+        &pv_handler,
+        agg_pk,
+        dummy_internal_proof.clone(),
+    )?;
 
     // Generate verifier contract
     println!("generate verifier contract");
-    let params =
-        params_reader.read_params(agg_pk.halo2_pk.wrapper.pinning.metadata.config_params.k);
-    let evm_verifier = agg_pk
-        .halo2_pk
-        .wrapper
-        .generate_fallback_evm_verifier(&params);
+    let wrapper_k = halo2_pk.wrapper.pinning.metadata.config_params.k;
+    let params = params_reader.read_params(wrapper_k);
+    let evm_verifier = halo2_pk.wrapper.generate_fallback_evm_verifier(&params);
 
     // Generate and verify proof
     println!("generate and verify proof");
-    let evm_proof = sdk
-        .generate_evm_proof(
-            &params_reader,
-            Arc::new(app_pk),
-            app_committed_exe,
-            agg_pk,
-            StdIn::default(),
-        )
-        .unwrap();
+    let _ = sdk.set_halo2_pk(halo2_pk).map_err(|_| panic!());
+    let evm_proof = sdk.prove_evm(app_exe, StdIn::default())?;
 
     let evm_proof: RawEvmProof = evm_proof
         .clone()
         .try_into()
         .expect("failed to convert evm proof");
     Halo2WrapperProvingKey::evm_verify(&evm_verifier, &evm_proof).unwrap();
+    Ok(())
 }
 
 #[cfg(feature = "evm-verify")]
 #[test]
-fn test_e2e_proof_generation_and_verification_with_pvs() {
-    let mut pkg_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).to_path_buf();
-    pkg_dir.push("guest/fib");
-
-    let vm_config = SdkVmConfig::builder()
-        .system(SdkSystemConfig {
-            config: SystemConfig::default()
-                .with_max_segment_len(200)
-                .with_continuations()
-                .with_public_values(NUM_PUB_VALUES),
-        })
-        .rv32i(Default::default())
-        .rv32m(Default::default())
-        .io(Default::default())
-        .native(Default::default())
-        .build();
-
-    let sdk = Sdk::new();
-    let elf = sdk
-        .build(
-            Default::default(),
-            &vm_config,
-            pkg_dir,
-            &Default::default(),
-            None,
-        )
-        .unwrap();
-    let exe = sdk.transpile(elf, vm_config.transpiler()).unwrap();
-
+fn test_e2e_proof_generation_and_verification_with_pvs() -> eyre::Result<()> {
     let app_log_blowup = 1;
-    let app_fri_params = FriParameters::new_for_testing(app_log_blowup);
-    let leaf_fri_params = FriParameters::new_for_testing(LEAF_LOG_BLOWUP);
-    let mut app_config =
-        AppConfig::new_with_leaf_fri_params(app_fri_params, vm_config, leaf_fri_params);
-    app_config.compiler_options.enable_cycle_tracker = true;
-
-    let app_committed_exe = sdk
-        .commit_app_exe(app_fri_params, exe)
-        .expect("failed to commit exe");
-
-    let app_pk = sdk.app_keygen(app_config).unwrap();
-
-    let params_reader = CacheHalo2ParamsReader::new_with_default_params_dir();
-    let agg_pk = sdk
-        .agg_keygen(
-            agg_config_for_test(),
-            &params_reader,
-            &DefaultStaticVerifierPvHandler,
-        )
-        .unwrap();
-
-    let evm_verifier = sdk
-        .generate_halo2_verifier_solidity(&params_reader, &agg_pk)
-        .unwrap();
-
-    let evm_proof = sdk
-        .generate_evm_proof(
-            &params_reader,
-            Arc::new(app_pk),
-            app_committed_exe,
-            agg_pk,
-            StdIn::default(),
-        )
-        .unwrap();
+    let app_config = small_test_app_config(app_log_blowup);
+    let mut sdk = Sdk::new(app_config)?;
+    sdk.agg_config_mut().leaf_fri_params = FriParameters::new_for_testing(LEAF_LOG_BLOWUP);
+
+    let evm_verifier = sdk.generate_halo2_verifier_solidity()?;
+    let evm_proof = sdk.prove_evm(app_exe_for_test(), StdIn::default())?;
 
-    verify_evm_halo2_proof_with_fallback(&evm_verifier, &evm_proof).unwrap();
-    sdk.verify_evm_halo2_proof(&evm_verifier, evm_proof)
-        .unwrap();
+    verify_evm_halo2_proof_with_fallback(&evm_verifier, &evm_proof)?;
+    Sdk::verify_evm_halo2_proof(&evm_verifier, evm_proof)?;
+    Ok(())
 }
 
 #[test]
-fn test_sdk_guest_build_and_transpile() {
-    let sdk = Sdk::new();
-    let guest_opts = GuestOptions::default()
-        // .with_features(vec!["zkvm"])
-        // .with_options(vec!["--release"]);
-        ;
+fn test_sdk_guest_build_and_transpile() -> eyre::Result<()> {
+    let sdk = Sdk::new(small_test_app_config(1))?;
+    let guest_opts = GuestOptions::default();
     let mut pkg_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).to_path_buf();
     pkg_dir.push("guest/fib");
 
-    let vm_config = SdkVmConfig::builder()
-        .system(SdkSystemConfig {
-            config: SystemConfig::default()
-                .with_max_segment_len(200)
-                .with_continuations()
-                .with_public_values(NUM_PUB_VALUES),
-        })
-        .rv32i(Default::default())
-        .rv32m(Default::default())
-        .io(Default::default())
-        .native(Default::default())
-        .build();
-
-    let one = sdk
-        .build(
-            guest_opts.clone(),
-            &vm_config,
-            &pkg_dir,
-            &Default::default(),
-            None,
-        )
-        .unwrap();
-    let two = sdk
-        .build(
-            guest_opts.clone(),
-            &vm_config,
-            &pkg_dir,
-            &Default::default(),
-            None,
-        )
-        .unwrap();
+    let one = sdk.build(guest_opts.clone(), &pkg_dir, &None, None)?;
+    let two = sdk.build(guest_opts.clone(), &pkg_dir, &None, None)?;
     assert_eq!(one.instructions, two.instructions);
     assert_eq!(one.instructions, two.instructions);
-    let transpiler = Transpiler::<F>::default()
-        .with_extension(Rv32ITranspilerExtension)
-        .with_extension(Rv32MTranspilerExtension)
-        .with_extension(Rv32IoTranspilerExtension);
-    let _exe = sdk.transpile(one, transpiler).unwrap();
+    let _exe = sdk.convert_to_exe(one)?;
+    Ok(())
 }
 
 #[test]
 fn test_inner_proof_codec_roundtrip() -> eyre::Result<()> {
     // generate a proof
-    let sdk = Sdk::new();
-    let mut pkg_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).to_path_buf();
-    pkg_dir.push("guest/fib");
-
-    let vm_config = SdkVmConfig::builder()
-        .system(SdkSystemConfig {
-            config: SystemConfig::default()
-                .with_max_segment_len(200)
-                .with_continuations()
-                .with_public_values(NUM_PUB_VALUES),
-        })
-        .rv32i(Default::default())
-        .rv32m(Default::default())
-        .io(Default::default())
-        .native(Default::default())
-        .build();
-    let elf = sdk.build(
-        Default::default(),
-        &vm_config,
-        pkg_dir,
-        &Default::default(),
-        None,
-    )?;
-    assert!(vm_config.system.config.continuation_enabled);
-    let exe = sdk.transpile(elf, vm_config.transpiler())?;
-    let fri_params = FriParameters::standard_fast();
-    let app_config = AppConfig::new(fri_params, vm_config);
-    let committed_exe = sdk.commit_app_exe(fri_params, exe)?;
-    let app_pk = Arc::new(sdk.app_keygen(app_config)?);
-    let app_proof = sdk.generate_app_proof(app_pk.clone(), committed_exe, StdIn::default())?;
+    let sdk = Sdk::new(small_test_app_config(1))?;
+    assert!(sdk.app_config().app_vm_config.as_ref().continuation_enabled);
+    let (_, app_vk) = sdk.app_keygen();
+    let app_proof = sdk
+        .app_prover(app_exe_for_test())?
+        .prove(StdIn::default())?;
     let mut app_proof_bytes = Vec::new();
     app_proof.encode(&mut app_proof_bytes)?;
     let decoded_app_proof = ContinuationVmProof::decode(&mut &app_proof_bytes[..])?;
@@ -564,62 +407,6 @@ fn test_inner_proof_codec_roundtrip() -> eyre::Result<()> {
         serde_json::to_vec(&decoded_app_proof)?
     );
     // Test the decoding by verifying the decoded proof
-    sdk.verify_app_proof(&app_pk.get_app_vk(), &decoded_app_proof)?;
+    verify_app_proof(&app_vk, &decoded_app_proof)?;
     Ok(())
 }
-
-#[test]
-fn test_segmentation_retry() {
-    setup_tracing();
-    let app_log_blowup = 3;
-    let app_config = small_test_app_config(app_log_blowup);
-    let app_pk = AppProvingKey::keygen(app_config);
-    let app_committed_exe = app_committed_exe_for_test(app_log_blowup);
-
-    let app_vm = VmExecutor::new(app_pk.app_vm_pk.vm_config.clone());
-    let app_vm_result = app_vm
-        .execute_and_generate_with_cached_program(app_committed_exe.clone(), vec![])
-        .unwrap();
-    assert!(app_vm_result.per_segment.len() > 2);
-
-    let total_height: usize = app_vm_result.per_segment[0]
-        .per_air
-        .iter()
-        .map(|(_, input)| {
-            let main = input.raw.common_main.as_ref();
-            main.map(|mat| mat.height()).unwrap_or(0)
-        })
-        .sum();
-
-    // Re-run with a threshold that will be violated.
-    let mut app_vm = VmExecutor::new(app_pk.app_vm_pk.vm_config.clone());
-    let num_airs = app_pk.app_vm_pk.vm_pk.per_air.len();
-    app_vm.set_trace_height_constraints(vec![LinearConstraint {
-        coefficients: vec![1; num_airs],
-        threshold: total_height as u32 - 1,
-    }]);
-    let app_vm_result =
-        app_vm.execute_and_generate_with_cached_program(app_committed_exe.clone(), vec![]);
-    assert!(matches!(
-        app_vm_result,
-        Err(GenerationError::TraceHeightsLimitExceeded)
-    ));
-
-    // Try lowering segmentation threshold.
-    let config = VmConfig::<BabyBear>::system_mut(&mut app_vm.config);
-    config.set_segmentation_strategy(config.segmentation_strategy.stricter_strategy());
-    let app_vm_result = app_vm
-        .execute_and_generate_with_cached_program(app_committed_exe.clone(), vec![])
-        .unwrap();
-
-    // New max height should indeed by smaller.
-    let new_total_height: usize = app_vm_result.per_segment[0]
-        .per_air
-        .iter()
-        .map(|(_, input)| {
-            let main = input.raw.common_main.as_ref();
-            main.map(|mat| mat.height()).unwrap_or(0)
-        })
-        .sum();
-    assert!(new_total_height < total_height);
-}
diff --git a/crates/toolchain/instructions/src/exe.rs b/crates/toolchain/instructions/src/exe.rs
index fb84ec7da5..9db5f242ac 100644
--- a/crates/toolchain/instructions/src/exe.rs
+++ b/crates/toolchain/instructions/src/exe.rs
@@ -5,8 +5,9 @@ use serde::{Deserialize, Serialize};
 
 use crate::program::Program;
 
-/// Memory image is a map from (address space, address) to word.
-pub type MemoryImage<F> = BTreeMap<(u32, u32), F>;
+// TODO[jpw]: delete this
+/// Memory image is a map from (address space, address * size_of<CellType>) to u8.
+pub type SparseMemoryImage = BTreeMap<(u32, u32), u8>;
 /// Stores the starting address, end address, and name of a set of function.
 pub type FnBounds = BTreeMap<u32, FnBound>;
 
@@ -22,7 +23,7 @@ pub struct VmExe<F> {
     /// Start address of pc.
     pub pc_start: u32,
     /// Initial memory image.
-    pub init_memory: MemoryImage<F>,
+    pub init_memory: SparseMemoryImage,
     /// Starting + ending bounds for each function.
     pub fn_bounds: FnBounds,
 }
@@ -40,7 +41,7 @@ impl<F> VmExe<F> {
         self.pc_start = pc_start;
         self
     }
-    pub fn with_init_memory(mut self, init_memory: MemoryImage<F>) -> Self {
+    pub fn with_init_memory(mut self, init_memory: SparseMemoryImage) -> Self {
         self.init_memory = init_memory;
         self
     }
diff --git a/crates/toolchain/instructions/src/lib.rs b/crates/toolchain/instructions/src/lib.rs
index c251e77d0d..76e7200cbb 100644
--- a/crates/toolchain/instructions/src/lib.rs
+++ b/crates/toolchain/instructions/src/lib.rs
@@ -18,6 +18,8 @@ pub mod utils;
 
 pub use phantom::*;
 
+pub const NATIVE_AS: u32 = 4;
+
 pub trait LocalOpcode {
     const CLASS_OFFSET: usize;
     /// Convert from the discriminant of the enum to the typed enum variant.
@@ -25,8 +27,12 @@ pub trait LocalOpcode {
     fn from_usize(value: usize) -> Self;
     fn local_usize(&self) -> usize;
 
+    /// Returns the global opcode as a `usize`: `local_usize()` shifted by `CLASS_OFFSET`.
+    fn global_opcode_usize(&self) -> usize {
+        Self::CLASS_OFFSET + self.local_usize()
+    }
     fn global_opcode(&self) -> VmOpcode {
-        VmOpcode::from_usize(self.local_usize() + Self::CLASS_OFFSET)
+        VmOpcode::from_usize(self.global_opcode_usize())
     }
 }
 
@@ -36,17 +41,19 @@ pub struct VmOpcode(usize);
 
 impl VmOpcode {
     /// Returns the corresponding `local_opcode_idx`
-    pub fn local_opcode_idx(&self, offset: usize) -> usize {
+    #[inline(always)]
+    pub const fn local_opcode_idx(&self, offset: usize) -> usize {
         self.as_usize() - offset
     }
 
     /// Returns the opcode as a usize
-    pub fn as_usize(&self) -> usize {
+    #[inline(always)]
+    pub const fn as_usize(&self) -> usize {
         self.0
     }
 
     /// Create a new [VmOpcode] from a usize
-    pub fn from_usize(value: usize) -> Self {
+    pub const fn from_usize(value: usize) -> Self {
         Self(value)
     }
 
diff --git a/crates/toolchain/instructions/src/program.rs b/crates/toolchain/instructions/src/program.rs
index 010b70514d..73c901be8a 100644
--- a/crates/toolchain/instructions/src/program.rs
+++ b/crates/toolchain/instructions/src/program.rs
@@ -1,4 +1,8 @@
-use std::{fmt, fmt::Display};
+use std::{
+    fmt::{self, Display},
+    ops::Deref,
+    sync::Arc,
+};
 
 use itertools::Itertools;
 use openvm_stark_backend::p3_field::Field;
@@ -24,37 +28,35 @@ pub struct Program<F> {
         deserialize_with = "deserialize_instructions_and_debug_infos"
     )]
     pub instructions_and_debug_infos: Vec<Option<(Instruction<F>, Option<DebugInfo>)>>,
-    pub step: u32,
     pub pc_base: u32,
 }
 
+#[derive(Clone, Debug, Default)]
+pub struct ProgramDebugInfo {
+    inner: Arc<Vec<Option<DebugInfo>>>, // one entry per program slot; `None` if no debug info
+    pc_base: u32, // pc that maps to entry 0 of `inner`
+}
+
 impl<F: Field> Program<F> {
-    pub fn new_empty(step: u32, pc_base: u32) -> Self {
+    pub fn new_empty(pc_base: u32) -> Self {
         Self {
             instructions_and_debug_infos: vec![],
-            step,
             pc_base,
         }
     }
 
-    pub fn new_without_debug_infos(
-        instructions: &[Instruction<F>],
-        step: u32,
-        pc_base: u32,
-    ) -> Self {
+    pub fn new_without_debug_infos(instructions: &[Instruction<F>], pc_base: u32) -> Self {
         Self {
             instructions_and_debug_infos: instructions
                 .iter()
                 .map(|instruction| Some((instruction.clone(), None)))
                 .collect(),
-            step,
             pc_base,
         }
     }
 
     pub fn new_without_debug_infos_with_option(
         instructions: &[Option<Instruction<F>>],
-        step: u32,
         pc_base: u32,
     ) -> Self {
         Self {
@@ -62,7 +64,6 @@ impl<F: Field> Program<F> {
                 .iter()
                 .map(|instruction| instruction.clone().map(|instruction| (instruction, None)))
                 .collect(),
-            step,
             pc_base,
         }
     }
@@ -79,7 +80,6 @@ impl<F: Field> Program<F> {
                 .zip_eq(debug_infos.iter())
                 .map(|(instruction, debug_info)| Some((instruction.clone(), debug_info.clone())))
                 .collect(),
-            step: DEFAULT_PC_STEP,
             pc_base: 0,
         }
     }
@@ -96,7 +96,7 @@ impl<F: Field> Program<F> {
     }
 
     pub fn from_instructions(instructions: &[Instruction<F>]) -> Self {
-        Self::new_without_debug_infos(instructions, DEFAULT_PC_STEP, 0)
+        Self::new_without_debug_infos(instructions, 0)
     }
 
     pub fn len(&self) -> usize {
@@ -120,14 +120,6 @@ impl<F: Field> Program<F> {
         self.defined_instructions().len()
     }
 
-    pub fn debug_infos(&self) -> Vec<Option<DebugInfo>> {
-        self.instructions_and_debug_infos
-            .iter()
-            .flatten()
-            .map(|(_, debug_info)| debug_info.clone())
-            .collect()
-    }
-
     pub fn enumerate_by_pc(&self) -> Vec<(u32, Instruction<F>, Option<DebugInfo>)> {
         self.instructions_and_debug_infos
             .iter()
@@ -135,7 +127,7 @@ impl<F: Field> Program<F> {
             .flat_map(|(index, option)| {
                 option.clone().map(|(instruction, debug_info)| {
                     (
-                        self.pc_base + (self.step * (index as u32)),
+                        self.pc_base + (DEFAULT_PC_STEP * (index as u32)),
                         instruction,
                         debug_info,
                     )
@@ -172,6 +164,21 @@ impl<F: Field> Program<F> {
             .extend(other.instructions_and_debug_infos);
     }
 }
+
+impl<F> Program<F> {
+    /// Collects each slot's debug info (`None` where absent) together with `pc_base`.
+    pub fn debug_infos(&self) -> ProgramDebugInfo {
+        let slots = self.instructions_and_debug_infos.iter();
+        let per_slot: Vec<Option<DebugInfo>> = slots
+            .map(|slot| slot.as_ref().and_then(|(_, info)| info.clone()))
+            .collect();
+        ProgramDebugInfo {
+            inner: Arc::new(per_slot),
+            pc_base: self.pc_base,
+        }
+    }
+}
+
 impl<F: Field> Display for Program<F> {
     fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
         for instruction in self.defined_instructions().iter() {
@@ -195,6 +202,24 @@ impl<F: Field> Display for Program<F> {
     }
 }
 
+impl ProgramDebugInfo {
+    /// Returns the debug info stored for the instruction at `pc`.
+    ///
+    /// ## Panics
+    /// If `pc` is out of bounds.
+    pub fn get(&self, pc: u32) -> &Option<DebugInfo> {
+        &self.inner[((pc - self.pc_base) / DEFAULT_PC_STEP) as usize]
+    }
+}
+
+impl Deref for ProgramDebugInfo {
+    type Target = [Option<DebugInfo>];
+    /// Exposes the stored debug infos as a slice indexed by slot (not by pc).
+    fn deref(&self) -> &Self::Target {
+        self.inner.as_slice()
+    }
+}
+
 pub fn display_program_with_pc<F: Field>(program: &Program<F>) {
     for (pc, instruction) in program.defined_instructions().iter().enumerate() {
         let Instruction {
@@ -257,7 +282,7 @@ mod tests {
 
     #[test]
     fn test_program_serde() {
-        let mut program = Program::<F>::new_empty(4, 0);
+        let mut program = Program::<F>::new_empty(0);
         program.instructions_and_debug_infos.push(Some((
             Instruction::from_isize(VmOpcode::from_usize(113), 1, 2, 3, 4, 5),
             None,
diff --git a/crates/toolchain/instructions/src/riscv.rs b/crates/toolchain/instructions/src/riscv.rs
index b2998c4539..720b323d52 100644
--- a/crates/toolchain/instructions/src/riscv.rs
+++ b/crates/toolchain/instructions/src/riscv.rs
@@ -5,3 +5,5 @@ pub const RV32_CELL_BITS: usize = 8;
 pub const RV32_IMM_AS: u32 = 0;
 pub const RV32_REGISTER_AS: u32 = 1;
 pub const RV32_MEMORY_AS: u32 = 2;
+
+pub const RV32_NUM_REGISTERS: usize = 32;
diff --git a/crates/toolchain/tests/Cargo.toml b/crates/toolchain/tests/Cargo.toml
index 9f3e3caa82..c2349b893f 100644
--- a/crates/toolchain/tests/Cargo.toml
+++ b/crates/toolchain/tests/Cargo.toml
@@ -8,11 +8,16 @@ homepage.workspace = true
 repository.workspace = true
 
 [dependencies]
+openvm-build.workspace = true
+openvm-circuit.workspace = true
+openvm-transpiler.workspace = true
+eyre.workspace = true
+tempfile.workspace = true
+
+[dev-dependencies]
 openvm-stark-backend.workspace = true
 openvm-stark-sdk.workspace = true
 openvm-circuit = { workspace = true, features = ["test-utils"] }
-openvm-transpiler.workspace = true
-openvm-build.workspace = true
 openvm-algebra-transpiler.workspace = true
 openvm-bigint-circuit.workspace = true
 openvm-rv32im-circuit.workspace = true
@@ -21,10 +26,8 @@ openvm-algebra-circuit.workspace = true
 openvm-ecc-circuit = { workspace = true }
 openvm-instructions = { workspace = true }
 openvm-platform = { workspace = true }
-
-eyre.workspace = true
 test-case.workspace = true
-tempfile.workspace = true
+rand = { workspace = true }
 serde = { workspace = true, features = ["alloc"] }
 derive_more = { workspace = true, features = ["from"] }
 
@@ -36,4 +39,4 @@ default = ["parallel"]
 parallel = ["openvm-circuit/parallel"]
 
 [package.metadata.cargo-shear]
-ignored = ["derive_more", "openvm-stark-backend"]
+ignored = ["derive_more", "openvm-stark-backend", "rand"]
diff --git a/crates/toolchain/tests/src/utils.rs b/crates/toolchain/tests/src/utils.rs
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/crates/toolchain/tests/tests/riscv_test_vectors.rs b/crates/toolchain/tests/tests/riscv_test_vectors.rs
index 9516b0cd7b..9b0c2524e4 100644
--- a/crates/toolchain/tests/tests/riscv_test_vectors.rs
+++ b/crates/toolchain/tests/tests/riscv_test_vectors.rs
@@ -5,7 +5,7 @@ use openvm_circuit::{
     arch::{instructions::exe::VmExe, VmExecutor},
     utils::air_test,
 };
-use openvm_rv32im_circuit::Rv32ImConfig;
+use openvm_rv32im_circuit::{Rv32ImConfig, Rv32ImCpuBuilder};
 use openvm_rv32im_transpiler::{
     Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
 };
@@ -39,9 +39,10 @@ fn test_rv32im_riscv_vector_runtime() -> Result<()> {
                         .with_extension(Rv32MTranspilerExtension)
                         .with_extension(Rv32IoTranspilerExtension),
                 )?;
-                let executor = VmExecutor::<F, _>::new(config.clone());
-                let res = executor.execute(exe, vec![])?;
-                Ok(res)
+                let executor = VmExecutor::new(config.clone())?;
+                let interpreter = executor.instance(&exe)?;
+                let _state = interpreter.execute(vec![], None)?;
+                Ok(())
             });
 
             match result {
@@ -80,7 +81,7 @@ fn test_rv32im_riscv_vector_prove() -> Result<()> {
             )?;
 
             let result = std::panic::catch_unwind(|| {
-                air_test(config.clone(), exe);
+                air_test(Rv32ImCpuBuilder, config.clone(), exe);
             });
 
             match result {
diff --git a/crates/toolchain/tests/tests/transpiler_tests.rs b/crates/toolchain/tests/tests/transpiler_tests.rs
index bf07eccc42..f50371dcaf 100644
--- a/crates/toolchain/tests/tests/transpiler_tests.rs
+++ b/crates/toolchain/tests/tests/transpiler_tests.rs
@@ -5,28 +5,22 @@ use std::{
 
 use eyre::Result;
 use num_bigint::BigUint;
-use openvm_algebra_circuit::{
-    Fp2Extension, Fp2ExtensionExecutor, Fp2ExtensionPeriphery, ModularExtension,
-    ModularExtensionExecutor, ModularExtensionPeriphery,
-};
+use openvm_algebra_circuit::*;
 use openvm_algebra_transpiler::{Fp2TranspilerExtension, ModularTranspilerExtension};
-use openvm_bigint_circuit::{Int256, Int256Executor, Int256Periphery};
+use openvm_bigint_circuit::*;
 use openvm_circuit::{
     arch::{InitFileGenerator, SystemConfig, VmExecutor},
     derive::VmConfig,
+    system::SystemExecutor,
     utils::air_test,
 };
 use openvm_ecc_circuit::{SECP256K1_MODULUS, SECP256K1_ORDER};
 use openvm_instructions::exe::VmExe;
 use openvm_platform::memory::MEM_SIZE;
-use openvm_rv32im_circuit::{
-    Rv32I, Rv32IExecutor, Rv32IPeriphery, Rv32ImConfig, Rv32Io, Rv32IoExecutor, Rv32IoPeriphery,
-    Rv32M, Rv32MExecutor, Rv32MPeriphery,
-};
+use openvm_rv32im_circuit::*;
 use openvm_rv32im_transpiler::{
     Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
 };
-use openvm_stark_backend::p3_field::PrimeField32;
 use openvm_stark_sdk::p3_baby_bear::BabyBear;
 use openvm_transpiler::{elf::Elf, transpiler::Transpiler, FromElf};
 use serde::{Deserialize, Serialize};
@@ -80,14 +74,15 @@ fn test_rv32im_runtime(elf_path: &str) -> Result<()> {
             .with_extension(Rv32IoTranspilerExtension),
     )?;
     let config = Rv32ImConfig::default();
-    let executor = VmExecutor::<F, _>::new(config);
-    executor.execute(exe, vec![])?;
+    let executor = VmExecutor::new(config)?;
+    let interpreter = executor.instance(&exe)?;
+    interpreter.execute(vec![], None)?;
     Ok(())
 }
 
 #[derive(Clone, Debug, VmConfig, Serialize, Deserialize)]
 pub struct Rv32ModularFp2Int256Config {
-    #[system]
+    #[config(executor = "SystemExecutor<F>")]
     pub system: SystemConfig,
     #[extension]
     pub base: Rv32I,
@@ -106,7 +101,7 @@ pub struct Rv32ModularFp2Int256Config {
 impl Rv32ModularFp2Int256Config {
     pub fn new(modular_moduli: Vec<BigUint>, fp2_moduli: Vec<(String, BigUint)>) -> Self {
         Self {
-            system: SystemConfig::default().with_continuations(),
+            system: SystemConfig::default(),
             base: Default::default(),
             mul: Default::default(),
             io: Default::default(),
@@ -143,8 +138,9 @@ fn test_intrinsic_runtime(elf_path: &str) -> Result<()> {
             .with_extension(ModularTranspilerExtension)
             .with_extension(Fp2TranspilerExtension),
     )?;
-    let executor = VmExecutor::<F, _>::new(config);
-    executor.execute(openvm_exe, vec![])?;
+    let executor = VmExecutor::new(config)?;
+    let interpreter = executor.instance(&openvm_exe)?;
+    interpreter.execute(vec![], None)?;
     Ok(())
 }
 
@@ -160,6 +156,6 @@ fn test_terminate_prove() -> Result<()> {
             .with_extension(Rv32IoTranspilerExtension)
             .with_extension(ModularTranspilerExtension),
     )?;
-    air_test(config, openvm_exe);
+    air_test(Rv32ImCpuBuilder, config, openvm_exe);
     Ok(())
 }
diff --git a/crates/toolchain/transpiler/src/lib.rs b/crates/toolchain/transpiler/src/lib.rs
index 367b028393..ee85e9b153 100644
--- a/crates/toolchain/transpiler/src/lib.rs
+++ b/crates/toolchain/transpiler/src/lib.rs
@@ -1,10 +1,7 @@
 //! A transpiler from custom RISC-V ELFs to OpenVM executable binaries.
 
 use elf::Elf;
-use openvm_instructions::{
-    exe::VmExe,
-    program::{Program, DEFAULT_PC_STEP},
-};
+use openvm_instructions::{exe::VmExe, program::Program};
 pub use openvm_platform;
 use openvm_stark_backend::p3_field::PrimeField32;
 use transpiler::{Transpiler, TranspilerError};
@@ -29,11 +26,7 @@ impl<F: PrimeField32> FromElf for VmExe<F> {
     type ElfContext = Transpiler<F>;
     fn from_elf(elf: Elf, transpiler: Self::ElfContext) -> Result<Self, TranspilerError> {
         let instructions = transpiler.transpile(&elf.instructions)?;
-        let program = Program::new_without_debug_infos_with_option(
-            &instructions,
-            DEFAULT_PC_STEP,
-            elf.pc_base,
-        );
+        let program = Program::new_without_debug_infos_with_option(&instructions, elf.pc_base);
         let init_memory = elf_memory_image_to_openvm_memory_image(elf.memory_image);
 
         Ok(VmExe {
diff --git a/crates/toolchain/transpiler/src/transpiler.rs b/crates/toolchain/transpiler/src/transpiler.rs
index 54e2c1e91d..58e81b6bea 100644
--- a/crates/toolchain/transpiler/src/transpiler.rs
+++ b/crates/toolchain/transpiler/src/transpiler.rs
@@ -8,6 +8,7 @@ use crate::TranspilerExtension;
 
 /// Collection of [`TranspilerExtension`]s.
 /// The transpiler can be configured to transpile any ELF in 32-bit chunks.
+#[derive(Clone)]
 pub struct Transpiler<F> {
     processors: Vec<Rc<dyn TranspilerExtension<F>>>,
 }
diff --git a/crates/toolchain/transpiler/src/util.rs b/crates/toolchain/transpiler/src/util.rs
index d9135de153..c5711653ff 100644
--- a/crates/toolchain/transpiler/src/util.rs
+++ b/crates/toolchain/transpiler/src/util.rs
@@ -1,7 +1,7 @@
 use std::collections::BTreeMap;
 
 use openvm_instructions::{
-    exe::MemoryImage,
+    exe::SparseMemoryImage,
     instruction::Instruction,
     riscv::{RV32_MEMORY_AS, RV32_REGISTER_NUM_LIMBS},
     utils::isize_to_field,
@@ -165,17 +165,14 @@ pub fn nop<F: PrimeField32>() -> Instruction<F> {
     }
 }
 
-/// Converts our memory image (u32 -> [u8; 4]) into Vm memory image ((as, address) -> word)
-pub fn elf_memory_image_to_openvm_memory_image<F: PrimeField32>(
+/// Converts the ELF image (addr -> u32 word) to a VM image ((as=2, addr + i) -> i-th LE byte)
+pub fn elf_memory_image_to_openvm_memory_image(
     memory_image: BTreeMap<u32, u32>,
-) -> MemoryImage<F> {
-    let mut result = MemoryImage::new();
+) -> SparseMemoryImage {
+    let mut result = SparseMemoryImage::new();
     for (addr, word) in memory_image {
         for (i, byte) in word.to_le_bytes().into_iter().enumerate() {
-            result.insert(
-                (RV32_MEMORY_AS, addr + i as u32),
-                F::from_canonical_u8(byte),
-            );
+            result.insert((RV32_MEMORY_AS, addr + i as u32), byte);
         }
     }
     result
diff --git a/crates/vm/Cargo.toml b/crates/vm/Cargo.toml
index 80e6794b48..55d2e16030 100644
--- a/crates/vm/Cargo.toml
+++ b/crates/vm/Cargo.toml
@@ -27,7 +27,6 @@ backtrace.workspace = true
 rand.workspace = true
 serde.workspace = true
 serde-big-array.workspace = true
-cfg-if.workspace = true
 metrics = { workspace = true, optional = true }
 thiserror.workspace = true
 rustc-hash.workspace = true
@@ -35,24 +34,42 @@ eyre.workspace = true
 derivative.workspace = true
 static_assertions.workspace = true
 getset.workspace = true
+dashmap.workspace = true
+
+[target.'cfg(any(unix, windows))'.dependencies]
+memmap2.workspace = true
+[target.'cfg(target_os = "linux")'.dependencies]
+libc.workspace = true
 
 [dev-dependencies]
 test-log.workspace = true
 
 openvm-circuit = { workspace = true, features = ["test-utils"] }
 openvm-stark-sdk.workspace = true
-openvm-native-circuit.workspace = true
 openvm-native-compiler.workspace = true
 openvm-rv32im-transpiler.workspace = true
 
 [features]
 default = ["parallel", "jemalloc"]
-parallel = ["openvm-stark-backend/parallel"]
-test-utils = ["dep:openvm-stark-sdk"]
-bench-metrics = ["dep:metrics", "openvm-stark-backend/bench-metrics"]
-function-span = ["bench-metrics"]
+parallel = [
+    "openvm-stark-backend/parallel",
+    "dashmap/rayon",
+    "openvm-stark-sdk?/parallel",
+]
+metrics = [
+    "dep:metrics",
+    "openvm-stark-backend/metrics",
+    "openvm-stark-sdk?/metrics",
+]
+# turns on more invasive profiling for fine-grained guest metrics
+perf-metrics = ["metrics"]
+# use basic memory instead of mmap:
+basic-memory = []
+# turns on stark-backend debugger in all proofs
+stark-debug = []
+test-utils = ["openvm-stark-sdk"]
 # performance features:
 mimalloc = ["openvm-stark-backend/mimalloc"]
 jemalloc = ["openvm-stark-backend/jemalloc"]
 jemalloc-prof = ["openvm-stark-backend/jemalloc-prof"]
-nightly-features = ["openvm-stark-sdk/nightly-features"]
+nightly-features = ["openvm-stark-sdk?/nightly-features"]
diff --git a/crates/vm/derive/Cargo.toml b/crates/vm/derive/Cargo.toml
index bd3c7cb693..d2d11dcc78 100644
--- a/crates/vm/derive/Cargo.toml
+++ b/crates/vm/derive/Cargo.toml
@@ -12,4 +12,5 @@ proc-macro = true
 [dependencies]
 syn = { version = "2.0", features = ["parsing"] }
 quote = "1.0"
+proc-macro2 = "1.0"
 itertools = { workspace = true }
diff --git a/crates/vm/derive/src/lib.rs b/crates/vm/derive/src/lib.rs
index 37dca6e4ed..a43053e0cd 100644
--- a/crates/vm/derive/src/lib.rs
+++ b/crates/vm/derive/src/lib.rs
@@ -4,15 +4,33 @@ extern crate proc_macro;
 use itertools::{multiunzip, Itertools};
 use proc_macro::{Span, TokenStream};
 use quote::{quote, ToTokens};
-use syn::{punctuated::Punctuated, Data, Fields, GenericParam, Ident, Meta, Token};
+use syn::{
+    parse_quote, punctuated::Punctuated, spanned::Spanned, Data, DataStruct, Field, Fields,
+    GenericParam, Ident, Meta, Token,
+};
 
-#[proc_macro_derive(InstructionExecutor)]
-pub fn instruction_executor_derive(input: TokenStream) -> TokenStream {
+#[proc_macro_derive(PreflightExecutor)]
+pub fn preflight_executor_derive(input: TokenStream) -> TokenStream {
     let ast: syn::DeriveInput = syn::parse(input).unwrap();
 
     let name = &ast.ident;
     let generics = &ast.generics;
-    let (impl_generics, ty_generics, _) = generics.split_for_impl();
+    let (_, ty_generics, _) = generics.split_for_impl();
+
+    let default_ty_generic = Ident::new("F", proc_macro2::Span::call_site());
+    let mut new_generics = generics.clone();
+    new_generics.params.push(syn::parse_quote! { RA });
+    let field_ty_generic = generics
+        .params
+        .first()
+        .and_then(|param| match param {
+            GenericParam::Type(type_param) => Some(&type_param.ident),
+            _ => None,
+        })
+        .unwrap_or_else(|| {
+            new_generics.params.push(syn::parse_quote! { F });
+            &default_ty_generic
+        });
 
     match &ast.data {
         Data::Struct(inner) => {
@@ -27,21 +45,20 @@ pub fn instruction_executor_derive(input: TokenStream) -> TokenStream {
                 _ => panic!("Only unnamed fields are supported"),
             };
             // Use full path ::openvm_circuit... so it can be used either within or outside the vm
-            // crate. Assume F is already generic of the field.
-            let mut new_generics = generics.clone();
+            // crate.
             let where_clause = new_generics.make_where_clause();
             where_clause.predicates.push(
-                syn::parse_quote! { #inner_ty: ::openvm_circuit::arch::InstructionExecutor<F> },
+                syn::parse_quote! { #inner_ty: ::openvm_circuit::arch::PreflightExecutor<#field_ty_generic, RA> },
             );
+            let (impl_generics, _, where_clause) = new_generics.split_for_impl();
             quote! {
-                impl #impl_generics ::openvm_circuit::arch::InstructionExecutor<F> for #name #ty_generics #where_clause {
+                impl #impl_generics ::openvm_circuit::arch::PreflightExecutor<#field_ty_generic, RA> for #name #ty_generics #where_clause {
                     fn execute(
-                        &mut self,
-                        memory: &mut ::openvm_circuit::system::memory::MemoryController<F>,
-                        instruction: &::openvm_circuit::arch::instructions::instruction::Instruction<F>,
-                        from_state: ::openvm_circuit::arch::ExecutionState<u32>,
-                    ) -> ::openvm_circuit::arch::Result<::openvm_circuit::arch::ExecutionState<u32>> {
-                        self.0.execute(memory, instruction, from_state)
+                        &self,
+                        state: ::openvm_circuit::arch::VmStateMut<#field_ty_generic, ::openvm_circuit::system::memory::online::TracingMemory, RA>,
+                        instruction: &::openvm_circuit::arch::instructions::instruction::Instruction<#field_ty_generic>,
+                    ) -> Result<(), ::openvm_circuit::arch::ExecutionError> {
+                        self.0.execute(state, instruction)
                     }
 
                     fn get_opcode_name(&self, opcode: usize) -> String {
@@ -64,37 +81,35 @@ pub fn instruction_executor_derive(input: TokenStream) -> TokenStream {
                     (variant_name, field)
                 })
                 .collect::<Vec<_>>();
-            let first_ty_generic = ast
-                .generics
-                .params
-                .first()
-                .and_then(|param| match param {
-                    GenericParam::Type(type_param) => Some(&type_param.ident),
-                    _ => None,
-                })
-                .expect("First generic must be type for Field");
             // Use full path ::openvm_circuit... so it can be used either within or outside the vm
             // crate. Assume F is already generic of the field.
-            let (execute_arms, get_opcode_name_arms): (Vec<_>, Vec<_>) =
+            let (execute_arms, get_opcode_name_arms, where_predicates): (Vec<_>, Vec<_>, Vec<_>) =
                 multiunzip(variants.iter().map(|(variant_name, field)| {
                     let field_ty = &field.ty;
                     let execute_arm = quote! {
-                        #name::#variant_name(x) => <#field_ty as ::openvm_circuit::arch::InstructionExecutor<#first_ty_generic>>::execute(x, memory, instruction, from_state)
+                        #name::#variant_name(x) => <#field_ty as ::openvm_circuit::arch::PreflightExecutor<#field_ty_generic, RA>>::execute(x, state, instruction)
                     };
                     let get_opcode_name_arm = quote! {
-                        #name::#variant_name(x) => <#field_ty as ::openvm_circuit::arch::InstructionExecutor<#first_ty_generic>>::get_opcode_name(x, opcode)
+                        #name::#variant_name(x) => <#field_ty as ::openvm_circuit::arch::PreflightExecutor<#field_ty_generic, RA>>::get_opcode_name(x, opcode)
                     };
-
-                    (execute_arm, get_opcode_name_arm)
+                    let where_predicate = syn::parse_quote! {
+                        #field_ty: ::openvm_circuit::arch::PreflightExecutor<#field_ty_generic, RA>
+                    };
+                    (execute_arm, get_opcode_name_arm, where_predicate)
                 }));
+            let where_clause = new_generics.make_where_clause();
+            for predicate in where_predicates {
+                where_clause.predicates.push(predicate);
+            }
+            // Discard these ty_generics: they include the extra `RA` (and possibly `F`) params
+            let (impl_generics, _, where_clause) = new_generics.split_for_impl();
             quote! {
-                impl #impl_generics ::openvm_circuit::arch::InstructionExecutor<#first_ty_generic> for #name #ty_generics {
+                impl #impl_generics ::openvm_circuit::arch::PreflightExecutor<#field_ty_generic, RA> for #name #ty_generics #where_clause {
                     fn execute(
-                        &mut self,
-                        memory: &mut ::openvm_circuit::system::memory::MemoryController<#first_ty_generic>,
-                        instruction: &::openvm_circuit::arch::instructions::instruction::Instruction<#first_ty_generic>,
-                        from_state: ::openvm_circuit::arch::ExecutionState<u32>,
-                    ) -> ::openvm_circuit::arch::Result<::openvm_circuit::arch::ExecutionState<u32>> {
+                        &self,
+                        state: ::openvm_circuit::arch::VmStateMut<#field_ty_generic, ::openvm_circuit::system::memory::online::TracingMemory, RA>,
+                        instruction: &::openvm_circuit::arch::instructions::instruction::Instruction<#field_ty_generic>,
+                    ) -> Result<(), ::openvm_circuit::arch::ExecutionError> {
                         match self {
                             #(#execute_arms,)*
                         }
@@ -113,6 +128,262 @@ pub fn instruction_executor_derive(input: TokenStream) -> TokenStream {
     }
 }
 
+/// Derives `Executor` by delegating to the single field of a newtype struct or enum variant.
+#[proc_macro_derive(Executor)]
+pub fn executor_derive(input: TokenStream) -> TokenStream {
+    let ast: syn::DeriveInput = syn::parse(input).unwrap();
+    let name = &ast.ident;
+    let generics = &ast.generics;
+    let (impl_generics, ty_generics, _) = generics.split_for_impl();
+
+    match &ast.data {
+        Data::Struct(inner) => {
+            // Check if the struct has only one unnamed field
+            let inner_ty = match &inner.fields {
+                Fields::Unnamed(fields) => {
+                    if fields.unnamed.len() != 1 {
+                        panic!("Only one unnamed field is supported");
+                    }
+                    fields.unnamed.first().unwrap().ty.clone()
+                }
+                _ => panic!("Only unnamed fields are supported"),
+            };
+            // Use full path ::openvm_circuit... so it can be used either within or outside the vm
+            // crate. The struct is assumed to already have a generic named `F` for the field.
+            let mut new_generics = generics.clone();
+            let where_clause = new_generics.make_where_clause();
+            where_clause
+                .predicates
+                .push(syn::parse_quote! { #inner_ty: ::openvm_circuit::arch::Executor<F> });
+            quote! {
+                impl #impl_generics ::openvm_circuit::arch::Executor<F> for #name #ty_generics #where_clause {
+                    #[inline(always)]
+                    fn pre_compute_size(&self) -> usize {
+                        self.0.pre_compute_size()
+                    }
+                    #[inline(always)]
+                    fn pre_compute<Ctx>(
+                        &self,
+                        pc: u32,
+                        inst: &::openvm_circuit::arch::instructions::instruction::Instruction<F>,
+                        data: &mut [u8],
+                    ) -> ::core::result::Result<::openvm_circuit::arch::ExecuteFunc<F, Ctx>, ::openvm_circuit::arch::StaticProgramError>
+                    where
+                        Ctx: ::openvm_circuit::arch::execution_mode::ExecutionCtxTrait, {
+                        self.0.pre_compute(pc, inst, data)
+                    }
+                }
+            }
+            .into()
+        }
+        Data::Enum(e) => {
+            let variants = e
+                .variants
+                .iter()
+                .map(|variant| {
+                    let variant_name = &variant.ident;
+
+                    let mut fields = variant.fields.iter();
+                    let field = fields.next().unwrap();
+                    assert!(fields.next().is_none(), "Only one field is supported");
+                    (variant_name, field)
+                })
+                .collect::<Vec<_>>();
+            let default_ty_generic = Ident::new("F", proc_macro2::Span::call_site());
+            let mut new_generics = generics.clone();
+            let first_ty_generic = ast
+                .generics
+                .params
+                .first()
+                .and_then(|param| match param {
+                    GenericParam::Type(type_param) => Some(&type_param.ident),
+                    _ => None,
+                })
+                .unwrap_or_else(|| {
+                    new_generics.params.push(syn::parse_quote! { F });
+                    &default_ty_generic
+                });
+            // Use full path ::openvm_circuit... so it can be used either within or outside the vm
+            // crate. The field generic is the first type param, or an added `F` if there is none.
+            let (pre_compute_size_arms, pre_compute_arms, where_predicates): (Vec<_>, Vec<_>, Vec<_>) = multiunzip(variants.iter().map(|(variant_name, field)| {
+                let field_ty = &field.ty;
+                let pre_compute_size_arm = quote! {
+                    #name::#variant_name(x) => <#field_ty as ::openvm_circuit::arch::Executor<#first_ty_generic>>::pre_compute_size(x)
+                };
+                let pre_compute_arm = quote! {
+                    #name::#variant_name(x) => <#field_ty as ::openvm_circuit::arch::Executor<#first_ty_generic>>::pre_compute(x, pc, instruction, data)
+                };
+                let where_predicate = syn::parse_quote! {
+                    #field_ty: ::openvm_circuit::arch::Executor<#first_ty_generic>
+                };
+                (pre_compute_size_arm, pre_compute_arm, where_predicate)
+            }));
+            let where_clause = new_generics.make_where_clause();
+            for predicate in where_predicates {
+                where_clause.predicates.push(predicate);
+            }
+            // Discard these ty_generics: they may contain the extra `F` that the type itself lacks
+            let (impl_generics, _, where_clause) = new_generics.split_for_impl();
+
+            quote! {
+                impl #impl_generics ::openvm_circuit::arch::Executor<#first_ty_generic> for #name #ty_generics #where_clause {
+                    #[inline(always)]
+                    fn pre_compute_size(&self) -> usize {
+                        match self {
+                            #(#pre_compute_size_arms,)*
+                        }
+                    }
+
+                    #[inline(always)]
+                    fn pre_compute<Ctx>(
+                        &self,
+                        pc: u32,
+                        instruction: &::openvm_circuit::arch::instructions::instruction::Instruction<#first_ty_generic>,
+                        data: &mut [u8],
+                    ) -> ::core::result::Result<::openvm_circuit::arch::ExecuteFunc<#first_ty_generic, Ctx>, ::openvm_circuit::arch::StaticProgramError>
+                    where
+                        Ctx: ::openvm_circuit::arch::execution_mode::ExecutionCtxTrait, {
+                        match self {
+                            #(#pre_compute_arms,)*
+                        }
+                    }
+                }
+            }
+            .into()
+        }
+        Data::Union(_) => unimplemented!("Unions are not supported"),
+    }
+}
+
+/// Derives `::openvm_circuit::arch::MeteredExecutor` by delegation. A single-field tuple struct
+/// (which must already be generic over `F`) forwards to `self.0`; an enum forwards to its active
+/// variant, taking the field generic from its first generic parameter if that is a type parameter
+/// and adding a fresh `F` otherwise. Panics on unions and on unsupported field layouts.
+#[proc_macro_derive(MeteredExecutor)]
+pub fn metered_executor_derive(input: TokenStream) -> TokenStream {
+    let ast: syn::DeriveInput = syn::parse(input).unwrap();
+    let name = &ast.ident;
+    let generics = &ast.generics;
+    let (impl_generics, ty_generics, _) = generics.split_for_impl();
+
+    match &ast.data {
+        Data::Struct(inner) => {
+            // Newtype case: the struct must wrap exactly one unnamed field.
+            let inner_ty = match &inner.fields {
+                Fields::Unnamed(fields) => {
+                    assert!(fields.unnamed.len() == 1, "Only one unnamed field is supported");
+                    fields.unnamed.first().unwrap().ty.clone()
+                }
+                _ => panic!("Only unnamed fields are supported"),
+            };
+            // Use full path ::openvm_circuit... so it can be used either within or outside the vm
+            // crate. Assumes the struct is already generic over `F`; bound the field type on it.
+            let mut new_generics = generics.clone();
+            let where_clause = new_generics.make_where_clause();
+            where_clause
+                .predicates
+                .push(syn::parse_quote! { #inner_ty: ::openvm_circuit::arch::MeteredExecutor<F> });
+            quote! {
+                impl #impl_generics ::openvm_circuit::arch::MeteredExecutor<F> for #name #ty_generics #where_clause {
+                    #[inline(always)]
+                    fn metered_pre_compute_size(&self) -> usize {
+                        self.0.metered_pre_compute_size()
+                    }
+                    #[inline(always)]
+                    fn metered_pre_compute<Ctx>(
+                        &self,
+                        chip_idx: usize,
+                        pc: u32,
+                        inst: &::openvm_circuit::arch::instructions::instruction::Instruction<F>,
+                        data: &mut [u8],
+                    ) -> Result<::openvm_circuit::arch::ExecuteFunc<F, Ctx>, ::openvm_circuit::arch::StaticProgramError>
+                    where
+                        Ctx: ::openvm_circuit::arch::execution_mode::MeteredExecutionCtxTrait, {
+                        self.0.metered_pre_compute(chip_idx, pc, inst, data)
+                    }
+                }
+            }
+                .into()
+        }
+        Data::Enum(e) => {
+            let variants = e
+                .variants
+                .iter()
+                .map(|variant| {
+                    let variant_name = &variant.ident;
+                    let mut fields = variant.fields.iter();
+                    let field = fields.next().expect("Only one field is supported");
+                    assert!(fields.next().is_none(), "Only one field is supported");
+                    (variant_name, field)
+                })
+                .collect::<Vec<_>>();
+            // The field generic is the enum's first generic parameter if that is a type parameter,
+            // else a fresh impl-only `F`; every field-typed position in the impl must name it.
+            let default_ty_generic = Ident::new("F", proc_macro2::Span::call_site());
+            let mut new_generics = generics.clone();
+            let first_ty_generic = ast
+                .generics
+                .params
+                .first()
+                .and_then(|param| match param {
+                    GenericParam::Type(type_param) => Some(&type_param.ident),
+                    _ => None,
+                })
+                .unwrap_or_else(|| {
+                    new_generics.params.push(syn::parse_quote! { F });
+                    &default_ty_generic
+                });
+            // Full `::openvm_circuit` paths keep the output usable within or outside the vm crate.
+            let (pre_compute_size_arms, metered_pre_compute_arms, where_predicates): (Vec<_>, Vec<_>, Vec<_>) = multiunzip(variants.iter().map(|(variant_name, field)| {
+                let field_ty = &field.ty;
+                let pre_compute_size_arm = quote! {
+                    #name::#variant_name(x) => <#field_ty as ::openvm_circuit::arch::MeteredExecutor<#first_ty_generic>>::metered_pre_compute_size(x)
+                };
+                let metered_pre_compute_arm = quote! {
+                    #name::#variant_name(x) => <#field_ty as ::openvm_circuit::arch::MeteredExecutor<#first_ty_generic>>::metered_pre_compute(x, chip_idx, pc, instruction, data)
+                };
+                let where_predicate = syn::parse_quote! {
+                    #field_ty: ::openvm_circuit::arch::MeteredExecutor<#first_ty_generic>
+                };
+                (pre_compute_size_arm, metered_pre_compute_arm, where_predicate)
+            }));
+            let where_clause = new_generics.make_where_clause();
+            for predicate in where_predicates {
+                where_clause.predicates.push(predicate);
+            }
+            // Keep the enum's own `ty_generics`: `new_generics` may carry the extra impl-only `F`.
+            let (impl_generics, _, where_clause) = new_generics.split_for_impl();
+
+            quote! {
+                impl #impl_generics ::openvm_circuit::arch::MeteredExecutor<#first_ty_generic> for #name #ty_generics #where_clause {
+                    #[inline(always)]
+                    fn metered_pre_compute_size(&self) -> usize {
+                        match self {
+                            #(#pre_compute_size_arms,)*
+                        }
+                    }
+                    #[inline(always)]
+                    fn metered_pre_compute<Ctx>(
+                        &self,
+                        chip_idx: usize,
+                        pc: u32,
+                        instruction: &::openvm_circuit::arch::instructions::instruction::Instruction<#first_ty_generic>,
+                        data: &mut [u8],
+                    ) -> Result<::openvm_circuit::arch::ExecuteFunc<#first_ty_generic, Ctx>, ::openvm_circuit::arch::StaticProgramError>
+                    where
+                        Ctx: ::openvm_circuit::arch::execution_mode::MeteredExecutionCtxTrait, {
+                        match self {
+                            #(#metered_pre_compute_arms,)*
+                        }
+                    }
+                }
+            }
+                .into()
+        }
+        Data::Union(_) => unimplemented!("Unions are not supported"),
+    }
+}
+
 /// Derives `AnyEnum` trait on an enum type.
 /// By default an enum arm will just return `self` as `&dyn Any`.
 ///
@@ -189,18 +460,23 @@ pub fn any_enum_derive(input: TokenStream) -> TokenStream {
     }
 }
 
-// VmConfig derive macro
-#[derive(Debug)]
-enum Source {
-    System(Ident),
-    Config(Ident),
-}
-
-#[proc_macro_derive(VmConfig, attributes(system, config, extension))]
+#[proc_macro_derive(VmConfig, attributes(config, extension))]
 pub fn vm_generic_config_derive(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
     let ast = syn::parse_macro_input!(input as syn::DeriveInput);
     let name = &ast.ident;
 
+    match &ast.data {
+        syn::Data::Struct(inner) => match generate_config_traits_impl(name, inner) {
+            Ok(tokens) => tokens,
+            Err(err) => err.to_compile_error().into(),
+        },
+        _ => syn::Error::new(name.span(), "Only structs are supported")
+            .to_compile_error()
+            .into(),
+    }
+}
+
+fn generate_config_traits_impl(name: &Ident, inner: &DataStruct) -> syn::Result<TokenStream> {
     let gen_name_with_uppercase_idents = |ident: &Ident| {
         let mut name = ident.to_string().chars().collect::<Vec<_>>();
         assert!(name[0].is_lowercase(), "Field name must not be capitalized");
@@ -210,180 +486,217 @@ pub fn vm_generic_config_derive(input: proc_macro::TokenStream) -> proc_macro::T
         (res_lower, res_upper)
     };
 
-    match &ast.data {
-        syn::Data::Struct(inner) => {
-            let fields = match &inner.fields {
-                Fields::Named(named) => named.named.iter().collect(),
-                Fields::Unnamed(_) => {
-                    return syn::Error::new(name.span(), "Only named fields are supported")
-                        .to_compile_error()
-                        .into();
-                }
-                Fields::Unit => vec![],
-            };
+    let fields = match &inner.fields {
+        Fields::Named(named) => named.named.iter().collect(),
+        Fields::Unnamed(_) => {
+            return Err(syn::Error::new(
+                name.span(),
+                "Only named fields are supported",
+            ))
+        }
+        Fields::Unit => vec![],
+    };
 
-            let source = fields
-                .iter()
-                .filter_map(|f| {
-                    if f.attrs.iter().any(|attr| attr.path().is_ident("system")) {
-                        Some(Source::System(f.ident.clone().unwrap()))
-                    } else if f.attrs.iter().any(|attr| attr.path().is_ident("config")) {
-                        Some(Source::Config(f.ident.clone().unwrap()))
-                    } else {
-                        None
-                    }
-                })
-                .exactly_one()
-                .expect("Exactly one field must have #[system] or #[config] attribute");
-            let (source_name, source_name_upper) = match &source {
-                Source::System(ident) | Source::Config(ident) => {
-                    gen_name_with_uppercase_idents(ident)
-                }
-            };
+    let source_field = fields
+        .iter()
+        .filter(|f| f.attrs.iter().any(|attr| attr.path().is_ident("config")))
+        .exactly_one()
+        .clone()
+        .expect("Exactly one field must have the #[config] attribute");
+    let (source_name, source_name_upper) =
+        gen_name_with_uppercase_idents(source_field.ident.as_ref().unwrap());
 
-            let extensions = fields
-                .iter()
-                .filter(|f| f.attrs.iter().any(|attr| attr.path().is_ident("extension")))
-                .cloned()
-                .collect::<Vec<_>>();
+    let extensions = fields
+        .iter()
+        .filter(|f| f.attrs.iter().any(|attr| attr.path().is_ident("extension")))
+        .cloned()
+        .collect::<Vec<_>>();
 
-            let mut executor_enum_fields = Vec::new();
-            let mut periphery_enum_fields = Vec::new();
-            let mut create_chip_complex = Vec::new();
-            for &e in extensions.iter() {
-                let (field_name, field_name_upper) =
-                    gen_name_with_uppercase_idents(&e.ident.clone().unwrap());
-                // TRACKING ISSUE:
-                // We cannot just use <e.ty.to_token_stream() as VmExtension<F>>::Executor because of this: <https://github.com/rust-lang/rust/issues/85576>
-                let mut executor_name = Ident::new(
-                    &format!("{}Executor", e.ty.to_token_stream()),
-                    Span::call_site().into(),
-                );
-                let mut periphery_name = Ident::new(
-                    &format!("{}Periphery", e.ty.to_token_stream()),
-                    Span::call_site().into(),
-                );
-                if let Some(attr) = e
-                    .attrs
-                    .iter()
-                    .find(|attr| attr.path().is_ident("extension"))
-                {
-                    match attr.meta {
-                        Meta::Path(_) => {}
-                        Meta::NameValue(_) => {
-                            return syn::Error::new(
-                                name.span(),
-                                "Only `#[extension]` or `#[extension(...)] formats are supported",
-                            )
-                            .to_compile_error()
-                            .into()
-                        }
-                        _ => {
-                            let nested = attr
-                                .parse_args_with(Punctuated::<Meta, Token![,]>::parse_terminated)
-                                .unwrap();
-                            for meta in nested {
-                                match meta {
-                                    Meta::NameValue(nv) => {
-                                        if nv.path.is_ident("executor") {
-                                            executor_name = Ident::new(
-                                                &nv.value.to_token_stream().to_string(),
-                                                Span::call_site().into(),
-                                            );
-                                            Ok(())
-                                        } else if nv.path.is_ident("periphery") {
-                                            periphery_name = Ident::new(
-                                                &nv.value.to_token_stream().to_string(),
-                                                Span::call_site().into(),
-                                            );
-                                            Ok(())
-                                        } else {
-                                            Err("only executor and periphery keys are supported")
-                                        }
-                                    }
-                                    _ => Err("only name = value format is supported"),
-                                }
-                                .expect("wrong attributes format");
-                            }
-                        }
-                    }
-                };
-                executor_enum_fields.push(quote! {
-                    #[any_enum]
-                    #field_name_upper(#executor_name<F>),
-                });
-                periphery_enum_fields.push(quote! {
-                    #[any_enum]
-                    #field_name_upper(#periphery_name<F>),
-                });
-                create_chip_complex.push(quote! {
-                    let complex: ::openvm_circuit::arch::VmChipComplex<F, Self::Executor, Self::Periphery> = complex.extend(&self.#field_name)?;
-                });
-            }
+    let mut executor_enum_fields = Vec::new();
+    let mut create_executors = Vec::new();
+    let mut create_airs = Vec::new();
+    let mut execution_where_predicates: Vec<syn::WherePredicate> = Vec::new();
+    let mut circuit_where_predicates: Vec<syn::WherePredicate> = Vec::new();
 
-            let (source_executor_type, source_periphery_type) = match &source {
-                Source::System(_) => (
-                    quote! { ::openvm_circuit::arch::SystemExecutor },
-                    quote! { ::openvm_circuit::arch::SystemPeriphery },
-                ),
-                Source::Config(field_ident) => {
-                    let field_type = fields
-                        .iter()
-                        .find(|f| f.ident.as_ref() == Some(field_ident))
-                        .map(|f| &f.ty)
-                        .expect("Field not found");
+    let source_field_ty = source_field.ty.clone();
+
+    for e in extensions.iter() {
+        let (ext_field_name, ext_name_upper) =
+            gen_name_with_uppercase_idents(e.ident.as_ref().expect("field must be named"));
+        let executor_type = parse_executor_type(e, false)?;
+        executor_enum_fields.push(quote! {
+            #[any_enum]
+            #ext_name_upper(#executor_type),
+        });
+        create_executors.push(quote! {
+            let inventory: ::openvm_circuit::arch::ExecutorInventory<Self::Executor> = inventory.extend::<F, _, _>(&self.#ext_field_name)?;
+        });
+        let extension_ty = e.ty.clone();
+        execution_where_predicates.push(parse_quote! {
+            #extension_ty: ::openvm_circuit::arch::VmExecutionExtension<F, Executor = #executor_type>
+        });
+        create_airs.push(quote! {
+            inventory.start_new_extension();
+            ::openvm_circuit::arch::VmCircuitExtension::extend_circuit(&self.#ext_field_name, &mut inventory)?;
+        });
+        circuit_where_predicates.push(parse_quote! {
+            #extension_ty: ::openvm_circuit::arch::VmCircuitExtension<SC>
+        });
+    }
 
-                    let executor_type = format!("{}Executor", quote!(#field_type));
-                    let periphery_type = format!("{}Periphery", quote!(#field_type));
+    // The config type always needs <F> due to SystemExecutor
+    let source_executor_type = parse_executor_type(source_field, true)?;
+    execution_where_predicates.push(parse_quote! {
+        #source_field_ty: ::openvm_circuit::arch::VmExecutionConfig<F, Executor = #source_executor_type>
+    });
+    circuit_where_predicates.push(parse_quote! {
+        #source_field_ty: ::openvm_circuit::arch::VmCircuitConfig<SC>
+    });
+    let execution_where_clause = quote! { where #(#execution_where_predicates),* };
+    let circuit_where_clause = quote! { where #(#circuit_where_predicates),* };
 
-                    let executor_ident = Ident::new(&executor_type, field_ident.span());
-                    let periphery_ident = Ident::new(&periphery_type, field_ident.span());
+    let executor_type = Ident::new(&format!("{}Executor", name), name.span());
 
-                    (quote! { #executor_ident }, quote! { #periphery_ident })
-                }
-            };
+    let token_stream = TokenStream::from(quote! {
+        #[derive(
+            Clone,
+            ::derive_more::derive::From,
+            ::openvm_circuit::derive::AnyEnum,
+            ::openvm_circuit::derive::Executor,
+            ::openvm_circuit::derive::MeteredExecutor,
+            ::openvm_circuit::derive::PreflightExecutor,
+        )]
+        pub enum #executor_type<F: openvm_stark_backend::p3_field::Field> {
+            #[any_enum]
+            #source_name_upper(#source_executor_type),
+            #(#executor_enum_fields)*
+        }
 
-            let executor_type = Ident::new(&format!("{}Executor", name), name.span());
-            let periphery_type = Ident::new(&format!("{}Periphery", name), name.span());
+        impl<F: openvm_stark_backend::p3_field::Field> ::openvm_circuit::arch::VmExecutionConfig<F> for #name #execution_where_clause {
+            type Executor = #executor_type<F>;
 
-            TokenStream::from(quote! {
-                #[derive(::openvm_circuit::circuit_derive::ChipUsageGetter, ::openvm_circuit::circuit_derive::Chip, ::openvm_circuit::derive::InstructionExecutor, ::derive_more::derive::From, ::openvm_circuit::derive::AnyEnum)]
-                pub enum #executor_type<F: PrimeField32> {
-                    #[any_enum]
-                    #source_name_upper(#source_executor_type<F>),
-                    #(#executor_enum_fields)*
-                }
+            fn create_executors(
+                &self,
+            ) -> Result<::openvm_circuit::arch::ExecutorInventory<Self::Executor>, ::openvm_circuit::arch::ExecutorInventoryError> {
+                let inventory = self.#source_name.create_executors()?.transmute::<Self::Executor>();
+                #(#create_executors)*
+                Ok(inventory)
+            }
+        }
 
-                #[derive(::openvm_circuit::circuit_derive::ChipUsageGetter, ::openvm_circuit::circuit_derive::Chip, ::derive_more::derive::From, ::openvm_circuit::derive::AnyEnum)]
-                pub enum #periphery_type<F: PrimeField32> {
-                    #[any_enum]
-                    #source_name_upper(#source_periphery_type<F>),
-                    #(#periphery_enum_fields)*
-                }
+        impl<SC: openvm_stark_backend::config::StarkGenericConfig> ::openvm_circuit::arch::VmCircuitConfig<SC> for #name #circuit_where_clause {
+            fn create_airs(
+                &self,
+            ) -> Result<::openvm_circuit::arch::AirInventory<SC>, ::openvm_circuit::arch::AirInventoryError> {
+                let mut inventory = self.#source_name.create_airs()?;
+                #(#create_airs)*
+                Ok(inventory)
+            }
+        }
 
-                impl<F: PrimeField32> ::openvm_circuit::arch::VmConfig<F> for #name {
-                    type Executor = #executor_type<F>;
-                    type Periphery = #periphery_type<F>;
+        impl AsRef<SystemConfig> for #name {
+            fn as_ref(&self) -> &SystemConfig {
+                self.#source_name.as_ref()
+            }
+        }
 
-                    fn system(&self) -> &::openvm_circuit::arch::SystemConfig {
-                        ::openvm_circuit::arch::VmConfig::<F>::system(&self.#source_name)
-                    }
-                    fn system_mut(&mut self) -> &mut ::openvm_circuit::arch::SystemConfig {
-                        ::openvm_circuit::arch::VmConfig::<F>::system_mut(&mut self.#source_name)
-                    }
+        impl AsMut<SystemConfig> for #name {
+            fn as_mut(&mut self) -> &mut SystemConfig {
+                self.#source_name.as_mut()
+            }
+        }
+    });
+    Ok(token_stream)
+}
 
-                    fn create_chip_complex(
-                        &self,
-                    ) -> Result<::openvm_circuit::arch::VmChipComplex<F, Self::Executor, Self::Periphery>, ::openvm_circuit::arch::VmInventoryError> {
-                        let complex = self.#source_name.create_chip_complex()?;
-                        #(#create_chip_complex)*
-                        Ok(complex)
+/// Resolves the executor type for a `#[config]`/`#[extension]` field: the `executor = ...` value
+/// when given (string literal or path), otherwise `{field type}Executor`, with `<F>` appended per
+/// `generics = true|false`, or per `default_needs_generics` when neither key is present.
+fn parse_executor_type(
+    f: &Field,
+    default_needs_generics: bool,
+) -> syn::Result<proc_macro2::TokenStream> {
+    // TRACKING ISSUE:
+    // We cannot just use <e.ty.to_token_stream() as VmExecutionExtension<F>>::Executor because of this: <https://github.com/rust-lang/rust/issues/85576>
+    let mut executor_type = None;
+    // Keep the parse Result unevaluated: it is only propagated where the default name is used
+    let executor_name = syn::parse_str::<Ident>(&format!("{}Executor", f.ty.to_token_stream()));
+
+    if let Some(attr) = f
+        .attrs
+        .iter()
+        .find(|attr| attr.path().is_ident("extension") || attr.path().is_ident("config"))
+    {
+        match attr.meta {
+            Meta::Path(_) => {}
+            Meta::NameValue(_) => {
+                return Err(syn::Error::new(
+                    f.ty.span(),
+                    "Only `#[config]`, `#[extension]`, `#[config(...)]` or `#[extension(...)]` formats are supported",
+                ))
+            }
+            _ => {
+                let nested = attr
+                    .parse_args_with(Punctuated::<Meta, Token![,]>::parse_terminated)?;
+                for meta in nested {
+                    match meta {
+                        Meta::NameValue(nv) => {
+                            if nv.path.is_ident("executor") {
+                                executor_type = match nv.value {
+                                    syn::Expr::Lit(syn::ExprLit {
+                                        lit: syn::Lit::Str(lit_str), ..
+                                    }) => {
+                                        let executor_type: syn::Type = syn::parse_str(&lit_str.value())?;
+                                        Some(quote! { #executor_type })
+                                    },
+                                    syn::Expr::Path(path) => {
+                                        // Handle identifier paths like `executor = MyExecutor`
+                                        Some(path.to_token_stream())
+                                    },
+                                    _ => {
+                                        return Err(syn::Error::new(
+                                            nv.value.span(),
+                                            "executor value must be a string literal or identifier"
+                                        ));
+                                    }
+                                };
+                            } else if nv.path.is_ident("generics") {
+                                // `generics = true|false` picks `{type}Executor<F>` vs `{type}Executor`
+                                let value_str = nv.value.to_token_stream().to_string();
+                                let needs_generics = match value_str.as_str() {
+                                    "true" => true,
+                                    "false" => false,
+                                    _ => return Err(syn::Error::new(
+                                        nv.value.span(),
+                                        "generics attribute must be either true or false"
+                                    ))
+                                };
+                                let executor_name = executor_name.clone()?;
+                                executor_type = Some(if needs_generics {
+                                    quote! { #executor_name<F> }
+                                } else {
+                                    quote! { #executor_name }
+                                });
+                            } else {
+                                return Err(syn::Error::new(nv.span(), "only executor and generics keys are supported"));
+                            }
+                        }
+                        _ => {
+                            return Err(syn::Error::new(meta.span(), "only name = value format is supported"));
+                        }
                     }
                 }
-            })
+            }
         }
-        _ => syn::Error::new(name.span(), "Only structs are supported")
-            .to_compile_error()
-            .into(),
+    }
+    if let Some(executor_type) = executor_type {
+        Ok(executor_type)
+    } else {
+        let executor_name = executor_name?;
+        Ok(if default_needs_generics {
+            quote! { #executor_name<F> }
+        } else {
+            quote! { #executor_name }
+        })
     }
 }
diff --git a/crates/vm/src/arch/config.rs b/crates/vm/src/arch/config.rs
index d82b5f7cf0..6b30a7c27b 100644
--- a/crates/vm/src/arch/config.rs
+++ b/crates/vm/src/arch/config.rs
@@ -1,17 +1,37 @@
-use std::{fs::File, io::Write, path::Path, sync::Arc};
+use std::{
+    fs::File,
+    io::{self, Write},
+    path::Path,
+};
 
 use derive_new::new;
-use openvm_circuit::system::memory::MemoryTraceHeights;
+use getset::{Setters, WithSetters};
+use openvm_instructions::{
+    riscv::{RV32_IMM_AS, RV32_MEMORY_AS, RV32_REGISTER_AS},
+    NATIVE_AS,
+};
 use openvm_poseidon2_air::Poseidon2Config;
-use openvm_stark_backend::{p3_field::PrimeField32, ChipUsageGetter};
+use openvm_stark_backend::{
+    config::{StarkGenericConfig, Val},
+    engine::StarkEngine,
+    p3_field::Field,
+    p3_util::log2_strict_usize,
+};
 use serde::{de::DeserializeOwned, Deserialize, Serialize};
 
-use super::{
-    segment::DefaultSegmentationStrategy, AnyEnum, InstructionExecutor, SegmentationStrategy,
-    SystemComplex, SystemExecutor, SystemPeriphery, VmChipComplex, VmInventoryError,
-    PUBLIC_VALUES_AIR_ID,
+use super::{AnyEnum, VmChipComplex, PUBLIC_VALUES_AIR_ID};
+use crate::{
+    arch::{
+        execution_mode::metered::segment_ctx::SegmentationLimits, AirInventory, AirInventoryError,
+        Arena, ChipInventoryError, ExecutorInventory, ExecutorInventoryError,
+    },
+    system::{
+        memory::{
+            merkle::public_values::PUBLIC_VALUES_AS, num_memory_airs, CHUNK, POINTER_MAX_BITS,
+        },
+        SystemChipComplex,
+    },
 };
-use crate::system::memory::BOUNDARY_AIR_OFFSET;
 
 // sbox is decomposed to have this max degree for Poseidon2. We set to 3 so quotient_degree = 2
 // allows log_blowup = 1
@@ -19,28 +39,86 @@ const DEFAULT_POSEIDON2_MAX_CONSTRAINT_DEGREE: usize = 3;
 pub const DEFAULT_MAX_NUM_PUBLIC_VALUES: usize = 32;
 /// Width of Poseidon2 VM uses.
 pub const POSEIDON2_WIDTH: usize = 16;
+/// Offset for address space indices. This is used to distinguish between different memory spaces.
+pub const ADDR_SPACE_OFFSET: u32 = 1;
 /// Returns a Poseidon2 config for the VM.
-pub fn vm_poseidon2_config<F: PrimeField32>() -> Poseidon2Config<F> {
+pub fn vm_poseidon2_config<F: Field>() -> Poseidon2Config<F> {
     Poseidon2Config::default()
 }
 
-pub trait VmConfig<F: PrimeField32>:
-    Clone + Serialize + DeserializeOwned + InitFileGenerator
+/// A VM configuration is the minimum serializable format to be able to create the execution
+/// environment and circuit for a zkVM supporting a fixed set of instructions.
+///
+/// For users who only need to create an execution environment, use the sub-trait
+/// [VmExecutionConfig] to avoid the `SC` generic.
+///
+/// This trait does not contain the [VmBuilder] trait, because a single VM configuration may
+/// have multiple [VmBuilder]s for different prover backends.
+pub trait VmConfig<SC>:
+    Clone
+    + Serialize
+    + DeserializeOwned
+    + InitFileGenerator
+    + VmExecutionConfig<Val<SC>>
+    + VmCircuitConfig<SC>
+    + AsRef<SystemConfig>
+    + AsMut<SystemConfig>
+where
+    SC: StarkGenericConfig,
 {
-    type Executor: InstructionExecutor<F> + AnyEnum + ChipUsageGetter;
-    type Periphery: AnyEnum + ChipUsageGetter;
+}
+
+pub trait VmExecutionConfig<F> {
+    type Executor: AnyEnum + Send + Sync;
+
+    fn create_executors(&self)
+        -> Result<ExecutorInventory<Self::Executor>, ExecutorInventoryError>;
+}
+
+pub trait VmCircuitConfig<SC: StarkGenericConfig> {
+    fn create_airs(&self) -> Result<AirInventory<SC>, AirInventoryError>;
+}
 
-    /// Must contain system config
-    fn system(&self) -> &SystemConfig;
-    fn system_mut(&mut self) -> &mut SystemConfig;
+/// This trait is intended to be implemented on a new type wrapper of the VmConfig struct to get
+/// around Rust orphan rules.
+pub trait VmBuilder<E: StarkEngine>: Sized {
+    type VmConfig: VmConfig<E::SC>;
+    type RecordArena: Arena;
+    type SystemChipInventory: SystemChipComplex<Self::RecordArena, E::PB>;
 
+    /// Create a [VmChipComplex] from the full [AirInventory], which should be the output of
+    /// [VmCircuitConfig::create_airs].
+    #[allow(clippy::type_complexity)]
     fn create_chip_complex(
         &self,
-    ) -> Result<VmChipComplex<F, Self::Executor, Self::Periphery>, VmInventoryError>;
+        config: &Self::VmConfig,
+        circuit: AirInventory<E::SC>,
+    ) -> Result<
+        VmChipComplex<E::SC, Self::RecordArena, E::PB, Self::SystemChipInventory>,
+        ChipInventoryError,
+    >;
+}
+
+impl<SC, VC> VmConfig<SC> for VC
+where
+    SC: StarkGenericConfig,
+    VC: Clone
+        + Serialize
+        + DeserializeOwned
+        + InitFileGenerator
+        + VmExecutionConfig<Val<SC>>
+        + VmCircuitConfig<SC>
+        + AsRef<SystemConfig>
+        + AsMut<SystemConfig>,
+{
 }
 
 pub const OPENVM_DEFAULT_INIT_FILE_BASENAME: &str = "openvm_init";
 pub const OPENVM_DEFAULT_INIT_FILE_NAME: &str = "openvm_init.rs";
+/// The minimum block size is 4, but RISC-V `lb` only requires alignment of 1 and `lh` only requires
+/// alignment of 2 because the instructions are implemented by doing an access of block size 4.
+const DEFAULT_U8_BLOCK_SIZE: usize = 4;
+const DEFAULT_NATIVE_BLOCK_SIZE: usize = 1;
 
 /// Trait for generating a init.rs file that contains a call to moduli_init!,
 /// complex_init!, sw_init! with the supported moduli and curves.
@@ -57,7 +135,7 @@ pub trait InitFileGenerator {
         &self,
         manifest_dir: &Path,
         init_file_name: Option<&str>,
-    ) -> eyre::Result<()> {
+    ) -> io::Result<()> {
         if let Some(contents) = self.generate_init_file_contents() {
             let dest_path = Path::new(manifest_dir)
                 .join(init_file_name.unwrap_or(OPENVM_DEFAULT_INIT_FILE_NAME));
@@ -68,37 +146,100 @@ pub trait InitFileGenerator {
     }
 }
 
-#[derive(Debug, Serialize, Deserialize, Clone, new, Copy)]
+/// Each address space in guest memory may be configured with a different type `T` to represent a
+/// memory cell in the address space. On host, the address space will be mapped to linear host
+/// memory in bytes. The type `T` must be plain old data (POD) and be safely transmutable from a
+/// fixed size array of bytes. Moreover, each type `T` must be convertible to a field element `F`.
+///
+/// We currently implement this trait on the enum [MemoryCellType], which includes all cell types
+/// that we expect to be used in the VM context.
+pub trait AddressSpaceHostLayout {
+    /// Size in bytes of the memory cell type.
+    fn size(&self) -> usize;
+
+    /// # Safety
+    /// - This function must only be called when `value` is guaranteed to be of size `self.size()`.
+    /// - Alignment of `value` must be a multiple of the alignment of `F`.
+    /// - The field type `F` must be plain old data.
+    unsafe fn to_field<F: Field>(&self, value: &[u8]) -> F;
+}
+
+#[derive(Debug, Serialize, Deserialize, Clone, new)]
 pub struct MemoryConfig {
-    /// The maximum height of the address space. This means the trie has `as_height` layers for
-    /// searching the address space. The allowed address spaces are those in the range `[as_offset,
-    /// as_offset + 2^as_height)` where `as_offset` is currently fixed to `1` to not allow address
-    /// space `0` in memory.
-    pub as_height: usize,
-    /// The offset of the address space. Should be fixed to equal `1`.
-    pub as_offset: u32,
+    /// The maximum height of the address space. This means the trie has `addr_space_height` layers
+    /// for searching the address space. The allowed address spaces are those in the range `[1,
+    /// 1 + 2^addr_space_height)` where it starts from 1 to not allow address space 0 in memory.
+    pub addr_space_height: usize,
+    /// Host configuration per address space, indexed by address space. The list is expected to
+    /// have length `(1 << addr_space_height) + 1`; entry 0 is the disallowed address space 0.
+    pub addr_spaces: Vec<AddressSpaceHostConfig>,
     pub pointer_max_bits: usize,
-    /// All timestamps must be in the range `[0, 2^clk_max_bits)`. Maximum allowed: 29.
-    pub clk_max_bits: usize,
+    /// All timestamps must be in the range `[0, 2^timestamp_max_bits)`. Maximum allowed: 29.
+    pub timestamp_max_bits: usize,
     /// Limb size used by the range checker
     pub decomp: usize,
     /// Maximum N AccessAdapter AIR to support.
     pub max_access_adapter_n: usize,
-    /// An expected upper bound on the number of memory accesses.
-    pub access_capacity: usize,
 }
 
 impl Default for MemoryConfig {
     fn default() -> Self {
-        Self::new(3, 1, 29, 29, 17, 32, 1 << 24)
+        let mut addr_spaces =
+            Self::empty_address_space_configs((1 << 3) + ADDR_SPACE_OFFSET as usize);
+        const MAX_CELLS: usize = 1 << 29;
+        addr_spaces[RV32_REGISTER_AS as usize].num_cells = 32 * size_of::<u32>();
+        addr_spaces[RV32_MEMORY_AS as usize].num_cells = MAX_CELLS;
+        addr_spaces[PUBLIC_VALUES_AS as usize].num_cells = DEFAULT_MAX_NUM_PUBLIC_VALUES;
+        addr_spaces[NATIVE_AS as usize].num_cells = MAX_CELLS;
+        Self::new(3, addr_spaces, POINTER_MAX_BITS, 29, 17, 32)
+    }
+}
+
+impl MemoryConfig {
+    pub fn empty_address_space_configs(num_addr_spaces: usize) -> Vec<AddressSpaceHostConfig> {
+        // Address spaces other than 0..4 use the native 32-bit field layout. Every entry starts
+        // with zero cells; `MemoryConfig::default` then sizes only address spaces 1..=4.
+        let mut addr_spaces = vec![
+            AddressSpaceHostConfig::new(
+                0,
+                DEFAULT_NATIVE_BLOCK_SIZE,
+                MemoryCellType::native32()
+            );
+            num_addr_spaces
+        ];
+        addr_spaces[RV32_IMM_AS as usize] = AddressSpaceHostConfig::new(0, 1, MemoryCellType::Null);
+        addr_spaces[RV32_REGISTER_AS as usize] =
+            AddressSpaceHostConfig::new(0, DEFAULT_U8_BLOCK_SIZE, MemoryCellType::U8);
+        addr_spaces[RV32_MEMORY_AS as usize] =
+            AddressSpaceHostConfig::new(0, DEFAULT_U8_BLOCK_SIZE, MemoryCellType::U8);
+        addr_spaces[PUBLIC_VALUES_AS as usize] =
+            AddressSpaceHostConfig::new(0, DEFAULT_U8_BLOCK_SIZE, MemoryCellType::U8);
+
+        addr_spaces
+    }
+
+    /// Config for aggregation usage with only native address space.
+    pub fn aggregation() -> Self {
+        let mut addr_spaces =
+            Self::empty_address_space_configs((1 << 3) + ADDR_SPACE_OFFSET as usize);
+        addr_spaces[NATIVE_AS as usize].num_cells = 1 << 29;
+        Self::new(3, addr_spaces, POINTER_MAX_BITS, 29, 17, 8)
+    }
+
+    pub fn min_block_size_bits(&self) -> Vec<u8> {
+        self.addr_spaces
+            .iter()
+            .map(|addr_sp| log2_strict_usize(addr_sp.min_block_size) as u8)
+            .collect()
     }
 }
 
 /// System-level configuration for the virtual machine. Contains all configuration parameters that
 /// are managed by the architecture, including configuration for continuations support.
-#[derive(Debug, Clone, Serialize, Deserialize)]
+#[derive(Debug, Clone, Serialize, Deserialize, Setters, WithSetters)]
 pub struct SystemConfig {
     /// The maximum constraint degree any chip is allowed to use.
+    #[getset(set_with = "pub")]
     pub max_constraint_degree: usize,
     /// True if the VM is in continuation mode. In this mode, an execution could be segmented and
     /// each segment is proved by a proof. Each proof commits the before and after state of the
@@ -119,47 +260,41 @@ pub struct SystemConfig {
     /// Whether to collect detailed profiling metrics.
     /// **Warning**: this slows down the runtime.
     pub profiling: bool,
-    /// Segmentation strategy
+    /// Segmentation limits.
     /// This field is skipped in serde as it's only used in execution and
     /// not needed after any serialize/deserialize.
-    #[serde(skip, default = "get_default_segmentation_strategy")]
-    pub segmentation_strategy: Arc<dyn SegmentationStrategy>,
-}
-
-pub fn get_default_segmentation_strategy() -> Arc<DefaultSegmentationStrategy> {
-    Arc::new(DefaultSegmentationStrategy::default())
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
-pub struct SystemTraceHeights {
-    pub memory: MemoryTraceHeights,
-    // All other chips have constant heights.
+    #[serde(skip, default = "SegmentationLimits::default")]
+    #[getset(set = "pub")]
+    pub segmentation_limits: SegmentationLimits,
 }
 
 impl SystemConfig {
     pub fn new(
         max_constraint_degree: usize,
-        memory_config: MemoryConfig,
+        mut memory_config: MemoryConfig,
         num_public_values: usize,
     ) -> Self {
-        let segmentation_strategy = get_default_segmentation_strategy();
         assert!(
-            memory_config.clk_max_bits <= 29,
+            memory_config.timestamp_max_bits <= 29,
             "Timestamp max bits must be <= 29 for LessThan to work in 31-bit field"
         );
+        memory_config.addr_spaces[PUBLIC_VALUES_AS as usize].num_cells = num_public_values;
         Self {
             max_constraint_degree,
-            continuation_enabled: false,
+            continuation_enabled: true,
             memory_config,
             num_public_values,
-            segmentation_strategy,
             profiling: false,
+            segmentation_limits: SegmentationLimits::default(),
         }
     }
 
-    pub fn with_max_constraint_degree(mut self, max_constraint_degree: usize) -> Self {
-        self.max_constraint_degree = max_constraint_degree;
-        self
+    pub fn default_from_memory(memory_config: MemoryConfig) -> Self {
+        Self::new(
+            DEFAULT_POSEIDON2_MAX_CONSTRAINT_DEGREE,
+            memory_config,
+            DEFAULT_MAX_NUM_PUBLIC_VALUES,
+        )
     }
 
     pub fn with_continuations(mut self) -> Self {
@@ -174,20 +309,15 @@ impl SystemConfig {
 
     pub fn with_public_values(mut self, num_public_values: usize) -> Self {
         self.num_public_values = num_public_values;
+        self.memory_config.addr_spaces[PUBLIC_VALUES_AS as usize].num_cells = num_public_values;
         self
     }
 
     pub fn with_max_segment_len(mut self, max_segment_len: usize) -> Self {
-        self.segmentation_strategy = Arc::new(
-            DefaultSegmentationStrategy::new_with_max_segment_len(max_segment_len),
-        );
+        self.segmentation_limits.max_trace_height = max_segment_len as u32;
         self
     }
 
-    pub fn set_segmentation_strategy(&mut self, strategy: Arc<dyn SegmentationStrategy>) {
-        self.segmentation_strategy = strategy;
-    }
-
     pub fn with_profiling(mut self) -> Self {
         self.profiling = true;
         self
@@ -204,55 +334,133 @@ impl SystemConfig {
 
     /// Returns the AIR ID of the memory boundary AIR. Panic if the boundary AIR is not enabled.
     pub fn memory_boundary_air_id(&self) -> usize {
-        let mut ret = PUBLIC_VALUES_AIR_ID;
-        if self.has_public_values_chip() {
-            ret += 1;
+        PUBLIC_VALUES_AIR_ID + usize::from(self.has_public_values_chip())
+    }
+
+    /// Returns the AIR ID of the memory merkle AIR. Returns None if continuations are not enabled.
+    pub fn memory_merkle_air_id(&self) -> Option<usize> {
+        let boundary_idx = self.memory_boundary_air_id();
+        if self.continuation_enabled {
+            Some(boundary_idx + 1)
+        } else {
+            None
+        }
+    }
+
+    /// AIR ID for the first memory access adapter AIR.
+    pub fn access_adapter_air_id_offset(&self) -> usize {
+        let boundary_idx = self.memory_boundary_air_id();
+        // boundary, (if persistent memory) merkle AIRs
+        boundary_idx + 1 + usize::from(self.continuation_enabled)
+    }
+
+    /// This is O(1) and returns the length of
+    /// [`SystemAirInventory::into_airs`](crate::system::SystemAirInventory::into_airs).
+    pub fn num_airs(&self) -> usize {
+        self.memory_boundary_air_id()
+            + num_memory_airs(
+                self.continuation_enabled,
+                self.memory_config.max_access_adapter_n,
+            )
+    }
+
+    pub fn initial_block_size(&self) -> usize {
+        match self.continuation_enabled {
+            true => CHUNK,
+            false => 1,
         }
-        ret += BOUNDARY_AIR_OFFSET;
-        ret
     }
 }
 
 impl Default for SystemConfig {
     fn default() -> Self {
-        Self::new(
-            DEFAULT_POSEIDON2_MAX_CONSTRAINT_DEGREE,
-            Default::default(),
-            DEFAULT_MAX_NUM_PUBLIC_VALUES,
-        )
+        Self::default_from_memory(MemoryConfig::default())
     }
 }
 
-impl SystemTraceHeights {
-    /// Round all trace heights to the next power of two. This will round trace heights of 0 to 1.
-    pub fn round_to_next_power_of_two(&mut self) {
-        self.memory.round_to_next_power_of_two();
+impl AsRef<SystemConfig> for SystemConfig {
+    fn as_ref(&self) -> &SystemConfig {
+        self
     }
+}
 
-    /// Round all trace heights to the next power of two, except 0 stays 0.
-    pub fn round_to_next_power_of_two_or_zero(&mut self) {
-        self.memory.round_to_next_power_of_two_or_zero();
+impl AsMut<SystemConfig> for SystemConfig {
+    fn as_mut(&mut self) -> &mut SystemConfig {
+        self
     }
 }
 
-impl<F: PrimeField32> VmConfig<F> for SystemConfig {
-    type Executor = SystemExecutor<F>;
-    type Periphery = SystemPeriphery<F>;
+// Default implementation uses no init file
+impl InitFileGenerator for SystemConfig {}
 
-    fn system(&self) -> &SystemConfig {
-        self
-    }
-    fn system_mut(&mut self) -> &mut SystemConfig {
-        self
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, new)]
+pub struct AddressSpaceHostConfig {
+    /// The number of memory cells in this address space, where a memory cell refers to a single
+    /// addressable unit of memory as defined by the ISA.
+    pub num_cells: usize,
+    /// Minimum block size for memory accesses supported. This is a property of the address space
+    /// that is determined by the ISA.
+    ///
+    /// **Note**: Block size is in terms of memory cells.
+    pub min_block_size: usize,
+    pub layout: MemoryCellType,
+}
+
+impl AddressSpaceHostConfig {
+    /// The total size in bytes of the address space in a linear memory layout.
+    pub fn size(&self) -> usize {
+        self.num_cells * self.layout.size()
     }
+}
 
-    fn create_chip_complex(
-        &self,
-    ) -> Result<VmChipComplex<F, Self::Executor, Self::Periphery>, VmInventoryError> {
-        let complex = SystemComplex::new(self.clone());
-        Ok(complex)
+pub(crate) const MAX_CELL_BYTE_SIZE: usize = 8;
+
+#[derive(Debug, Serialize, Deserialize, Clone, Copy, PartialEq, Eq)]
+pub enum MemoryCellType {
+    Null,
+    U8,
+    U16,
+    /// Represented in little-endian format.
+    U32,
+    /// `size` is the size in bytes of the native field type. This should not exceed 8.
+    Native {
+        size: u8,
+    },
+}
+
+impl MemoryCellType {
+    pub fn native32() -> Self {
+        Self::Native {
+            size: size_of::<u32>() as u8,
+        }
     }
 }
 
-// Default implementation uses no init file
-impl InitFileGenerator for SystemConfig {}
+impl AddressSpaceHostLayout for MemoryCellType {
+    fn size(&self) -> usize {
+        match self {
+            Self::Null => 1, // to avoid divide by zero
+            Self::U8 => size_of::<u8>(),
+            Self::U16 => size_of::<u16>(),
+            Self::U32 => size_of::<u32>(),
+            Self::Native { size } => *size as usize,
+        }
+    }
+
+    /// # Safety
+    /// - This function must only be called when `value` is guaranteed to be of size `self.size()`.
+    /// - Alignment of `value` must be a multiple of the alignment of `F`.
+    /// - The field type `F` must be plain old data.
+    ///
+    /// # Panics
+    /// If `self` is `Null`, or if the value is of integer type and overflows the field.
+    unsafe fn to_field<F: Field>(&self, value: &[u8]) -> F {
+        match self {
+            Self::Null => unreachable!(),
+            Self::U8 => F::from_canonical_u8(*value.get_unchecked(0)),
+            Self::U16 => F::from_canonical_u16(core::ptr::read(value.as_ptr() as *const u16)),
+            Self::U32 => F::from_canonical_u32(core::ptr::read(value.as_ptr() as *const u32)),
+            Self::Native { .. } => core::ptr::read(value.as_ptr() as *const F),
+        }
+    }
+}
diff --git a/crates/vm/src/arch/execution.rs b/crates/vm/src/arch/execution.rs
index 4edc88d355..4e3f11804c 100644
--- a/crates/vm/src/arch/execution.rs
+++ b/crates/vm/src/arch/execution.rs
@@ -1,5 +1,4 @@
-use std::{cell::RefCell, rc::Rc};
-
+use openvm_circuit_primitives::AlignedBytesBorrow;
 use openvm_circuit_primitives_derive::AlignedBorrow;
 use openvm_instructions::{
     instruction::Instruction, program::DEFAULT_PC_STEP, PhantomDiscriminant, VmOpcode,
@@ -8,32 +7,33 @@ use openvm_stark_backend::{
     interaction::{BusIndex, InteractionBuilder, PermutationCheckBus},
     p3_field::FieldAlgebra,
 };
+use rand::rngs::StdRng;
 use serde::{Deserialize, Serialize};
 use thiserror::Error;
 
-use super::Streams;
-use crate::system::{memory::MemoryController, program::ProgramBus};
-
-pub type Result<T> = std::result::Result<T, ExecutionError>;
+use super::{execution_mode::ExecutionCtxTrait, Streams, VmExecState};
+#[cfg(feature = "metrics")]
+use crate::metrics::VmMetrics;
+use crate::{
+    arch::{execution_mode::MeteredExecutionCtxTrait, ExecutorInventoryError, MatrixRecordArena},
+    system::{
+        memory::online::{GuestMemory, TracingMemory},
+        program::ProgramBus,
+    },
+};
 
 #[derive(Error, Debug)]
 pub enum ExecutionError {
-    #[error("execution failed at pc {pc}")]
-    Fail { pc: u32 },
-    #[error("pc {pc} not found for program of length {program_len}, with pc_base {pc_base} and step = {step}")]
-    PcNotFound {
-        pc: u32,
-        step: u32,
-        pc_base: u32,
-        program_len: usize,
-    },
-    #[error("pc {pc} out of bounds for program of length {program_len}, with pc_base {pc_base} and step = {step}")]
+    #[error("execution failed at pc {pc}, err: {msg}")]
+    Fail { pc: u32, msg: &'static str },
+    #[error("pc {pc} out of bounds for program of length {program_len}, with pc_base {pc_base}")]
     PcOutOfBounds {
         pc: u32,
-        step: u32,
         pc_base: u32,
         program_len: usize,
     },
+    #[error("unreachable instruction at pc {0}")]
+    Unreachable(u32),
     #[error("at pc {pc}, opcode {opcode} was not enabled")]
     DisabledOperation { pc: u32, opcode: VmOpcode },
     #[error("at pc = {pc}")]
@@ -66,51 +66,110 @@ pub enum ExecutionError {
     DidNotTerminate,
     #[error("program exit code {0}")]
     FailedWithExitCode(u32),
+    #[error("trace buffer out of bounds: requested {requested} but capacity is {capacity}")]
+    TraceBufferOutOfBounds { requested: usize, capacity: usize },
+    #[error("inventory error: {0}")]
+    Inventory(#[from] ExecutorInventoryError),
+    #[error("static program error: {0}")]
+    Static(#[from] StaticProgramError),
+}
+
+/// Errors in the program that can be statically analyzed before runtime.
+#[derive(Error, Debug)]
+pub enum StaticProgramError {
+    #[error("invalid instruction at pc {0}")]
+    InvalidInstruction(u32),
+    #[error("Too many executors")]
+    TooManyExecutors,
+    #[error("at pc {pc}, opcode {opcode} was not enabled")]
+    DisabledOperation { pc: u32, opcode: VmOpcode },
+    #[error("Executor not found for opcode {opcode}")]
+    ExecutorNotFound { opcode: VmOpcode },
+}
+
+/// Function pointer for interpreter execution with function signature `(pre_compute, exec_state)`.
+/// The `pre_compute: &[u8]` is a pre-computed buffer of data corresponding to a single instruction.
+/// The contents of `pre_compute` are determined from the program code as specified by the
+/// [Executor] and [MeteredExecutor] traits.
+pub type ExecuteFunc<F, CTX> = unsafe fn(&[u8], &mut VmExecState<F, GuestMemory, CTX>);
+
+/// Trait for pure execution via a host interpreter. The trait methods provide the methods to
+/// pre-process the program code into function pointers which operate on `pre_compute` instruction
+/// data.
+// @dev: In the codebase this is sometimes referred to as (E1).
+pub trait Executor<F> {
+    fn pre_compute_size(&self) -> usize;
+
+    fn pre_compute<Ctx>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: ExecutionCtxTrait;
 }
 
-pub trait InstructionExecutor<F> {
+/// Trait for metered execution via a host interpreter. The trait methods provide the methods to
+/// pre-process the program code into function pointers which operate on `pre_compute` instruction
+/// data which contains auxiliary data (e.g., corresponding AIR ID) for metering purposes.
+// @dev: In the codebase this is sometimes referred to as (E2).
+pub trait MeteredExecutor<F> {
+    fn metered_pre_compute_size(&self) -> usize;
+
+    fn metered_pre_compute<Ctx>(
+        &self,
+        air_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait;
+}
+
+/// Trait for preflight execution via a host interpreter. The trait methods allow execution of
+/// instructions via enum dispatch within an interpreter. This execution is specialized to record
+/// "records" of execution which will be ingested later for trace matrix generation. The records are
+/// stored in a record arena, which is provided in the [VmStateMut] argument.
+// NOTE: In the codebase this is sometimes referred to as (E3).
+pub trait PreflightExecutor<F, RA = MatrixRecordArena<F>> {
     /// Runtime execution of the instruction, if the instruction is owned by the
     /// current instance. May internally store records of this call for later trace generation.
     fn execute(
-        &mut self,
-        memory: &mut MemoryController<F>,
+        &self,
+        state: VmStateMut<F, TracingMemory, RA>,
         instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-    ) -> Result<ExecutionState<u32>>;
+    ) -> Result<(), ExecutionError>;
 
     /// For display purposes. From absolute opcode as `usize`, return the string name of the opcode
     /// if it is a supported opcode by the present executor.
     fn get_opcode_name(&self, opcode: usize) -> String;
 }
 
-impl<F, C: InstructionExecutor<F>> InstructionExecutor<F> for RefCell<C> {
-    fn execute(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-        prev_state: ExecutionState<u32>,
-    ) -> Result<ExecutionState<u32>> {
-        self.borrow_mut().execute(memory, instruction, prev_state)
-    }
-
-    fn get_opcode_name(&self, opcode: usize) -> String {
-        self.borrow().get_opcode_name(opcode)
-    }
+/// Global VM state accessible during instruction execution.
+/// The state is generic in guest memory `MEM` and additional record arena `RA`.
+/// The host state is execution context specific.
+#[derive(derive_new::new)]
+pub struct VmStateMut<'a, F, MEM, RA> {
+    pub pc: &'a mut u32,
+    pub memory: &'a mut MEM,
+    pub streams: &'a mut Streams<F>,
+    pub rng: &'a mut StdRng,
+    /// Custom public values to be set by the system PublicValuesExecutor
+    pub(crate) custom_pvs: &'a mut Vec<Option<F>>,
+    pub ctx: &'a mut RA,
+    #[cfg(feature = "metrics")]
+    pub metrics: &'a mut VmMetrics,
 }
 
-impl<F, C: InstructionExecutor<F>> InstructionExecutor<F> for Rc<RefCell<C>> {
-    fn execute(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-        prev_state: ExecutionState<u32>,
-    ) -> Result<ExecutionState<u32>> {
-        self.borrow_mut().execute(memory, instruction, prev_state)
-    }
-
-    fn get_opcode_name(&self, opcode: usize) -> String {
-        self.borrow().get_opcode_name(opcode)
-    }
+/// Wrapper type for metered pre-computed data, which is always an AIR index together with the
+/// pre-computed data for pure execution.
+#[derive(Clone, AlignedBytesBorrow)]
+#[repr(C)]
+pub struct E2PreCompute<DATA> {
+    pub chip_idx: u32,
+    pub data: DATA,
 }
 
 #[repr(C)]
@@ -316,20 +375,22 @@ impl<T: FieldAlgebra> From<(u32, Option<T>)> for PcIncOrSet<T> {
 
 /// Phantom sub-instructions affect the runtime of the VM and the trace matrix values.
 /// However they all have no AIR constraints besides advancing the pc by
-/// [DEFAULT_PC_STEP](openvm_instructions::program::DEFAULT_PC_STEP).
+/// [DEFAULT_PC_STEP].
 ///
 /// They should not mutate memory, but they can mutate the input & hint streams.
 ///
 /// Phantom sub-instructions are only allowed to use operands
 /// `a,b` and `c_upper = c.as_canonical_u32() >> 16`.
-pub trait PhantomSubExecutor<F>: Send {
+#[allow(clippy::too_many_arguments)]
+pub trait PhantomSubExecutor<F>: Send + Sync {
     fn phantom_execute(
-        &mut self,
-        memory: &MemoryController<F>,
+        &self,
+        memory: &GuestMemory,
         streams: &mut Streams<F>,
+        rng: &mut StdRng,
         discriminant: PhantomDiscriminant,
-        a: F,
-        b: F,
+        a: u32,
+        b: u32,
         c_upper: u16,
     ) -> eyre::Result<()>;
 }
diff --git a/crates/vm/src/arch/execution_mode/metered/ctx.rs b/crates/vm/src/arch/execution_mode/metered/ctx.rs
new file mode 100644
index 0000000000..9fcadf4fd8
--- /dev/null
+++ b/crates/vm/src/arch/execution_mode/metered/ctx.rs
@@ -0,0 +1,233 @@
+use std::num::NonZero;
+
+use openvm_instructions::riscv::{RV32_IMM_AS, RV32_REGISTER_AS};
+
+use super::{
+    memory_ctx::MemoryCtx,
+    segment_ctx::{Segment, SegmentationCtx},
+};
+use crate::{
+    arch::{
+        execution_mode::{ExecutionCtxTrait, MeteredExecutionCtxTrait},
+        SystemConfig, VmExecState,
+    },
+    system::memory::online::GuestMemory,
+};
+
+pub const DEFAULT_PAGE_BITS: usize = 6;
+
+#[derive(Clone, Debug)]
+pub struct MeteredCtx<const PAGE_BITS: usize = DEFAULT_PAGE_BITS> {
+    pub trace_heights: Vec<u32>,
+    pub is_trace_height_constant: Vec<bool>,
+    pub memory_ctx: MemoryCtx<PAGE_BITS>,
+    pub segmentation_ctx: SegmentationCtx,
+}
+
+impl<const PAGE_BITS: usize> MeteredCtx<PAGE_BITS> {
+    // Note[jpw]: prefer to use `build_metered_ctx` in `VmExecutor` or `VirtualMachine`.
+    pub fn new(
+        constant_trace_heights: Vec<Option<usize>>,
+        air_names: Vec<String>,
+        widths: Vec<usize>,
+        interactions: Vec<usize>,
+        config: &SystemConfig,
+    ) -> Self {
+        let (trace_heights, is_trace_height_constant): (Vec<u32>, Vec<bool>) =
+            constant_trace_heights
+                .iter()
+                .map(|&constant_height| {
+                    if let Some(height) = constant_height {
+                        (height as u32, true)
+                    } else {
+                        (0, false)
+                    }
+                })
+                .unzip();
+
+        let memory_ctx = MemoryCtx::new(config);
+
+        // Debug-only sanity check: the memory AIR indices must point at the expected AIR names
+        debug_assert!(
+            air_names[memory_ctx.boundary_idx].contains("Boundary"),
+            "air_name={}",
+            air_names[memory_ctx.boundary_idx]
+        );
+        if let Some(merkle_tree_index) = memory_ctx.merkle_tree_index {
+            debug_assert!(
+                air_names[merkle_tree_index].contains("Merkle"),
+                "air_name={}",
+                air_names[merkle_tree_index]
+            );
+        }
+        debug_assert!(
+            air_names[memory_ctx.adapter_offset].contains("AccessAdapterAir<2>"),
+            "air_name={}",
+            air_names[memory_ctx.adapter_offset]
+        );
+
+        let segmentation_ctx =
+            SegmentationCtx::new(air_names, widths, interactions, config.segmentation_limits);
+
+        let mut ctx = Self {
+            trace_heights,
+            is_trace_height_constant,
+            memory_ctx,
+            segmentation_ctx,
+        };
+        if !config.continuation_enabled {
+            // force a single segment: the segment check threshold is never reached
+            ctx.segmentation_ctx.segment_check_insns = u64::MAX;
+        }
+
+        // Add merkle height contributions for all registers
+        ctx.memory_ctx.add_register_merkle_heights();
+
+        ctx
+    }
+
+    pub fn with_max_trace_height(mut self, max_trace_height: u32) -> Self {
+        self.segmentation_ctx.set_max_trace_height(max_trace_height);
+        let max_check_freq = (max_trace_height / 2) as u64;
+        if max_check_freq < self.segmentation_ctx.segment_check_insns {
+            self.segmentation_ctx.segment_check_insns = max_check_freq;
+        }
+        self
+    }
+
+    pub fn with_max_cells(mut self, max_cells: usize) -> Self {
+        self.segmentation_ctx.set_max_cells(max_cells);
+        self
+    }
+
+    pub fn with_max_interactions(mut self, max_interactions: usize) -> Self {
+        self.segmentation_ctx.set_max_interactions(max_interactions);
+        self
+    }
+
+    pub fn segments(&self) -> &[Segment] {
+        &self.segmentation_ctx.segments
+    }
+
+    pub fn into_segments(self) -> Vec<Segment> {
+        self.segmentation_ctx.segments
+    }
+
+    fn reset_segment(&mut self) {
+        self.memory_ctx.clear();
+        for (i, &is_constant) in self.is_trace_height_constant.iter().enumerate() {
+            if !is_constant {
+                self.trace_heights[i] = 0;
+            }
+        }
+        // Add merkle height contributions for all registers
+        self.memory_ctx.add_register_merkle_heights();
+    }
+
+    #[inline(always)]
+    pub fn check_and_segment(&mut self, instret: u64) {
+        let threshold = self
+            .segmentation_ctx
+            .instret_last_segment_check
+            .wrapping_add(self.segmentation_ctx.segment_check_insns);
+        debug_assert!(
+            threshold >= self.segmentation_ctx.instret_last_segment_check,
+            "overflow in segment check threshold calculation"
+        );
+        if instret < threshold {
+            return;
+        }
+
+        self.memory_ctx
+            .lazy_update_boundary_heights(&mut self.trace_heights);
+        let did_segment = self.segmentation_ctx.check_and_segment(
+            instret,
+            &self.trace_heights,
+            &self.is_trace_height_constant,
+        );
+
+        if did_segment {
+            self.reset_segment();
+        }
+    }
+
+    #[allow(dead_code)]
+    pub fn print_heights(&self) {
+        println!("{:>10} {:<30}", "Height", "Air Name");
+        println!("{}", "-".repeat(42));
+        for (i, height) in self.trace_heights.iter().enumerate() {
+            let air_name = self
+                .segmentation_ctx
+                .air_names
+                .get(i)
+                .map(|s| s.as_str())
+                .unwrap_or("Unknown");
+            println!("{:>10} {:<30}", height, air_name);
+        }
+    }
+}
+
+impl<const PAGE_BITS: usize> ExecutionCtxTrait for MeteredCtx<PAGE_BITS> {
+    #[inline(always)]
+    fn on_memory_operation(&mut self, address_space: u32, ptr: u32, size: u32) {
+        debug_assert!(
+            address_space != RV32_IMM_AS,
+            "address space must not be immediate"
+        );
+        debug_assert!(size > 0, "size must be greater than 0, got {}", size);
+        debug_assert!(
+            size.is_power_of_two(),
+            "size must be a power of 2, got {}",
+            size
+        );
+
+        // Update access adapter heights, keyed by address space and log2 of the access size
+        // SAFETY: callers must pass a non-zero size (only checked by the debug asserts above)
+        let size_bits = unsafe { NonZero::new_unchecked(size).ilog2() };
+        self.memory_ctx
+            .update_adapter_heights(&mut self.trace_heights, address_space, size_bits);
+
+        // Update merkle tree heights; register contributions are added once per segment instead
+        if address_space != RV32_REGISTER_AS {
+            self.memory_ctx
+                .update_boundary_merkle_heights(address_space, ptr, size);
+        }
+    }
+
+    #[inline(always)]
+    fn should_suspend<F>(vm_state: &mut VmExecState<F, GuestMemory, Self>) -> bool {
+        // E2 always runs until termination. Here we use the function as a hook called every
+        // instruction.
+        vm_state.ctx.check_and_segment(vm_state.instret);
+        false
+    }
+
+    #[inline(always)]
+    fn on_terminate<F>(vm_state: &mut VmExecState<F, GuestMemory, Self>) {
+        vm_state
+            .ctx
+            .memory_ctx
+            .lazy_update_boundary_heights(&mut vm_state.ctx.trace_heights);
+        vm_state
+            .ctx
+            .segmentation_ctx
+            .segment(vm_state.instret, &vm_state.ctx.trace_heights);
+    }
+}
+
+impl<const PAGE_BITS: usize> MeteredExecutionCtxTrait for MeteredCtx<PAGE_BITS> {
+    #[inline(always)]
+    fn on_height_change(&mut self, chip_idx: usize, height_delta: u32) {
+        debug_assert!(
+            chip_idx < self.trace_heights.len(),
+            "chip_idx out of bounds"
+        );
+        // SAFETY: chip_idx is created in executor_idx_to_air_idx and is always within bounds
+        unsafe {
+            *self.trace_heights.get_unchecked_mut(chip_idx) = self
+                .trace_heights
+                .get_unchecked(chip_idx)
+                .wrapping_add(height_delta);
+        }
+    }
+}
diff --git a/crates/vm/src/arch/execution_mode/metered/memory_ctx.rs b/crates/vm/src/arch/execution_mode/metered/memory_ctx.rs
new file mode 100644
index 0000000000..40d3e012d6
--- /dev/null
+++ b/crates/vm/src/arch/execution_mode/metered/memory_ctx.rs
@@ -0,0 +1,319 @@
+use openvm_instructions::riscv::{RV32_NUM_REGISTERS, RV32_REGISTER_AS, RV32_REGISTER_NUM_LIMBS};
+
+use crate::{arch::SystemConfig, system::memory::dimensions::MemoryDimensions};
+
+/// Fixed-size set of bit flags stored as a boxed slice of 64-bit words.
+///
+/// Indices are only range-checked with `debug_assert!`; see the individual methods.
+#[derive(Clone, Debug)]
+pub struct BitSet {
+    words: Box<[u64]>,
+}
+
+impl BitSet {
+    /// Creates a bit set with all bits 0.
+    ///
+    /// The capacity is `num_bits` rounded up to the next multiple of 64.
+    pub fn new(num_bits: usize) -> Self {
+        Self {
+            words: vec![0; num_bits.div_ceil(u64::BITS as usize)].into_boxed_slice(),
+        }
+    }
+
+    /// Sets bit `index` and returns `true` if the bit was previously 0.
+    ///
+    /// Assumes `index < self.words.len() * 64`; this is only checked by `debug_assert!`.
+    #[inline(always)]
+    pub fn insert(&mut self, index: usize) -> bool {
+        let word_index = index >> 6;
+        let bit_index = index & 63;
+        let mask = 1u64 << bit_index;
+
+        debug_assert!(word_index < self.words.len(), "BitSet index out of bounds");
+
+        // SAFETY: word_index is derived from a memory address that is bounds-checked
+        //         during memory access. The bitset is sized to accommodate all valid
+        //         memory addresses, so word_index is always within bounds.
+        let word = unsafe { self.words.get_unchecked_mut(word_index) };
+        let was_set = (*word & mask) != 0;
+        *word |= mask;
+        !was_set
+    }
+
+    /// ORs `mask` into the word at `word_index` and returns how many bits flipped from 0 to 1.
+    ///
+    /// # Safety
+    ///
+    /// `word_index` must be less than `self.words.len()`.
+    #[inline(always)]
+    unsafe fn set_bits_in_word(&mut self, word_index: usize, mask: u64) -> usize {
+        // SAFETY: the caller guarantees that word_index is within bounds
+        let word = unsafe { self.words.get_unchecked_mut(word_index) };
+        let flipped = (mask & !*word).count_ones() as usize;
+        *word |= mask;
+        flipped
+    }
+
+    /// Sets all bits within `[start, end)` to 1 and returns the number of bits that flipped
+    /// from 0 to 1.
+    ///
+    /// Assumes `start < end` and `end <= self.words.len() * 64`; both are only checked by
+    /// `debug_assert!`.
+    #[inline(always)]
+    pub fn insert_range(&mut self, start: usize, end: usize) -> usize {
+        debug_assert!(start < end);
+        debug_assert!(end <= self.words.len() * 64, "BitSet range out of bounds");
+
+        let first_word = start >> 6;
+        let last_word = (end - 1) >> 6;
+        // Bits at or above `start` within the first word.
+        let head_mask = u64::MAX << (start & 63);
+        // Bits up to and including `end - 1` within the last word. The shift is derived from
+        // `end - 1` (not `end`) so that a range ending exactly on a word boundary still covers
+        // the whole last word instead of none of it.
+        let tail_mask = u64::MAX >> (63 - ((end - 1) & 63));
+
+        if first_word == last_word {
+            // SAFETY: end <= self.words.len() * 64 implies last_word < self.words.len()
+            return unsafe { self.set_bits_in_word(first_word, head_mask & tail_mask) };
+        }
+
+        // SAFETY: first_word < last_word < self.words.len()
+        let mut flipped = unsafe { self.set_bits_in_word(first_word, head_mask) };
+        // Words strictly between the first and the last one are covered entirely.
+        for word_index in (first_word + 1)..last_word {
+            // SAFETY: word_index < last_word < self.words.len()
+            flipped += unsafe { self.set_bits_in_word(word_index, u64::MAX) };
+        }
+        // SAFETY: end <= self.words.len() * 64 implies last_word < self.words.len()
+        flipped + unsafe { self.set_bits_in_word(last_word, tail_mask) }
+    }
+
+    /// Resets every bit to 0.
+    #[inline(always)]
+    pub fn clear(&mut self) {
+        self.words.fill(0);
+    }
+}
+
+#[derive(Clone, Debug)]
+pub struct MemoryCtx<const PAGE_BITS: usize> {
+    pub page_indices: BitSet,
+    memory_dimensions: MemoryDimensions,
+    min_block_size_bits: Vec<u8>,
+    pub boundary_idx: usize,
+    pub merkle_tree_index: Option<usize>,
+    pub adapter_offset: usize,
+    continuations_enabled: bool,
+    chunk: u32,
+    chunk_bits: u32,
+    page_access_count: usize,
+    // Pages first touched per address space since the last `lazy_update_boundary_heights`.
+    addr_space_access_count: Vec<usize>,
+}
+
+impl<const PAGE_BITS: usize> MemoryCtx<PAGE_BITS> {
+    /// Builds a context for `config` with no page marked and all pending counters at zero.
+    pub fn new(config: &SystemConfig) -> Self {
+        let chunk = config.initial_block_size() as u32;
+        let chunk_bits = chunk.ilog2();
+        let memory_dimensions = config.memory_config.memory_dimensions();
+        // Address height already considers `chunk_bits`.
+        let num_pages = 1usize << memory_dimensions.overall_height().saturating_sub(PAGE_BITS);
+
+        Self {
+            page_indices: BitSet::new(num_pages),
+            memory_dimensions,
+            min_block_size_bits: config.memory_config.min_block_size_bits(),
+            boundary_idx: config.memory_boundary_air_id(),
+            merkle_tree_index: config.memory_merkle_air_id(),
+            adapter_offset: config.access_adapter_air_id_offset(),
+            continuations_enabled: config.continuation_enabled,
+            chunk,
+            chunk_bits,
+            page_access_count: 0,
+            addr_space_access_count: vec![0; (1 << memory_dimensions.addr_space_height) + 1],
+        }
+    }
+
+    /// Unmarks every page; the pending access counters are left untouched.
+    #[inline(always)]
+    pub fn clear(&mut self) {
+        self.page_indices.clear();
+    }
+
+    /// When continuations are enabled, records an access to the first
+    /// `RV32_NUM_REGISTERS * RV32_REGISTER_NUM_LIMBS` cells of `RV32_REGISTER_AS`.
+    /// Does nothing otherwise.
+    #[inline(always)]
+    pub(crate) fn add_register_merkle_heights(&mut self) {
+        if !self.continuations_enabled {
+            return;
+        }
+        let register_file_size = (RV32_NUM_REGISTERS * RV32_REGISTER_NUM_LIMBS) as u32;
+        self.update_boundary_merkle_heights(RV32_REGISTER_AS, 0, register_file_size);
+    }
+
+    /// For each memory access, record the minimal necessary data to update heights of
+    /// memory-related chips: every page touched for the first time bumps the global and the
+    /// per-address-space page counters. The actual height updates happen during segment
+    /// checks. The implementation is in `lazy_update_boundary_heights`.
+    #[inline(always)]
+    pub(crate) fn update_boundary_merkle_heights(
+        &mut self,
+        address_space: u32,
+        ptr: u32,
+        size: u32,
+    ) {
+        let addr_space_idx = address_space as usize;
+        debug_assert!(addr_space_idx < self.addr_space_access_count.len());
+
+        // Number of chunk-sized blocks covering `size`, rounded up.
+        let num_blocks = (size + self.chunk - 1) >> self.chunk_bits;
+        let first_chunk = ptr >> self.chunk_bits;
+        let first_block = if self.chunk != 1 {
+            self.memory_dimensions
+                .label_to_index((address_space, first_chunk)) as u32
+        } else {
+            first_chunk
+        };
+        let last_block = first_block + num_blocks - 1;
+        let touched_pages = (first_block >> PAGE_BITS)..((last_block >> PAGE_BITS) + 1);
+
+        // `insert` returns true only for pages that had not been marked before.
+        let new_pages = touched_pages
+            .filter(|&page_id| self.page_indices.insert(page_id as usize))
+            .count();
+        if new_pages > 0 {
+            self.page_access_count += new_pages;
+            // SAFETY: address_space passed is usually a hardcoded constant or derived from an
+            // Instruction where it is bounds checked before passing
+            let slot = unsafe { self.addr_space_access_count.get_unchecked_mut(addr_space_idx) };
+            *slot += new_pages;
+        }
+    }
+
+    /// Shorthand for `update_adapter_heights_batch` with a single access (`num == 1`).
+    #[inline(always)]
+    pub fn update_adapter_heights(
+        &mut self,
+        trace_heights: &mut [u32],
+        address_space: u32,
+        size_bits: u32,
+    ) {
+        self.update_adapter_heights_batch(trace_heights, address_space, size_bits, 1);
+    }
+
+    /// Adds to `trace_heights` the access adapter rows for `num` accesses of size
+    /// `1 << size_bits` in `address_space`: one adapter per block size above the address
+    /// space's minimum block size, up to and including `size_bits`.
+    #[inline(always)]
+    pub fn update_adapter_heights_batch(
+        &self,
+        trace_heights: &mut [u32],
+        address_space: u32,
+        size_bits: u32,
+        num: u32,
+    ) {
+        let min_bits = &self.min_block_size_bits;
+        debug_assert!((address_space as usize) < min_bits.len());
+
+        // SAFETY: address_space passed is usually a hardcoded constant or derived from an
+        // Instruction where it is bounds checked before passing
+        let align_bits = unsafe { *min_bits.get_unchecked(address_space as usize) } as u32;
+        debug_assert!(
+            align_bits <= size_bits,
+            "align_bits ({}) must be <= size_bits ({})",
+            align_bits,
+            size_bits
+        );
+
+        for adapter_bits in (align_bits + 1..=size_bits).rev() {
+            let adapter_idx = self.adapter_offset + adapter_bits as usize - 1;
+            debug_assert!(adapter_idx < trace_heights.len());
+            let rows = num << (size_bits - adapter_bits + 1);
+            // SAFETY: trace_heights is initialized taking access adapters into account
+            let height = unsafe { trace_heights.get_unchecked_mut(adapter_idx) };
+            *height += rows;
+        }
+    }
+
+    /// Resolve all lazy updates of each memory access for memory adapters/poseidon2/merkle chip.
+    ///
+    /// Consumes the counters recorded by `update_boundary_merkle_heights` and resets them to
+    /// zero. The poseidon2 height is taken to be the second-to-last entry of `trace_heights`.
+    #[inline(always)]
+    pub(crate) fn lazy_update_boundary_heights(&mut self, trace_heights: &mut [u32]) {
+        debug_assert!(self.boundary_idx < trace_heights.len());
+
+        // On page fault, assume we add all leaves in a page
+        let leaves = (self.page_access_count << PAGE_BITS) as u32;
+        // SAFETY: boundary_idx is a compile time constant within bounds
+        let boundary_height = unsafe { trace_heights.get_unchecked_mut(self.boundary_idx) };
+        *boundary_height += leaves;
+
+        if let Some(merkle_tree_idx) = self.merkle_tree_index {
+            let num_heights = trace_heights.len();
+            debug_assert!(merkle_tree_idx < num_heights);
+            debug_assert!(num_heights >= 2);
+
+            let poseidon2_idx = num_heights - 2;
+            let merkle_height = self.memory_dimensions.overall_height();
+            // NOTE(review): `nodes` looks like a per-page quantity but, unlike `leaves`, it is
+            // not scaled by `page_access_count`; confirm that this is intended.
+            let nodes = (((1 << PAGE_BITS) - 1) + (merkle_height - PAGE_BITS)) as u32;
+            // SAFETY: poseidon2_idx is trace_heights.len() - 2 and merkle_tree_idx is
+            // guaranteed to be in bounds
+            unsafe {
+                *trace_heights.get_unchecked_mut(poseidon2_idx) += leaves * 2;
+                *trace_heights.get_unchecked_mut(poseidon2_idx) += nodes * 2;
+                *trace_heights.get_unchecked_mut(merkle_tree_idx) += nodes * 2;
+            }
+        }
+        self.page_access_count = 0;
+
+        for address_space in 0..self.addr_space_access_count.len() {
+            // SAFETY: address_space is from 0 to len(), guaranteed to be in bounds
+            let slot = unsafe { self.addr_space_access_count.get_unchecked_mut(address_space) };
+            // Reading the counter also resets it to zero.
+            let pages = std::mem::take(slot);
+            if pages > 0 {
+                // After finalize, we'll need to read it in chunk-sized units for the merkle chip
+                self.update_adapter_heights_batch(
+                    trace_heights,
+                    address_space as u32,
+                    self.chunk_bits,
+                    (pages << PAGE_BITS) as u32,
+                );
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    #[test]
+    fn test_bitset_insert_range() {
+        // 513 bits
+        let mut bit_set = BitSet::new(8 * 64 + 1);
+        // `(start, end, expected flips)`, applied in order to the same bit set.
+        let cases = [
+            // Ranges within the first word.
+            (2, 29, 27),
+            (1, 31, 3),
+            // Ranges crossing from the first into the second word.
+            (32, 65, 33),
+            (0, 66, 3),
+            (0, 66, 0),
+            // A whole word, then ranges spanning several words.
+            (256, 320, 64),
+            (256, 377, 57),
+            (100, 513, 413 - 121),
+        ];
+        // Each call must report only the bits that were still unset before it.
+        for &(start, end, expected) in &cases {
+            assert_eq!(bit_set.insert_range(start, end), expected);
+        }
+    }
+}
diff --git a/crates/vm/src/arch/execution_mode/metered/mod.rs b/crates/vm/src/arch/execution_mode/metered/mod.rs
new file mode 100644
index 0000000000..e44bba9867
--- /dev/null
+++ b/crates/vm/src/arch/execution_mode/metered/mod.rs
@@ -0,0 +1,3 @@
+pub mod ctx;
+pub mod memory_ctx;
+pub mod segment_ctx;
diff --git a/crates/vm/src/arch/execution_mode/metered/segment_ctx.rs b/crates/vm/src/arch/execution_mode/metered/segment_ctx.rs
new file mode 100644
index 0000000000..3d43ea60ea
--- /dev/null
+++ b/crates/vm/src/arch/execution_mode/metered/segment_ctx.rs
@@ -0,0 +1,236 @@
+use getset::WithSetters;
+use openvm_stark_backend::p3_field::PrimeField32;
+use p3_baby_bear::BabyBear;
+use serde::{Deserialize, Serialize};
+
+pub const DEFAULT_SEGMENT_CHECK_INSNS: u64 = 1000; // initial `segment_check_insns` value
+// Defaults used by `SegmentationLimits::default()`.
+const DEFAULT_MAX_TRACE_HEIGHT: u32 = (1 << 23) - 10000;
+pub const DEFAULT_MAX_CELLS: usize = 2_000_000_000; // 2B
+const DEFAULT_MAX_INTERACTIONS: usize = BabyBear::ORDER_U32 as usize;
+
+#[derive(derive_new::new, Clone, Debug, Serialize, Deserialize)]
+pub struct Segment {
+    pub instret_start: u64, // instruction count at which the segment starts
+    pub num_insns: u64, // number of instructions covered by the segment
+    pub trace_heights: Vec<u32>, // copy of the trace heights given when it was recorded
+}
+
+#[derive(Clone, Copy, Debug, WithSetters)]
+pub struct SegmentationLimits {
+    #[getset(set_with = "pub")]
+    pub max_trace_height: u32, // limit for each trace height not marked as constant
+    #[getset(set_with = "pub")]
+    pub max_cells: usize, // limit for the sum of height * width over all traces
+    #[getset(set_with = "pub")]
+    pub max_interactions: usize, // limit for the sum of (height + 1) * interactions
+}
+
+impl Default for SegmentationLimits {
+    fn default() -> Self {
+        Self {
+            max_trace_height: DEFAULT_MAX_TRACE_HEIGHT,
+            max_cells: DEFAULT_MAX_CELLS,
+            max_interactions: DEFAULT_MAX_INTERACTIONS,
+        }
+    }
+}
+
+/// State used to decide when to cut a segment, plus the segments recorded so far.
+#[derive(Clone, Debug, WithSetters)]
+pub struct SegmentationCtx {
+    /// Segments recorded so far, in execution order.
+    pub segments: Vec<Segment>,
+    pub(crate) air_names: Vec<String>,
+    /// Per-AIR trace widths, parallel to `air_names`.
+    widths: Vec<usize>,
+    /// Per-AIR interaction counts, parallel to `air_names`.
+    interactions: Vec<usize>,
+    pub(crate) segmentation_limits: SegmentationLimits,
+    /// `instret` passed to the most recent `check_and_segment` call.
+    pub instret_last_segment_check: u64,
+    #[getset(set_with = "pub")]
+    pub segment_check_insns: u64,
+}
+
+impl SegmentationCtx {
+    /// Creates a context with no recorded segments. Panics if `air_names`, `widths` and
+    /// `interactions` do not all have the same length.
+    pub fn new(
+        air_names: Vec<String>,
+        widths: Vec<usize>,
+        interactions: Vec<usize>,
+        segmentation_limits: SegmentationLimits,
+    ) -> Self {
+        assert_eq!(air_names.len(), widths.len());
+        assert_eq!(air_names.len(), interactions.len());
+
+        Self {
+            segments: Vec::new(),
+            air_names,
+            widths,
+            interactions,
+            segmentation_limits,
+            segment_check_insns: DEFAULT_SEGMENT_CHECK_INSNS,
+            instret_last_segment_check: 0,
+        }
+    }
+
+    /// Same as [`Self::new`], using `SegmentationLimits::default()`.
+    pub fn new_with_default_segmentation_limits(
+        air_names: Vec<String>,
+        widths: Vec<usize>,
+        interactions: Vec<usize>,
+    ) -> Self {
+        Self::new(air_names, widths, interactions, SegmentationLimits::default())
+    }
+
+    /// Overrides the maximum trace height limit.
+    pub fn set_max_trace_height(&mut self, max_trace_height: u32) {
+        self.segmentation_limits.max_trace_height = max_trace_height;
+    }
+
+    /// Overrides the maximum total cells limit.
+    pub fn set_max_cells(&mut self, max_cells: usize) {
+        self.segmentation_limits.max_cells = max_cells;
+    }
+
+    /// Overrides the maximum total interactions limit.
+    pub fn set_max_interactions(&mut self, max_interactions: usize) {
+        self.segmentation_limits.max_interactions = max_interactions;
+    }
+
+    /// Instruction count at which the next segment starts (0 if none was recorded yet).
+    #[inline(always)]
+    fn next_segment_start(&self) -> u64 {
+        self.segments
+            .last()
+            .map_or(0, |s| s.instret_start + s.num_insns)
+    }
+
+    /// Calculate the total cells used based on trace heights and widths
+    #[inline(always)]
+    fn calculate_total_cells(&self, trace_heights: &[u32]) -> usize {
+        debug_assert_eq!(trace_heights.len(), self.widths.len());
+
+        // `zip` stops at the shorter input, so no unchecked slicing of `widths` is needed.
+        trace_heights
+            .iter()
+            .zip(&self.widths)
+            .map(|(&height, &width)| height as usize * width)
+            .sum()
+    }
+
+    /// Calculate the total interactions based on trace heights and interaction counts
+    #[inline(always)]
+    fn calculate_total_interactions(&self, trace_heights: &[u32]) -> usize {
+        debug_assert_eq!(trace_heights.len(), self.interactions.len());
+
+        trace_heights
+            .iter()
+            .zip(&self.interactions)
+            // We add 1 for the zero messages from the padding rows. The sum is computed in
+            // `usize` rather than `u32`, so a height of `u32::MAX` does not overflow it.
+            .map(|(&height, &interactions)| (height as usize + 1) * interactions)
+            .sum()
+    }
+
+    /// Returns whether the current segment should be cut at `instret`: it must be non-empty
+    /// and either a non-constant trace height, the total cells or the total interactions
+    /// must exceed the respective limit. The limit that triggered is logged at info level.
+    #[inline(always)]
+    fn should_segment(
+        &self,
+        instret: u64,
+        trace_heights: &[u32],
+        is_trace_height_constant: &[bool],
+    ) -> bool {
+        debug_assert_eq!(trace_heights.len(), is_trace_height_constant.len());
+        debug_assert_eq!(trace_heights.len(), self.air_names.len());
+
+        // Segment should contain at least one cycle
+        let num_insns = instret - self.next_segment_start();
+        if num_insns == 0 {
+            return false;
+        }
+
+        let limits = &self.segmentation_limits;
+        let segment_idx = self.segments.len();
+
+        let heights = trace_heights.iter().zip(is_trace_height_constant);
+        for (i, (&height, &is_constant)) in heights.enumerate() {
+            // Only segment if the height is not constant and exceeds the maximum height
+            if !is_constant && height > limits.max_trace_height {
+                tracing::info!(
+                    "Segment {:2} | instret {:9} | chip {} ({}) height ({:8}) > max ({:8})",
+                    segment_idx,
+                    instret,
+                    i,
+                    &self.air_names[i],
+                    height,
+                    limits.max_trace_height
+                );
+                return true;
+            }
+        }
+
+        let total_cells = self.calculate_total_cells(trace_heights);
+        if total_cells > limits.max_cells {
+            tracing::info!(
+                "Segment {:2} | instret {:9} | total cells ({:10}) > max ({:10})",
+                segment_idx,
+                instret,
+                total_cells,
+                limits.max_cells
+            );
+            return true;
+        }
+
+        let total_interactions = self.calculate_total_interactions(trace_heights);
+        if total_interactions > limits.max_interactions {
+            tracing::info!(
+                "Segment {:2} | instret {:9} | total interactions ({:11}) > max ({:11})",
+                segment_idx,
+                instret,
+                total_interactions,
+                limits.max_interactions
+            );
+            return true;
+        }
+
+        false
+    }
+
+    /// Records a segment ending at `instret` if `should_segment` says so, remembers `instret`
+    /// as the last checked instruction count, and returns whether a segment was recorded.
+    #[inline(always)]
+    pub fn check_and_segment(
+        &mut self,
+        instret: u64,
+        trace_heights: &[u32],
+        is_trace_height_constant: &[bool],
+    ) -> bool {
+        let cut = self.should_segment(instret, trace_heights, is_trace_height_constant);
+        if cut {
+            self.segment(instret, trace_heights);
+        }
+        self.instret_last_segment_check = instret;
+        cut
+    }
+
+    /// Records a segment that starts where the previous one ended (or at 0) and ends at
+    /// `instret`, storing a copy of `trace_heights`. The segment is expected to contain at
+    /// least one cycle, which is only checked in debug builds.
+    #[inline(always)]
+    pub fn segment(&mut self, instret: u64, trace_heights: &[u32]) {
+        let instret_start = self.next_segment_start();
+        let num_insns = instret - instret_start;
+        debug_assert!(num_insns > 0, "Segment should contain at least one cycle");
+
+        self.segments.push(Segment {
+            instret_start,
+            num_insns,
+            trace_heights: trace_heights.to_vec(),
+        });
+    }
+}
diff --git a/crates/vm/src/arch/execution_mode/metered_cost.rs b/crates/vm/src/arch/execution_mode/metered_cost.rs
new file mode 100644
index 0000000000..c3c9c183d0
--- /dev/null
+++ b/crates/vm/src/arch/execution_mode/metered_cost.rs
@@ -0,0 +1,145 @@
+use std::num::NonZero;
+
+use getset::WithSetters;
+use openvm_instructions::riscv::RV32_IMM_AS;
+
+use crate::{
+    arch::{
+        execution_mode::metered::segment_ctx::DEFAULT_MAX_CELLS as DEFAULT_SEGMENT_MAX_CELLS,
+        ExecutionCtxTrait, MeteredExecutionCtxTrait, SystemConfig, VmExecState,
+    },
+    system::memory::online::GuestMemory,
+};
+
+const DEFAULT_MAX_SEGMENTS: u64 = 100; // DEFAULT_MAX_COST is this many segments of max cells
+pub const DEFAULT_MAX_COST: u64 = DEFAULT_MAX_SEGMENTS * DEFAULT_SEGMENT_MAX_CELLS as u64;
+
+#[derive(Debug, Copy, Clone, derive_new::new)]
+pub struct MeteredCostExecutionOutput {
+    pub instret: u64, // presumably the instruction count reached - TODO confirm
+    pub cost: u64, // presumably the accumulated `MeteredCostCtx::cost` - TODO confirm
+}
+
+#[derive(Clone, Debug)]
+pub struct AccessAdapterCtx {
+    min_block_size_bits: Vec<u8>,
+    idx_offset: usize,
+}
+
+impl AccessAdapterCtx {
+    /// Reads the minimum block sizes and the access adapter AIR index offset from `config`.
+    pub fn new(config: &SystemConfig) -> Self {
+        Self {
+            min_block_size_bits: config.memory_config.min_block_size_bits(),
+            idx_offset: config.access_adapter_air_id_offset(),
+        }
+    }
+
+    /// Adds to `cost` the access adapter cells for one access of size `1 << size_bits` in
+    /// `address_space`: rows added times adapter width, for each adapter size involved.
+    #[inline(always)]
+    pub fn update_cells(
+        &self,
+        cost: &mut u64,
+        address_space: u32,
+        size_bits: u32,
+        widths: &[usize],
+    ) {
+        let min_bits = &self.min_block_size_bits;
+        debug_assert!((address_space as usize) < min_bits.len());
+
+        // SAFETY: address_space passed is usually a hardcoded constant or derived from an
+        // Instruction where it is bounds checked before passing
+        let align_bits = unsafe { *min_bits.get_unchecked(address_space as usize) } as u32;
+        debug_assert!(
+            align_bits <= size_bits,
+            "align_bits ({}) must be <= size_bits ({})",
+            align_bits,
+            size_bits
+        );
+
+        for adapter_bits in (align_bits + 1..=size_bits).rev() {
+            let adapter_idx = self.idx_offset + adapter_bits as usize - 1;
+            debug_assert!(adapter_idx < widths.len());
+            // SAFETY: widths is initialized taking access adapters into account
+            let width = unsafe { *widths.get_unchecked(adapter_idx) };
+            let height_delta = 1 << (size_bits - adapter_bits + 1);
+            *cost += (height_delta as u64) * (width as u64);
+        }
+    }
+}
+
+#[derive(Clone, Debug, WithSetters)]
+pub struct MeteredCostCtx {
+    pub widths: Vec<usize>,
+    pub access_adapter_ctx: AccessAdapterCtx,
+    #[getset(set_with = "pub")]
+    pub max_execution_cost: u64,
+    // Cost is number of trace cells (height * width)
+    pub cost: u64,
+}
+
+impl MeteredCostCtx {
+    pub fn new(widths: Vec<usize>, config: &SystemConfig) -> Self {
+        Self {
+            widths,
+            access_adapter_ctx: AccessAdapterCtx::new(config),
+            max_execution_cost: DEFAULT_MAX_COST,
+            cost: 0,
+        }
+    }
+
+    /// Panics once `cost` exceeds `2 * max(max_execution_cost, DEFAULT_MAX_COST)` (saturating).
+    #[cold]
+    fn check_cost_limit(&self) {
+        let limit = std::cmp::max(self.max_execution_cost, DEFAULT_MAX_COST).saturating_mul(2);
+        assert!(
+            self.cost <= limit,
+            "Execution cost {} exceeded maximum allowed cost of {}",
+            self.cost,
+            limit
+        );
+    }
+}
+
+impl ExecutionCtxTrait for MeteredCostCtx {
+    /// Checks the hard cost limit, then charges the access adapter cells of the access.
+    #[inline(always)]
+    fn on_memory_operation(&mut self, address_space: u32, _ptr: u32, size: u32) {
+        debug_assert!(
+            address_space != RV32_IMM_AS,
+            "address space must not be immediate"
+        );
+        debug_assert!(size > 0, "size must be greater than 0, got {}", size);
+        debug_assert!(
+            size.is_power_of_two(),
+            "size must be a power of 2, got {}",
+            size
+        );
+        // Prevent unbounded memory accesses per instruction
+        self.check_cost_limit();
+
+        // Handle access adapter updates
+        // SAFETY: size passed is always a non-zero power of 2
+        let non_zero_size = unsafe { NonZero::new_unchecked(size) };
+        let size_bits = non_zero_size.ilog2();
+        let (adapters, widths) = (&self.access_adapter_ctx, &self.widths);
+        adapters.update_cells(&mut self.cost, address_space, size_bits, widths);
+    }
+
+    /// Suspends once the accumulated cost exceeds `max_execution_cost`.
+    fn should_suspend<F>(vm_state: &mut VmExecState<F, GuestMemory, Self>) -> bool {
+        let ctx = &vm_state.ctx;
+        ctx.cost > ctx.max_execution_cost
+    }
+}
+
+impl MeteredExecutionCtxTrait for MeteredCostCtx {
+    #[inline(always)]
+    fn on_height_change(&mut self, chip_idx: usize, height_delta: u32) {
+        debug_assert!(chip_idx < self.widths.len(), "chip_idx out of bounds");
+        // SAFETY: chip_idx is created in executor_idx_to_air_idx and is always within bounds
+        let width = unsafe { *self.widths.get_unchecked(chip_idx) } as u64;
+        self.cost += u64::from(height_delta) * width;
+    }
+}
diff --git a/crates/vm/src/arch/execution_mode/mod.rs b/crates/vm/src/arch/execution_mode/mod.rs
new file mode 100644
index 0000000000..640e30a305
--- /dev/null
+++ b/crates/vm/src/arch/execution_mode/mod.rs
@@ -0,0 +1,21 @@
+use crate::{arch::VmExecState, system::memory::online::GuestMemory};
+
+pub mod metered;
+pub mod metered_cost;
+mod preflight;
+mod pure;
+
+pub use metered::{ctx::MeteredCtx, segment_ctx::Segment};
+pub use metered_cost::{MeteredCostCtx, MeteredCostExecutionOutput};
+pub use preflight::PreflightCtx;
+pub use pure::ExecutionCtx;
+
+pub trait ExecutionCtxTrait: Sized {
+    fn on_memory_operation(&mut self, address_space: u32, ptr: u32, size: u32);
+    fn should_suspend<F>(vm_state: &mut VmExecState<F, GuestMemory, Self>) -> bool;
+    fn on_terminate<F>(_vm_state: &mut VmExecState<F, GuestMemory, Self>) {} // default: no-op
+}
+/// Execution context that is additionally notified when the trace height of a chip changes.
+pub trait MeteredExecutionCtxTrait: ExecutionCtxTrait {
+    fn on_height_change(&mut self, chip_idx: usize, height_delta: u32);
+}
diff --git a/crates/vm/src/arch/execution_mode/preflight.rs b/crates/vm/src/arch/execution_mode/preflight.rs
new file mode 100644
index 0000000000..7b15bf76b4
--- /dev/null
+++ b/crates/vm/src/arch/execution_mode/preflight.rs
@@ -0,0 +1,24 @@
+use crate::arch::Arena;
+
+/// Context holding one arena per AIR and an optional end instruction count.
+pub struct PreflightCtx<RA> {
+    pub arenas: Vec<RA>,
+    pub instret_end: Option<u64>,
+}
+
+impl<RA: Arena> PreflightCtx<RA> {
+    /// `capacities` is the list of `(height, width)` dimensions for each arena, indexed by AIR
+    /// index. The length of `capacities` must equal the number of AIRs.
+    /// Here `height` will always mean an overestimate of the trace height for that AIR, while
+    /// `width` may have different meanings depending on the `RA` type.
+    pub fn new_with_capacity(capacities: &[(usize, usize)], instret_end: Option<u64>) -> Self {
+        let mut arenas = Vec::with_capacity(capacities.len());
+        for &(height, main_width) in capacities {
+            arenas.push(RA::with_capacity(height, main_width));
+        }
+        Self {
+            arenas,
+            instret_end,
+        }
+    }
+}
diff --git a/crates/vm/src/arch/execution_mode/pure.rs b/crates/vm/src/arch/execution_mode/pure.rs
new file mode 100644
index 0000000000..176a8c8a2b
--- /dev/null
+++ b/crates/vm/src/arch/execution_mode/pure.rs
@@ -0,0 +1,35 @@
+use crate::{
+    arch::{execution_mode::ExecutionCtxTrait, VmExecState},
+    system::memory::online::GuestMemory,
+};
+
+/// Execution context of the `pure` mode: it only stores when execution should suspend.
+pub struct ExecutionCtx {
+    instret_end: u64,
+}
+
+impl ExecutionCtx {
+    /// Suspends at `instret_end`; `None` maps to `u64::MAX`, i.e. effectively no limit.
+    pub fn new(instret_end: Option<u64>) -> Self {
+        ExecutionCtx {
+            instret_end: instret_end.unwrap_or(u64::MAX),
+        }
+    }
+}
+
+impl Default for ExecutionCtx {
+    fn default() -> Self {
+        Self::new(None)
+    }
+}
+
+impl ExecutionCtxTrait for ExecutionCtx {
+    /// Memory operations need no bookkeeping in this mode.
+    #[inline(always)]
+    fn on_memory_operation(&mut self, _address_space: u32, _ptr: u32, _size: u32) {}
+    /// Suspends as soon as `instret` has reached `instret_end`.
+    #[inline(always)]
+    fn should_suspend<F>(vm_state: &mut VmExecState<F, GuestMemory, Self>) -> bool {
+        vm_state.instret >= vm_state.ctx.instret_end
+    }
+}
diff --git a/crates/vm/src/arch/extensions.rs b/crates/vm/src/arch/extensions.rs
index adda318f6a..4f8ab194f1 100644
--- a/crates/vm/src/arch/extensions.rs
+++ b/crates/vm/src/arch/extensions.rs
@@ -1,56 +1,47 @@
+/// A full VM extension consists of three components, represented by sub-traits:
+/// - [VmExecutionExtension]
+/// - [VmCircuitExtension]
+/// - [VmProverExtension]: there may be multiple implementations of `VmProverExtension` for the
+///   same `VmCircuitExtension` for different prover backends.
+///
+/// It is intended that `VmExecutionExtension` and `VmCircuitExtension` are implemented on the
+/// same struct and `VmProverExtension` is implemented on a separate struct (usually a ZST) to
+/// get around Rust orphan rules.
 use std::{
-    any::{Any, TypeId},
-    cell::RefCell,
-    iter::once,
-    sync::{Arc, Mutex},
+    any::{type_name, Any},
+    iter::{self, zip},
+    sync::Arc,
 };
 
-use derive_more::derive::From;
-use getset::Getters;
-use itertools::{zip_eq, Itertools};
-#[cfg(feature = "bench-metrics")]
-use metrics::counter;
-use openvm_circuit_derive::{AnyEnum, InstructionExecutor};
-use openvm_circuit_primitives::{
-    utils::next_power_of_two_or_zero,
-    var_range::{SharedVariableRangeCheckerChip, VariableRangeCheckerBus},
-};
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
-use openvm_instructions::{
-    program::Program, LocalOpcode, PhantomDiscriminant, PublishOpcode, SystemOpcode, VmOpcode,
+use getset::{CopyGetters, Getters};
+use openvm_circuit_primitives::var_range::{
+    SharedVariableRangeCheckerChip, VariableRangeCheckerAir,
 };
+use openvm_instructions::{PhantomDiscriminant, VmOpcode};
 use openvm_stark_backend::{
-    config::{Domain, StarkGenericConfig},
-    interaction::{BusIndex, PermutationCheckBus},
-    keygen::types::LinearConstraint,
-    p3_commit::PolynomialSpace,
-    p3_field::{FieldAlgebra, PrimeField32, TwoAdicField},
-    p3_matrix::Matrix,
-    p3_util::log2_ceil_usize,
-    prover::types::{AirProofInput, CommittedTraceData, ProofInput},
-    AirRef, Chip, ChipUsageGetter,
+    config::{StarkGenericConfig, Val},
+    engine::StarkEngine,
+    interaction::BusIndex,
+    keygen::types::MultiStarkProvingKey,
+    prover::{
+        cpu::CpuBackend,
+        hal::ProverBackend,
+        types::{AirProvingContext, ProvingContext},
+    },
+    rap::AnyRap,
+    AirRef, AnyChip, Chip,
 };
-use p3_baby_bear::BabyBear;
 use rustc_hash::FxHashMap;
-use serde::{Deserialize, Serialize};
-
-use super::{
-    vm_poseidon2_config, ExecutionBus, GenerationError, InstructionExecutor, PhantomSubExecutor,
-    Streams, SystemConfig, SystemTraceHeights,
-};
-#[cfg(feature = "bench-metrics")]
-use crate::metrics::VmMetrics;
-use crate::system::{
-    connector::VmConnectorChip,
-    memory::{
-        offline_checker::{MemoryBridge, MemoryBus},
-        MemoryController, MemoryImage, OfflineMemory, BOUNDARY_AIR_OFFSET, MERKLE_AIR_OFFSET,
+use tracing::info_span;
+
+use super::{GenerationError, PhantomSubExecutor, SystemConfig};
+use crate::{
+    arch::Arena,
+    system::{
+        memory::{BOUNDARY_AIR_OFFSET, MERKLE_AIR_OFFSET},
+        phantom::PhantomExecutor,
+        SystemAirInventory, SystemChipComplex, SystemRecords,
     },
-    native_adapter::NativeAdapterChip,
-    phantom::PhantomChip,
-    poseidon2::Poseidon2PeripheryChip,
-    program::{ProgramBus, ProgramChip},
-    public_values::{core::PublicValuesCoreChip, PublicValuesChip},
 };
 
 /// Global AIR ID in the VM circuit verifying key.
@@ -67,240 +58,169 @@ pub const BOUNDARY_AIR_ID: usize = PUBLIC_VALUES_AIR_ID + 1 + BOUNDARY_AIR_OFFSE
 /// Merkle AIR commits start/final memory states.
 pub const MERKLE_AIR_ID: usize = CONNECTOR_AIR_ID + 1 + MERKLE_AIR_OFFSET;
 
-/// Configuration for a processor extension.
-///
-/// There are two associated types:
-/// - `Executor`: enum for chips that are [`InstructionExecutor`]s.
-/// -
-pub trait VmExtension<F: PrimeField32> {
-    /// Enum of chips that implement [`InstructionExecutor`] for instruction execution.
-    /// `Executor` **must** implement `Chip<SC>` but the trait bound is omitted to omit the
-    /// `StarkGenericConfig` generic parameter.
-    type Executor: InstructionExecutor<F> + AnyEnum;
-    /// Enum of periphery chips that do not implement [`InstructionExecutor`].
-    /// `Periphery` **must** implement `Chip<SC>` but the trait bound is omitted to omit the
-    /// `StarkGenericConfig` generic parameter.
-    type Periphery: AnyEnum;
-
-    fn build(
+pub type ExecutorId = u32;
+
+// ======================= VM Extension Traits =============================
+
+/// Extension of VM execution. Allows registration of custom execution of new instructions by
+/// opcode.
+pub trait VmExecutionExtension<F> {
+    /// Enum of executor variants
+    type Executor: AnyEnum;
+
+    fn extend_execution(
         &self,
-        builder: &mut VmInventoryBuilder<F>,
-    ) -> Result<VmInventory<Self::Executor, Self::Periphery>, VmInventoryError>;
+        inventory: &mut ExecutorInventoryBuilder<F, Self::Executor>,
+    ) -> Result<(), ExecutorInventoryError>;
 }
 
-impl<F: PrimeField32, E: VmExtension<F>> VmExtension<F> for Option<E> {
-    type Executor = E::Executor;
-    type Periphery = E::Periphery;
+/// Extension of the VM circuit. Allows _in-order_ addition of new AIRs with interactions.
+pub trait VmCircuitExtension<SC: StarkGenericConfig> {
+    fn extend_circuit(&self, inventory: &mut AirInventory<SC>) -> Result<(), AirInventoryError>;
+}
 
-    fn build(
+/// Extension of VM trace generation. The generics are `E` for [StarkEngine], `RA` for record arena,
+/// and `EXT` for execution and circuit extension. Chips must be added to the inventory in exactly
+/// the order of the AIRs added by [`VmCircuitExtension`] for this extension.
+///
+/// Note that this trait differs from [VmExecutionExtension] and [VmCircuitExtension]. This trait is
+/// meant to be implemented on a separate ZST which may be different for different [ProverBackend]s.
+/// This is done to get around Rust orphan rules.
+pub trait VmProverExtension<E, RA, EXT>
+where
+    E: StarkEngine,
+    EXT: VmExecutionExtension<Val<E::SC>> + VmCircuitExtension<E::SC>,
+{
+    /// We do not provide access to the [ExecutorInventory] because the process to find an executor
+    /// from the inventory seems more cumbersome than to simply re-construct any necessary executors
+    /// directly within this function implementation.
+    fn extend_prover(
         &self,
-        builder: &mut VmInventoryBuilder<F>,
-    ) -> Result<VmInventory<Self::Executor, Self::Periphery>, VmInventoryError> {
-        if let Some(extension) = self {
-            extension.build(builder)
-        } else {
-            Ok(VmInventory::new())
-        }
-    }
+        extension: &EXT,
+        inventory: &mut ChipInventory<E::SC, RA, E::PB>,
+    ) -> Result<(), ChipInventoryError>;
 }
 
-/// SystemPort combines system resources needed by most extensions
-#[derive(Clone, Copy)]
-pub struct SystemPort {
-    pub execution_bus: ExecutionBus,
-    pub program_bus: ProgramBus,
-    pub memory_bridge: MemoryBridge,
+// ======================= Different Inventory Struct Definitions =============================
+
+pub struct ExecutorInventory<E> {
+    config: SystemConfig,
+    /// Lookup table to executor ID.
+    /// This is stored in a hashmap because it is _not_ expected to be used in the hot path.
+    /// A direct opcode -> executor mapping should be generated before runtime execution.
+    pub instruction_lookup: FxHashMap<VmOpcode, ExecutorId>,
+    pub executors: Vec<E>,
+    /// `ext_start[i]` will have the starting index in `executors` for extension `i`
+    ext_start: Vec<usize>,
 }
 
-/// Builder for processing unit. Processing units extend an existing system unit.
-pub struct VmInventoryBuilder<'a, F: PrimeField32> {
-    system_config: &'a SystemConfig,
-    system: &'a SystemBase<F>,
-    streams: &'a Arc<Mutex<Streams<F>>>,
-    bus_idx_mgr: BusIndexManager,
+// @dev: We need ExecutorInventoryBuilder separate from ExecutorInventory because of how
+// ExecutorInventory::extend works: we want to build an inventory with some big E3 enum that
+// includes both enum types E1, E2. However the interface for a VmExecutionExtension will only know
+// about the enum E2. In order to be able to allow access to the old executors with type E1 without
+// referring to the type E1, we need to create this separate builder struct.
+pub struct ExecutorInventoryBuilder<'a, F, E> {
     /// Chips that are already included in the chipset and may be used
     /// as dependencies. The order should be that depended-on chips are ordered
     /// **before** their dependents.
-    chips: Vec<&'a dyn AnyEnum>,
+    old_executors: Vec<&'a dyn AnyEnum>,
+    new_inventory: ExecutorInventory<E>,
+    phantom_executors: FxHashMap<PhantomDiscriminant, Arc<dyn PhantomSubExecutor<F>>>,
 }
 
-impl<'a, F: PrimeField32> VmInventoryBuilder<'a, F> {
-    pub fn new(
-        system_config: &'a SystemConfig,
-        system: &'a SystemBase<F>,
-        streams: &'a Arc<Mutex<Streams<F>>>,
-        bus_idx_mgr: BusIndexManager,
-    ) -> Self {
-        Self {
-            system_config,
-            system,
-            streams,
-            bus_idx_mgr,
-            chips: Vec::new(),
-        }
-    }
-
-    pub fn system_config(&self) -> &SystemConfig {
-        self.system_config
-    }
-
-    pub fn system_base(&self) -> &SystemBase<F> {
-        self.system
-    }
-
-    pub fn system_port(&self) -> SystemPort {
-        SystemPort {
-            execution_bus: self.system_base().execution_bus(),
-            program_bus: self.system_base().program_bus(),
-            memory_bridge: self.system_base().memory_bridge(),
-        }
-    }
-
-    pub fn new_bus_idx(&mut self) -> BusIndex {
-        self.bus_idx_mgr.new_bus_idx()
-    }
-
-    /// Looks through built chips to see if there exists any of type `C` by downcasting.
-    /// Returns all chips of type `C` in the chipset.
+#[derive(Clone, Getters, CopyGetters)]
+pub struct AirInventory<SC: StarkGenericConfig> {
+    #[get = "pub"]
+    config: SystemConfig,
+    /// The system AIRs required by the circuit architecture.
+    #[get = "pub"]
+    system: SystemAirInventory<SC>,
+    /// List of all non-system AIRs in the circuit, in insertion order, which is the **reverse** of
+    /// the order they appear in the verifying key.
     ///
-    /// Note: the type `C` will usually be a smart pointer to a chip.
-    pub fn find_chip<C: 'static>(&self) -> Vec<&C> {
-        self.chips
-            .iter()
-            .filter_map(|c| c.as_any_kind().downcast_ref())
-            .collect()
-    }
+    /// Note that the system will ensure that the first AIR in the list is always the
+    /// [VariableRangeCheckerAir].
+    #[get = "pub"]
+    ext_airs: Vec<AirRef<SC>>,
+    /// `ext_start[i]` will have the starting index in `ext_airs` for extension `i`
+    ext_start: Vec<usize>,
 
-    /// The generic `F` must match that of the `PhantomChip<F>`.
-    pub fn add_phantom_sub_executor<PE: PhantomSubExecutor<F> + 'static>(
-        &self,
-        phantom_sub: PE,
-        discriminant: PhantomDiscriminant,
-    ) -> Result<(), VmInventoryError> {
-        let chip_ref: &RefCell<PhantomChip<F>> =
-            self.find_chip().first().expect("PhantomChip always exists");
-        let mut chip = chip_ref.borrow_mut();
-        let existing = chip.add_sub_executor(phantom_sub, discriminant);
-        if existing.is_some() {
-            return Err(VmInventoryError::PhantomSubExecutorExists { discriminant });
-        }
-        Ok(())
-    }
-
-    /// Shareable streams. Clone to get a shared mutable reference.
-    pub fn streams(&self) -> &Arc<Mutex<Streams<F>>> {
-        self.streams
-    }
-
-    fn add_chip<E: AnyEnum>(&mut self, chip: &'a E) {
-        self.chips.push(chip);
-    }
-}
-
-#[derive(Clone, Debug)]
-pub struct VmInventory<E, P> {
-    /// Lookup table to executor ID. We store executors separately due to mutable borrow issues.
-    instruction_lookup: FxHashMap<VmOpcode, ExecutorId>,
-    pub executors: Vec<E>,
-    pub periphery: Vec<P>,
-    /// Order of insertion. The reverse of this will be the order the chips are destroyed
-    /// to generate trace.
-    insertion_order: Vec<ChipId>,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
-pub struct VmInventoryTraceHeights {
-    pub chips: FxHashMap<ChipId, usize>,
+    bus_idx_mgr: BusIndexManager,
 }
 
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, derive_new::new)]
-pub struct VmComplexTraceHeights {
-    pub system: SystemTraceHeights,
-    pub inventory: VmInventoryTraceHeights,
+#[derive(Clone, Copy, Debug, Default)]
+pub struct BusIndexManager {
+    /// All existing buses use indices in [0, bus_idx_max)
+    bus_idx_max: BusIndex,
 }
 
-type ExecutorId = usize;
-
-#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, Serialize, Deserialize)]
-pub enum ChipId {
-    Executor(usize),
-    Periphery(usize),
+// @dev: ChipInventory does not have the SystemChipComplex because that is custom depending on `PB`.
+// The full struct with SystemChipComplex is VmChipComplex
+#[derive(Getters)]
+pub struct ChipInventory<SC, RA, PB>
+where
+    SC: StarkGenericConfig,
+    PB: ProverBackend,
+{
+    /// Read-only view of AIRs, as constructed via the [VmCircuitExtension] trait.
+    #[get = "pub"]
+    airs: AirInventory<SC>,
+    /// Chips that are being built.
+    #[get = "pub"]
+    chips: Vec<Box<dyn AnyChip<RA, PB>>>,
+
+    /// Number of extensions that have chips added, including the current one that is still being
+    /// built.
+    cur_num_exts: usize,
+    /// Mapping from executor index to chip insertion index. Chips must be added in order so the
+    /// chip insertion index matches the AIR insertion index. Reminder: this is in **reverse**
+    /// order of the verifying key AIR ordering.
+    ///
+    /// Note: if public values chip exists, then it will be the first entry and point to
+    /// `usize::MAX`. This entry should never be used.
+    pub executor_idx_to_insertion_idx: Vec<usize>,
 }
 
-#[derive(thiserror::Error, Debug)]
-pub enum VmInventoryError {
-    #[error("Opcode {opcode} already owned by executor id {id}")]
-    ExecutorExists { opcode: VmOpcode, id: ExecutorId },
-    #[error("Phantom discriminant {} already has sub-executor", .discriminant.0)]
-    PhantomSubExecutorExists { discriminant: PhantomDiscriminant },
-    #[error("Chip {name} not found")]
-    ChipNotFound { name: String },
+/// The collection of all chips in the VM. The chips should correspond 1-to-1 with the associated
+/// [AirInventory]. The [VmChipComplex] coordinates the trace generation for all chips in the VM
+/// after construction.
+#[derive(Getters)]
+pub struct VmChipComplex<SC, RA, PB, SCC>
+where
+    SC: StarkGenericConfig,
+    PB: ProverBackend,
+{
+    /// System chip complex responsible for trace generation of [SystemAirInventory]
+    pub system: SCC,
+    pub inventory: ChipInventory<SC, RA, PB>,
 }
 
-impl<E, P> Default for VmInventory<E, P> {
-    fn default() -> Self {
-        Self::new()
-    }
-}
+// ======================= Inventory Function Definitions =============================
 
-impl<E, P> VmInventory<E, P> {
-    pub fn new() -> Self {
+impl<E> ExecutorInventory<E> {
+    /// Empty inventory should be created at the start of the declaration of a new extension.
+    #[allow(clippy::new_without_default)]
+    pub fn new(config: SystemConfig) -> Self {
         Self {
-            instruction_lookup: FxHashMap::default(),
-            executors: Vec::new(),
-            periphery: Vec::new(),
-            insertion_order: Vec::new(),
-        }
-    }
-
-    pub fn transmute<E2, P2>(self) -> VmInventory<E2, P2>
-    where
-        E: Into<E2>,
-        P: Into<P2>,
-    {
-        VmInventory {
-            instruction_lookup: self.instruction_lookup,
-            executors: self.executors.into_iter().map(|e| e.into()).collect(),
-            periphery: self.periphery.into_iter().map(|p| p.into()).collect(),
-            insertion_order: self.insertion_order,
-        }
-    }
-
-    /// Append `other` to current inventory. This means `self` comes earlier in the dependency
-    /// chain.
-    pub fn append(&mut self, mut other: VmInventory<E, P>) -> Result<(), VmInventoryError> {
-        let num_executors = self.executors.len();
-        let num_periphery = self.periphery.len();
-        for (opcode, mut id) in other.instruction_lookup.into_iter() {
-            id += num_executors;
-            if let Some(old_id) = self.instruction_lookup.insert(opcode, id) {
-                return Err(VmInventoryError::ExecutorExists { opcode, id: old_id });
-            }
-        }
-        for chip_id in other.insertion_order.iter_mut() {
-            match chip_id {
-                ChipId::Executor(id) => *id += num_executors,
-                ChipId::Periphery(id) => *id += num_periphery,
-            }
+            config,
+            instruction_lookup: Default::default(),
+            executors: Default::default(),
+            ext_start: vec![0],
         }
-        self.executors.append(&mut other.executors);
-        self.periphery.append(&mut other.periphery);
-        self.insertion_order.append(&mut other.insertion_order);
-        Ok(())
     }
 
     /// Inserts an executor with the collection of opcodes that it handles.
-    /// If some executor already owns one of the opcodes, it will be replaced and the old
-    /// executor ID is returned.
+    /// If some executor already owns one of the opcodes, an error is returned with the ID of the
+    /// existing executor and the inventory is left unchanged.
     pub fn add_executor(
         &mut self,
         executor: impl Into<E>,
         opcodes: impl IntoIterator<Item = VmOpcode>,
-    ) -> Result<(), VmInventoryError> {
+    ) -> Result<(), ExecutorInventoryError> {
         let opcodes: Vec<_> = opcodes.into_iter().collect();
         for opcode in &opcodes {
             if let Some(id) = self.instruction_lookup.get(opcode) {
-                return Err(VmInventoryError::ExecutorExists {
+                return Err(ExecutorInventoryError::ExecutorExists {
                     opcode: *opcode,
                     id: *id,
                 });
@@ -308,897 +228,521 @@ impl<E, P> VmInventory<E, P> {
         }
         let id = self.executors.len();
         self.executors.push(executor.into());
-        self.insertion_order.push(ChipId::Executor(id));
         for opcode in opcodes {
-            self.instruction_lookup.insert(opcode, id);
+            self.instruction_lookup
+                .insert(opcode, id.try_into().unwrap());
         }
         Ok(())
     }
 
-    pub fn add_periphery_chip(&mut self, periphery_chip: impl Into<P>) {
-        let id = self.periphery.len();
-        self.periphery.push(periphery_chip.into());
-        self.insertion_order.push(ChipId::Periphery(id));
-    }
-
-    pub fn get_executor(&self, opcode: VmOpcode) -> Option<&E> {
-        let id = self.instruction_lookup.get(&opcode)?;
-        self.executors.get(*id)
-    }
-
-    pub fn get_mut_executor(&mut self, opcode: &VmOpcode) -> Option<&mut E> {
-        let id = self.instruction_lookup.get(opcode)?;
-        self.executors.get_mut(*id)
-    }
-
-    pub fn executors(&self) -> &[E] {
-        &self.executors
-    }
-
-    pub fn periphery(&self) -> &[P] {
-        &self.periphery
-    }
+    /// Extend the inventory with a new extension.
+    /// A new inventory with different type generics is returned with the combined inventory.
+    pub fn extend<F, E3, EXT>(
+        self,
+        other: &EXT,
+    ) -> Result<ExecutorInventory<E3>, ExecutorInventoryError>
+    where
+        F: 'static,
+        E: Into<E3> + AnyEnum,
+        E3: AnyEnum,
+        EXT: VmExecutionExtension<F>,
+        EXT::Executor: Into<E3>,
+    {
+        let mut builder: ExecutorInventoryBuilder<F, EXT::Executor> = self.builder();
+        other.extend_execution(&mut builder)?;
+        let other_inventory = builder.new_inventory;
+        let other_phantom_executors = builder.phantom_executors;
+        let mut inventory_ext = self.transmute();
+        inventory_ext.append(other_inventory.transmute())?;
+        let phantom_chip: &mut PhantomExecutor<F> = inventory_ext
+            .find_executor_mut()
+            .next()
+            .expect("system always has phantom chip");
+        let phantom_executors = &mut phantom_chip.phantom_executors;
+        for (discriminant, sub_executor) in other_phantom_executors {
+            if phantom_executors
+                .insert(discriminant, sub_executor)
+                .is_some()
+            {
+                return Err(ExecutorInventoryError::PhantomSubExecutorExists { discriminant });
+            }
+        }
 
-    pub fn num_airs(&self) -> usize {
-        self.executors.len() + self.periphery.len()
+        Ok(inventory_ext)
     }
 
-    /// Return trace heights of all chips in the inventory.
-    /// The order is deterministic:
-    /// - All executors come first, in the order they were added.
-    /// - All periphery chips come after, in the order they were added.
-    pub fn get_trace_heights(&self) -> VmInventoryTraceHeights
+    pub fn builder<F, E2>(&self) -> ExecutorInventoryBuilder<'_, F, E2>
     where
-        E: ChipUsageGetter,
-        P: ChipUsageGetter,
+        F: 'static,
+        E: AnyEnum,
     {
-        VmInventoryTraceHeights {
-            chips: self
-                .executors
-                .iter()
-                .enumerate()
-                .map(|(i, chip)| (ChipId::Executor(i), chip.current_trace_height()))
-                .chain(
-                    self.periphery
-                        .iter()
-                        .enumerate()
-                        .map(|(i, chip)| (ChipId::Periphery(i), chip.current_trace_height())),
-                )
-                .collect(),
+        let old_executors = self.executors.iter().map(|e| e as &dyn AnyEnum).collect();
+        ExecutorInventoryBuilder {
+            old_executors,
+            new_inventory: ExecutorInventory::new(self.config.clone()),
+            phantom_executors: Default::default(),
         }
     }
 
-    /// Return the dummy trace heights of the inventory. This is used for generating a dummy proof.
-    /// Regular users should not need this.
-    pub fn get_dummy_trace_heights(&self) -> VmInventoryTraceHeights
+    pub fn transmute<E2>(self) -> ExecutorInventory<E2>
     where
-        E: ChipUsageGetter,
-        P: ChipUsageGetter,
+        E: Into<E2>,
     {
-        VmInventoryTraceHeights {
-            chips: self
-                .executors
-                .iter()
-                .enumerate()
-                .map(|(i, _)| (ChipId::Executor(i), 1))
-                .chain(self.periphery.iter().enumerate().map(|(i, chip)| {
-                    (
-                        ChipId::Periphery(i),
-                        chip.constant_trace_height().unwrap_or(1),
-                    )
-                }))
-                .collect(),
+        ExecutorInventory {
+            config: self.config,
+            instruction_lookup: self.instruction_lookup,
+            executors: self.executors.into_iter().map(|e| e.into()).collect(),
+            ext_start: self.ext_start,
         }
     }
-}
-
-impl VmInventoryTraceHeights {
-    /// Round all trace heights to the next power of two. This will round trace heights of 0 to 1.
-    pub fn round_to_next_power_of_two(&mut self) {
-        self.chips
-            .values_mut()
-            .for_each(|v| *v = v.next_power_of_two());
-    }
-
-    /// Round all trace heights to the next power of two, except 0 stays 0.
-    pub fn round_to_next_power_of_two_or_zero(&mut self) {
-        self.chips
-            .values_mut()
-            .for_each(|v| *v = next_power_of_two_or_zero(*v));
-    }
-}
-
-impl VmComplexTraceHeights {
-    /// Round all trace heights to the next power of two. This will round trace heights of 0 to 1.
-    pub fn round_to_next_power_of_two(&mut self) {
-        self.system.round_to_next_power_of_two();
-        self.inventory.round_to_next_power_of_two();
-    }
-
-    /// Round all trace heights to the next power of two, except 0 stays 0.
-    pub fn round_to_next_power_of_two_or_zero(&mut self) {
-        self.system.round_to_next_power_of_two_or_zero();
-        self.inventory.round_to_next_power_of_two_or_zero();
-    }
-}
-
-// PublicValuesChip needs F: PrimeField32 due to Adapter
-/// The minimum collection of chips that any VM must have.
-#[derive(Getters)]
-pub struct VmChipComplex<F: PrimeField32, E, P> {
-    #[getset(get = "pub")]
-    config: SystemConfig,
-    // ATTENTION: chip destruction should follow the **reverse** of the following field order:
-    pub base: SystemBase<F>,
-    /// Extendable collection of chips for executing instructions.
-    /// System ensures it contains:
-    /// - PhantomChip
-    /// - PublicValuesChip if continuations disabled
-    /// - Poseidon2Chip if continuations enabled
-    pub inventory: VmInventory<E, P>,
-    overridden_inventory_heights: Option<VmInventoryTraceHeights>,
-
-    /// Absolute maximum value a trace height can be and still be provable.
-    max_trace_height: usize,
-
-    streams: Arc<Mutex<Streams<F>>>,
-    bus_idx_mgr: BusIndexManager,
-}
-
-#[derive(Clone, Copy, Debug, Default)]
-pub struct BusIndexManager {
-    /// All existing buses use indices in [0, bus_idx_max)
-    bus_idx_max: BusIndex,
-}
-
-impl BusIndexManager {
-    pub fn new() -> Self {
-        Self { bus_idx_max: 0 }
-    }
-
-    pub fn new_bus_idx(&mut self) -> BusIndex {
-        let idx = self.bus_idx_max;
-        self.bus_idx_max = self.bus_idx_max.checked_add(1).unwrap();
-        idx
-    }
-}
-
-/// The base [VmChipComplex] with only system chips.
-pub type SystemComplex<F> = VmChipComplex<F, SystemExecutor<F>, SystemPeriphery<F>>;
-
-/// Base system chips.
-/// The following don't execute instructions, but are essential
-/// for the VM architecture.
-pub struct SystemBase<F> {
-    // RangeCheckerChip **must** be the last chip to have trace generation called on
-    pub range_checker_chip: SharedVariableRangeCheckerChip,
-    pub memory_controller: MemoryController<F>,
-    pub connector_chip: VmConnectorChip<F>,
-    pub program_chip: ProgramChip<F>,
-}
 
-impl<F: PrimeField32> SystemBase<F> {
-    pub fn range_checker_bus(&self) -> VariableRangeCheckerBus {
-        self.range_checker_chip.bus()
-    }
-
-    pub fn memory_bus(&self) -> MemoryBus {
-        self.memory_controller.memory_bus
+    /// Append `other` to current inventory. This means `self` comes earlier in the dependency
+    /// chain.
+    fn append(&mut self, mut other: ExecutorInventory<E>) -> Result<(), ExecutorInventoryError> {
+        let num_executors = self.executors.len();
+        for (opcode, mut id) in other.instruction_lookup.into_iter() {
+            id = id.checked_add(num_executors.try_into().unwrap()).unwrap();
+            if let Some(old_id) = self.instruction_lookup.insert(opcode, id) {
+                return Err(ExecutorInventoryError::ExecutorExists { opcode, id: old_id });
+            }
+        }
+        for id in &mut other.ext_start {
+            *id = id.checked_add(num_executors).unwrap();
+        }
+        self.executors.append(&mut other.executors);
+        self.ext_start.append(&mut other.ext_start);
+        Ok(())
     }
 
-    pub fn program_bus(&self) -> ProgramBus {
-        self.program_chip.air.bus
+    pub fn get_executor(&self, opcode: VmOpcode) -> Option<&E> {
+        let id = self.instruction_lookup.get(&opcode)?;
+        self.executors.get(*id as usize)
     }
 
-    pub fn memory_bridge(&self) -> MemoryBridge {
-        self.memory_controller.memory_bridge()
+    pub fn get_mut_executor(&mut self, opcode: &VmOpcode) -> Option<&mut E> {
+        let id = self.instruction_lookup.get(opcode)?;
+        self.executors.get_mut(*id as usize)
     }
 
-    pub fn offline_memory(&self) -> Arc<Mutex<OfflineMemory<F>>> {
-        self.memory_controller.offline_memory().clone()
+    pub fn executors(&self) -> &[E] {
+        &self.executors
     }
 
-    pub fn execution_bus(&self) -> ExecutionBus {
-        self.connector_chip.air.execution_bus
+    pub fn find_executor<EX: 'static>(&self) -> impl Iterator<Item = &'_ EX>
+    where
+        E: AnyEnum,
+    {
+        self.executors
+            .iter()
+            .filter_map(|e| e.as_any_kind().downcast_ref())
     }
 
-    /// Return trace heights of SystemBase. Usually this is for aggregation and not useful for
-    /// regular users.
-    pub fn get_system_trace_heights(&self) -> SystemTraceHeights {
-        SystemTraceHeights {
-            memory: self.memory_controller.get_memory_trace_heights(),
-        }
+    pub fn find_executor_mut<EX: 'static>(&mut self) -> impl Iterator<Item = &'_ mut EX>
+    where
+        E: AnyEnum,
+    {
+        self.executors
+            .iter_mut()
+            .filter_map(|e| e.as_any_kind_mut().downcast_mut())
     }
 
-    /// Return dummy trace heights of SystemBase. Usually this is for aggregation to generate a
-    /// dummy proof and not useful for regular users.
-    pub fn get_dummy_system_trace_heights(&self) -> SystemTraceHeights {
-        SystemTraceHeights {
-            memory: self.memory_controller.get_dummy_memory_trace_heights(),
-        }
+    /// Returns the system config of the inventory.
+    pub fn config(&self) -> &SystemConfig {
+        &self.config
     }
 }
 
-#[derive(ChipUsageGetter, Chip, AnyEnum, From, InstructionExecutor)]
-pub enum SystemExecutor<F: PrimeField32> {
-    PublicValues(PublicValuesChip<F>),
-    Phantom(RefCell<PhantomChip<F>>),
-}
-
-#[derive(ChipUsageGetter, Chip, AnyEnum, From)]
-pub enum SystemPeriphery<F: PrimeField32> {
-    /// Poseidon2 chip with direct compression interactions
-    Poseidon2(Poseidon2PeripheryChip<F>),
-}
-
-impl<F: PrimeField32> SystemComplex<F> {
-    pub fn new(config: SystemConfig) -> Self {
-        let mut bus_idx_mgr = BusIndexManager::new();
-        let execution_bus = ExecutionBus::new(bus_idx_mgr.new_bus_idx());
-        let memory_bus = MemoryBus::new(bus_idx_mgr.new_bus_idx());
-        let program_bus = ProgramBus::new(bus_idx_mgr.new_bus_idx());
-        let range_bus =
-            VariableRangeCheckerBus::new(bus_idx_mgr.new_bus_idx(), config.memory_config.decomp);
-
-        let range_checker = SharedVariableRangeCheckerChip::new(range_bus);
-        let memory_controller = if config.continuation_enabled {
-            MemoryController::with_persistent_memory(
-                memory_bus,
-                config.memory_config,
-                range_checker.clone(),
-                PermutationCheckBus::new(bus_idx_mgr.new_bus_idx()),
-                PermutationCheckBus::new(bus_idx_mgr.new_bus_idx()),
-            )
-        } else {
-            MemoryController::with_volatile_memory(
-                memory_bus,
-                config.memory_config,
-                range_checker.clone(),
-            )
-        };
-        let memory_bridge = memory_controller.memory_bridge();
-        let offline_memory = memory_controller.offline_memory();
-        let program_chip = ProgramChip::new(program_bus);
-        let connector_chip = VmConnectorChip::new(
-            execution_bus,
-            program_bus,
-            range_checker.clone(),
-            config.memory_config.clk_max_bits,
-        );
-
-        let mut inventory = VmInventory::new();
-        // PublicValuesChip is required when num_public_values > 0 in single segment mode.
-        if config.has_public_values_chip() {
-            assert_eq!(inventory.executors().len(), Self::PV_EXECUTOR_IDX);
-            let chip = PublicValuesChip::new(
-                NativeAdapterChip::new(execution_bus, program_bus, memory_bridge),
-                PublicValuesCoreChip::new(
-                    config.num_public_values,
-                    config.max_constraint_degree as u32 - 1,
-                ),
-                offline_memory,
-            );
-            inventory
-                .add_executor(chip, [PublishOpcode::PUBLISH.global_opcode()])
-                .unwrap();
-        }
-        if config.continuation_enabled {
-            assert_eq!(inventory.periphery().len(), Self::POSEIDON2_PERIPHERY_IDX);
-            // Add direct poseidon2 chip for persistent memory.
-            // This is **not** an instruction executor.
-            // Currently we never use poseidon2 opcodes when continuations is enabled: we will need
-            // special handling when that happens
-            let direct_bus_idx = memory_controller
-                .interface_chip
-                .compression_bus()
-                .unwrap()
-                .index;
-            let chip = Poseidon2PeripheryChip::new(
-                vm_poseidon2_config(),
-                direct_bus_idx,
-                config.max_constraint_degree,
-            );
-            inventory.add_periphery_chip(chip);
-        }
-        let streams = Arc::new(Mutex::new(Streams::default()));
-        let phantom_opcode = SystemOpcode::PHANTOM.global_opcode();
-        let mut phantom_chip =
-            PhantomChip::new(execution_bus, program_bus, SystemOpcode::CLASS_OFFSET);
-        phantom_chip.set_streams(streams.clone());
-        inventory
-            .add_executor(RefCell::new(phantom_chip), [phantom_opcode])
-            .unwrap();
-
-        let base = SystemBase {
-            program_chip,
-            connector_chip,
-            memory_controller,
-            range_checker_chip: range_checker,
-        };
-
-        let max_trace_height = if TypeId::of::<F>() == TypeId::of::<BabyBear>() {
-            let min_log_blowup = log2_ceil_usize(config.max_constraint_degree - 1);
-            1 << (BabyBear::TWO_ADICITY - min_log_blowup)
-        } else {
-            tracing::warn!(
-                "constructing SystemComplex for unrecognized field; using max_trace_height = 2^30"
-            );
-            1 << 30
-        };
-
-        Self {
-            config,
-            base,
-            inventory,
-            bus_idx_mgr,
-            streams,
-            overridden_inventory_heights: None,
-            max_trace_height,
-        }
+impl<F, E> ExecutorInventoryBuilder<'_, F, E> {
+    pub fn add_executor(
+        &mut self,
+        executor: impl Into<E>,
+        opcodes: impl IntoIterator<Item = VmOpcode>,
+    ) -> Result<(), ExecutorInventoryError> {
+        self.new_inventory.add_executor(executor, opcodes)
     }
-}
-
-impl<F: PrimeField32, E, P> VmChipComplex<F, E, P> {
-    /// **If** public values chip exists, then its executor index is 0.
-    pub(super) const PV_EXECUTOR_IDX: ExecutorId = 0;
-    /// **If** internal poseidon2 chip exists, then its periphery index is 0.
-    pub(super) const POSEIDON2_PERIPHERY_IDX: usize = 0;
 
-    // @dev: Remember to update self.bus_idx_mgr after dropping this!
-    pub fn inventory_builder(&self) -> VmInventoryBuilder<F>
+    /// Registers `phantom_sub` for `discriminant`; errors without modifying the map if one exists.
+    pub fn add_phantom_sub_executor<PE>(
+        &mut self,
+        phantom_sub: PE,
+        discriminant: PhantomDiscriminant,
+    ) -> Result<(), ExecutorInventoryError>
     where
         E: AnyEnum,
-        P: AnyEnum,
+        F: 'static,
+        PE: PhantomSubExecutor<F> + 'static,
     {
-        let mut builder =
-            VmInventoryBuilder::new(&self.config, &self.base, &self.streams, self.bus_idx_mgr);
-        // Add range checker for convenience, the other system base chips aren't included - they can
-        // be accessed directly from builder
-        builder.add_chip(&self.base.range_checker_chip);
-        for chip in self.inventory.executors() {
-            builder.add_chip(chip);
-        }
-        for chip in self.inventory.periphery() {
-            builder.add_chip(chip);
+        if self.phantom_executors.contains_key(&discriminant) {
+            return Err(ExecutorInventoryError::PhantomSubExecutorExists { discriminant });
         }
-
-        builder
-    }
-
-    /// Extend the chip complex with a new extension.
-    /// A new chip complex with different type generics is returned with the combined inventory.
-    pub fn extend<E3, P3, Ext>(
-        mut self,
-        config: &Ext,
-    ) -> Result<VmChipComplex<F, E3, P3>, VmInventoryError>
-    where
-        Ext: VmExtension<F>,
-        E: Into<E3> + AnyEnum,
-        P: Into<P3> + AnyEnum,
-        Ext::Executor: Into<E3>,
-        Ext::Periphery: Into<P3>,
-    {
-        let mut builder = self.inventory_builder();
-        let inventory_ext = config.build(&mut builder)?;
-        self.bus_idx_mgr = builder.bus_idx_mgr;
-        let mut ext_complex = self.transmute();
-        ext_complex.append(inventory_ext.transmute())?;
-        Ok(ext_complex)
+        self.phantom_executors
+            .insert(discriminant, Arc::new(phantom_sub));
+        Ok(())
     }
 
-    pub fn transmute<E2, P2>(self) -> VmChipComplex<F, E2, P2>
+    pub fn find_executor<EX: 'static>(&self) -> impl Iterator<Item = &'_ EX>
     where
-        E: Into<E2>,
-        P: Into<P2>,
+        E: AnyEnum,
     {
-        VmChipComplex {
-            config: self.config,
-            base: self.base,
-            inventory: self.inventory.transmute(),
-            bus_idx_mgr: self.bus_idx_mgr,
-            streams: self.streams,
-            overridden_inventory_heights: self.overridden_inventory_heights,
-            max_trace_height: self.max_trace_height,
-        }
+        self.old_executors
+            .iter()
+            .filter_map(|e| e.as_any_kind().downcast_ref())
     }
 
-    /// Appends `other` to the current inventory.
-    /// This means `self` comes earlier in the dependency chain.
-    pub fn append(&mut self, other: VmInventory<E, P>) -> Result<(), VmInventoryError> {
-        self.inventory.append(other)
+    /// Returns the maximum number of bits used to represent addresses in memory
+    pub fn pointer_max_bits(&self) -> usize {
+        self.new_inventory.config().memory_config.pointer_max_bits
     }
+}
 
-    pub fn program_chip(&self) -> &ProgramChip<F> {
-        &self.base.program_chip
+impl<SC: StarkGenericConfig> AirInventory<SC> {
+    /// Outside of this crate, [AirInventory] must be constructed via [SystemConfig].
+    pub(crate) fn new(
+        config: SystemConfig,
+        system: SystemAirInventory<SC>,
+        bus_idx_mgr: BusIndexManager,
+    ) -> Self {
+        Self {
+            config,
+            system,
+            ext_start: Vec::new(),
+            ext_airs: Vec::new(),
+            bus_idx_mgr,
+        }
     }
 
-    pub fn program_chip_mut(&mut self) -> &mut ProgramChip<F> {
-        &mut self.base.program_chip
+    /// Call **exactly once** at the start of each new extension: records where its AIRs begin.
+    pub fn start_new_extension(&mut self) {
+        self.ext_start.push(self.ext_airs.len());
     }
 
-    pub fn connector_chip(&self) -> &VmConnectorChip<F> {
-        &self.base.connector_chip
+    pub fn new_bus_idx(&mut self) -> BusIndex {
+        self.bus_idx_mgr.new_bus_idx()
     }
 
-    pub fn connector_chip_mut(&mut self) -> &mut VmConnectorChip<F> {
-        &mut self.base.connector_chip
+    /// Looks through the extension AIRs added so far for any of type `A`, by downcasting.
+    /// Returns all AIRs of type `A`, in the order they were added.
+    ///
+    /// This should not be used to look for system AIRs: only `ext_airs` is searched.
+    pub fn find_air<A: 'static>(&self) -> impl Iterator<Item = &'_ A> {
+        self.ext_airs
+            .iter()
+            .filter_map(|air| air.as_any().downcast_ref())
     }
 
-    pub fn memory_controller(&self) -> &MemoryController<F> {
-        &self.base.memory_controller
+    pub fn add_air<A: AnyRap<SC> + 'static>(&mut self, air: A) {
+        self.add_air_ref(Arc::new(air));
     }
 
-    pub fn range_checker_chip(&self) -> &SharedVariableRangeCheckerChip {
-        &self.base.range_checker_chip
+    pub fn add_air_ref(&mut self, air: AirRef<SC>) {
+        self.ext_airs.push(air);
     }
 
-    pub fn public_values_chip(&self) -> Option<&PublicValuesChip<F>>
-    where
-        E: AnyEnum,
-    {
-        let chip = self.inventory.executors().get(Self::PV_EXECUTOR_IDX)?;
-        chip.as_any_kind().downcast_ref()
+    pub fn range_checker(&self) -> &VariableRangeCheckerAir {
+        self.find_air()
+            .next()
+            .expect("system always has range checker AIR")
     }
 
-    pub fn poseidon2_chip(&self) -> Option<&Poseidon2PeripheryChip<F>>
-    where
-        P: AnyEnum,
-    {
-        let chip = self
-            .inventory
-            .periphery
-            .get(Self::POSEIDON2_PERIPHERY_IDX)?;
-        chip.as_any_kind().downcast_ref()
+    /// The AIRs in the order they appear in the verifying key.
+    /// This is the system AIRs, followed by the other AIRs in the **reverse** of the order they
+    /// were added in the VM extension definitions. In particular, the AIRs that have dependencies
+    /// appear later. The system guarantees that the last AIR is the [VariableRangeCheckerAir].
+    pub fn into_airs(self) -> impl Iterator<Item = AirRef<SC>> {
+        self.system
+            .into_airs()
+            .into_iter()
+            .chain(self.ext_airs.into_iter().rev())
     }
 
-    pub fn poseidon2_chip_mut(&mut self) -> Option<&mut Poseidon2PeripheryChip<F>>
-    where
-        P: AnyEnum,
-    {
-        let chip = self
-            .inventory
-            .periphery
-            .get_mut(Self::POSEIDON2_PERIPHERY_IDX)?;
-        chip.as_any_kind_mut().downcast_mut()
+    /// This is O(1). Returns the total number of AIRs and equals the length of [`Self::into_airs`].
+    pub fn num_airs(&self) -> usize {
+        self.config.num_airs() + self.ext_airs.len()
     }
 
-    pub fn finalize_memory(&mut self)
-    where
-        P: AnyEnum,
-    {
-        if self.config.continuation_enabled {
-            let chip = self
-                .inventory
-                .periphery
-                .get_mut(Self::POSEIDON2_PERIPHERY_IDX)
-                .expect("Poseidon2 chip required for persistent memory");
-            let hasher: &mut Poseidon2PeripheryChip<F> = chip
-                .as_any_kind_mut()
-                .downcast_mut()
-                .expect("Poseidon2 chip required for persistent memory");
-            self.base.memory_controller.finalize(Some(hasher))
-        } else {
-            self.base
-                .memory_controller
-                .finalize(None::<&mut Poseidon2PeripheryChip<F>>)
-        };
+    /// Standalone function to generate proving key and verifying key for this circuit.
+    pub fn keygen<E: StarkEngine<SC = SC>>(self, engine: &E) -> MultiStarkProvingKey<SC> {
+        let mut builder = engine.keygen_builder();
+        for air in self.into_airs() {
+            builder.add_air(air);
+        }
+        builder.generate_pk()
     }
 
-    pub(crate) fn set_program(&mut self, program: Program<F>) {
-        self.base.program_chip.set_program(program);
+    /// Returns the maximum number of bits used to represent addresses in memory
+    pub fn pointer_max_bits(&self) -> usize {
+        self.config.memory_config.pointer_max_bits
     }
+}
 
-    pub(crate) fn set_initial_memory(&mut self, memory: MemoryImage<F>) {
-        self.base.memory_controller.set_initial_memory(memory);
+impl BusIndexManager {
+    pub fn new() -> Self {
+        Self { bus_idx_max: 0 }
     }
 
-    /// Warning: this sets the stream in all chips which have a shared mutable reference to the
-    /// streams.
-    pub(crate) fn set_streams(&mut self, streams: Streams<F>) {
-        *self.streams.lock().unwrap() = streams;
+    pub fn new_bus_idx(&mut self) -> BusIndex {
+        let idx = self.bus_idx_max;
+        self.bus_idx_max = self.bus_idx_max.checked_add(1).unwrap();
+        idx
     }
+}
 
-    /// This should **only** be called after segment execution has finished.
-    pub fn take_streams(&mut self) -> Streams<F> {
-        std::mem::take(&mut self.streams.lock().unwrap())
+impl<SC, RA, PB> ChipInventory<SC, RA, PB>
+where
+    SC: StarkGenericConfig,
+    PB: ProverBackend,
+{
+    pub fn new(airs: AirInventory<SC>) -> Self {
+        Self {
+            airs,
+            chips: Vec::new(),
+            cur_num_exts: 0,
+            executor_idx_to_insertion_idx: Vec::new(),
+        }
     }
 
-    // This is O(1).
-    pub fn num_airs(&self) -> usize {
-        3 + self.memory_controller().num_airs() + self.inventory.num_airs()
+    pub fn config(&self) -> &SystemConfig {
+        &self.airs.config
     }
 
-    // we always need to special case it because we need to fix the air id.
-    fn public_values_chip_idx(&self) -> Option<ExecutorId> {
-        self.config
-            .has_public_values_chip()
-            .then_some(Self::PV_EXECUTOR_IDX)
-    }
+    pub fn start_new_extension(&mut self) -> Result<(), ChipInventoryError> {
+        if self.cur_num_exts >= self.airs.ext_start.len() {
+            return Err(ChipInventoryError::MissingCircuitExtension(
+                self.airs.ext_start.len(),
+            ));
+        }
+        if self.chips.len() != self.airs.ext_start[self.cur_num_exts] {
+            return Err(ChipInventoryError::MissingChip {
+                actual: self.chips.len(),
+                expected: self.airs.ext_start[self.cur_num_exts],
+            });
+        }
 
-    // Avoids a downcast when you don't need the concrete type.
-    fn _public_values_chip(&self) -> Option<&E> {
-        self.config
-            .has_public_values_chip()
-            .then(|| &self.inventory.executors[Self::PV_EXECUTOR_IDX])
+        self.cur_num_exts += 1;
+        Ok(())
     }
 
-    // All inventory chips except public values chip, in reverse order they were added.
-    pub(crate) fn chips_excluding_pv_chip(&self) -> impl Iterator<Item = Either<&'_ E, &'_ P>> {
-        let public_values_chip_idx = self.public_values_chip_idx();
-        self.inventory
-            .insertion_order
-            .iter()
-            .rev()
-            .flat_map(move |chip_idx| match *chip_idx {
-                // Skip public values chip if it exists.
-                ChipId::Executor(id) => (Some(id) != public_values_chip_idx)
-                    .then(|| Either::Executor(&self.inventory.executors[id])),
-                ChipId::Periphery(id) => Some(Either::Periphery(&self.inventory.periphery[id])),
+    /// Gets the extension AIR at index `self.chips.len()`, i.e. the AIR of the next chip to be
+    /// built. Errors with `AirNotFound` if there is no such AIR or it is not of type `A`.
+    pub fn next_air<A: 'static>(&self) -> Result<&A, ChipInventoryError> {
+        let cur_idx = self.chips.len();
+        self.airs
+            .ext_airs
+            .get(cur_idx)
+            .and_then(|air| air.as_any().downcast_ref())
+            .ok_or_else(|| ChipInventoryError::AirNotFound {
+                name: type_name::<A>().to_string(),
             })
     }
 
-    /// Return air names of all chips in order.
-    pub(crate) fn air_names(&self) -> Vec<String>
-    where
-        E: ChipUsageGetter,
-        P: ChipUsageGetter,
-    {
-        once(self.program_chip().air_name())
-            .chain([self.connector_chip().air_name()])
-            .chain(self._public_values_chip().map(|c| c.air_name()))
-            .chain(self.memory_controller().air_names())
-            .chain(self.chips_excluding_pv_chip().map(|c| c.air_name()))
-            .chain([self.range_checker_chip().air_name()])
-            .collect()
-    }
-    /// Return trace heights of all chips in order corresponding to `air_names`.
-    pub(crate) fn current_trace_heights(&self) -> Vec<usize>
-    where
-        E: ChipUsageGetter,
-        P: ChipUsageGetter,
-    {
-        once(self.program_chip().current_trace_height())
-            .chain([self.connector_chip().current_trace_height()])
-            .chain(self._public_values_chip().map(|c| c.current_trace_height()))
-            .chain(self.memory_controller().current_trace_heights())
-            .chain(
-                self.chips_excluding_pv_chip()
-                    .map(|c| c.current_trace_height()),
-            )
-            .chain([self.range_checker_chip().current_trace_height()])
-            .collect()
-    }
-
-    /// Return trace heights of (SystemBase, Inventory). Usually this is for aggregation and not
-    /// useful for regular users.
+    /// Looks through built chips to see if there exists any of type `C` by downcasting.
+    /// Returns all chips of type `C` in the chipset.
     ///
-    /// **Warning**: the order of `get_trace_heights` is deterministic, but it is not the same as
-    /// the order of `air_names`. In other words, the order here does not match the order of AIR
-    /// IDs.
-    pub fn get_internal_trace_heights(&self) -> VmComplexTraceHeights
-    where
-        E: ChipUsageGetter,
-        P: ChipUsageGetter,
-    {
-        VmComplexTraceHeights::new(
-            self.base.get_system_trace_heights(),
-            self.inventory.get_trace_heights(),
-        )
+    /// Note: the type `C` will usually be a smart pointer to a chip.
+    pub fn find_chip<C: 'static>(&self) -> impl Iterator<Item = &'_ C> {
+        self.chips.iter().filter_map(|c| c.as_any().downcast_ref())
     }
 
-    /// Return dummy trace heights of (SystemBase, Inventory). Usually this is for aggregation to
-    /// generate a dummy proof and not useful for regular users.
-    ///
-    /// **Warning**: the order of `get_dummy_trace_heights` is deterministic, but it is not the same
-    /// as the order of `air_names`. In other words, the order here does not match the order of
-    /// AIR IDs.
-    pub fn get_dummy_internal_trace_heights(&self) -> VmComplexTraceHeights
-    where
-        E: ChipUsageGetter,
-        P: ChipUsageGetter,
-    {
-        VmComplexTraceHeights::new(
-            self.base.get_dummy_system_trace_heights(),
-            self.inventory.get_dummy_trace_heights(),
-        )
+    /// Adds a chip that is not associated with any executor, as defined by the
+    /// [VmExecutionExtension] trait.
+    pub fn add_periphery_chip<C: Chip<RA, PB> + 'static>(&mut self, chip: C) {
+        self.chips.push(Box::new(chip));
     }
 
-    /// Override the trace heights for chips in the inventory. Usually this is for aggregation to
-    /// generate a dummy proof and not useful for regular users.
-    pub(crate) fn set_override_inventory_trace_heights(
-        &mut self,
-        overridden_inventory_heights: VmInventoryTraceHeights,
-    ) {
-        self.overridden_inventory_heights = Some(overridden_inventory_heights);
+    /// Adds a chip and associates it with the next executor by recording its insertion index.
+    /// **Caution:** you must add chips in the order matching the order that executors were added in
+    /// the [VmExecutionExtension] implementation.
+    pub fn add_executor_chip<C: Chip<RA, PB> + 'static>(&mut self, chip: C) {
+        tracing::debug!("add_executor_chip: {}", type_name::<C>());
+        self.executor_idx_to_insertion_idx.push(self.chips.len());
+        self.chips.push(Box::new(chip));
     }
 
-    pub(crate) fn set_override_system_trace_heights(
-        &mut self,
-        overridden_system_heights: SystemTraceHeights,
-    ) {
-        let memory_controller = &mut self.base.memory_controller;
-        memory_controller.set_override_trace_heights(overridden_system_heights.memory);
+    /// Returns the mapping from executor index to the AIR index, where AIR index is the index of
+    /// the AIR within the verifying key.
+    ///
+    /// Only call once the `ChipInventory` is fully built: panics if chip and AIR counts mismatch.
+    pub fn executor_idx_to_air_idx(&self) -> Vec<usize> {
+        let num_airs = self.airs.num_airs();
+        assert_eq!(
+            num_airs,
+            self.config().num_airs() + self.chips.len(),
+            "Number of chips does not match number of AIRs"
+        );
+        // System AIRs are at the front of the vkey, followed by extension AIRs in reverse insertion
+        // order, so the AIR index is `num_airs - (insertion_idx + 1)`.
+        self.executor_idx_to_insertion_idx
+            .iter()
+            .map(|insertion_idx| {
+                num_airs
+                    .checked_sub(insertion_idx.checked_add(1).unwrap())
+                    .unwrap_or_else(|| {
+                        panic!(
+                            "Attempt to subtract num_airs={num_airs} by {}",
+                            insertion_idx + 1
+                        )
+                    })
+            })
+            .collect()
     }
 
-    /// Return dynamic trace heights of all chips in order, or 0 if
-    /// chip has constant height.
-    // Used for continuation segmentation logic, so this is performance-sensitive.
-    // Return iterator so we can break early.
-    pub(crate) fn dynamic_trace_heights(&self) -> impl Iterator<Item = usize> + '_
-    where
-        E: ChipUsageGetter,
-        P: ChipUsageGetter,
-    {
-        // program_chip, connector_chip
-        [0, 0]
-            .into_iter()
-            .chain(self._public_values_chip().map(|c| c.current_trace_height()))
-            .chain(self.memory_controller().current_trace_heights())
-            .chain(self.chips_excluding_pv_chip().map(|c| match c {
-                // executor should never be constant height
-                Either::Executor(c) => c.current_trace_height(),
-                Either::Periphery(c) => {
-                    if c.constant_trace_height().is_some() {
-                        0
-                    } else {
-                        c.current_trace_height()
-                    }
-                }
-            }))
-            .chain([0]) // range_checker_chip
+    pub fn timestamp_max_bits(&self) -> usize {
+        self.airs.config().memory_config.timestamp_max_bits
     }
+}
 
-    /// Return trace cells of all chips in order.
-    /// This returns 0 cells for chips with preprocessed trace because the number of trace cells is
-    /// constant in those cases. This function is used to sample periodically and provided to
-    /// the segmentation strategy to decide whether to segment during execution.
-    pub(crate) fn current_trace_cells(&self) -> Vec<usize>
-    where
-        E: ChipUsageGetter,
-        P: ChipUsageGetter,
-    {
-        // program_chip, connector_chip
-        [0, 0]
-            .into_iter()
-            .chain(self._public_values_chip().map(|c| c.current_trace_cells()))
-            .chain(self.memory_controller().current_trace_cells())
-            .chain(self.chips_excluding_pv_chip().map(|c| match c {
-                Either::Executor(c) => c.current_trace_cells(),
-                Either::Periphery(c) => {
-                    if c.constant_trace_height().is_some() {
-                        0
-                    } else {
-                        c.current_trace_cells()
-                    }
-                }
-            }))
-            .chain([0]) // range_checker_chip
-            .collect()
+// CPU backend only: returns the first `SharedVariableRangeCheckerChip` found among built chips.
+impl<SC, RA> ChipInventory<SC, RA, CpuBackend<SC>>
+where
+    SC: StarkGenericConfig,
+{
+    pub fn range_checker(&self) -> Result<&SharedVariableRangeCheckerChip, ChipInventoryError> {
+        self.find_chip::<SharedVariableRangeCheckerChip>()
+            .next()
+            .ok_or_else(|| ChipInventoryError::ChipNotFound {
+                name: "VariableRangeCheckerChip".to_string(),
+            })
     }
+}
 
-    pub fn airs<SC: StarkGenericConfig>(&self) -> Vec<AirRef<SC>>
-    where
-        Domain<SC>: PolynomialSpace<Val = F>,
-        E: Chip<SC>,
-        P: Chip<SC>,
-    {
-        // ATTENTION: The order of AIR MUST be consistent with `generate_proof_input`.
-        let program_rap = Arc::new(self.program_chip().air) as AirRef<SC>;
-        let connector_rap = Arc::new(self.connector_chip().air) as AirRef<SC>;
-        [program_rap, connector_rap]
-            .into_iter()
-            .chain(self._public_values_chip().map(|chip| chip.air()))
-            .chain(self.memory_controller().airs())
-            .chain(self.chips_excluding_pv_chip().map(|chip| match chip {
-                Either::Executor(chip) => chip.air(),
-                Either::Periphery(chip) => chip.air(),
-            }))
-            .chain(once(self.range_checker_chip().air()))
-            .collect()
-    }
+// ================================== Error Types =====================================
 
-    pub(crate) fn generate_proof_input<SC: StarkGenericConfig>(
-        mut self,
-        cached_program: Option<CommittedTraceData<SC>>,
-        trace_height_constraints: &[LinearConstraint],
-        #[cfg(feature = "bench-metrics")] metrics: &mut VmMetrics,
-    ) -> Result<ProofInput<SC>, GenerationError>
-    where
-        Domain<SC>: PolynomialSpace<Val = F>,
-        E: Chip<SC>,
-        P: AnyEnum + Chip<SC>,
-    {
-        // System: Finalize memory.
-        self.finalize_memory();
+#[derive(thiserror::Error, Debug)]
+pub enum ExecutorInventoryError {
+    #[error("Opcode {opcode} already owned by executor id {id}")]
+    ExecutorExists { opcode: VmOpcode, id: ExecutorId },
+    #[error("Phantom discriminant {} already has sub-executor", .discriminant.0)]
+    PhantomSubExecutorExists { discriminant: PhantomDiscriminant },
+}
 
-        let trace_heights = self
-            .current_trace_heights()
-            .iter()
-            .map(|h| next_power_of_two_or_zero(*h))
-            .collect_vec();
-        if let Some(index) = trace_heights
-            .iter()
-            .position(|h| *h > self.max_trace_height)
-        {
-            tracing::info!(
-                "trace height of air {index} has height {} greater than maximum {}",
-                trace_heights[index],
-                self.max_trace_height
-            );
-            return Err(GenerationError::TraceHeightsLimitExceeded);
-        }
-        if trace_height_constraints.is_empty() {
-            tracing::warn!("generating proof input without trace height constraints");
-        }
-        for (i, constraint) in trace_height_constraints.iter().enumerate() {
-            let value = zip_eq(&constraint.coefficients, &trace_heights)
-                .map(|(&c, &h)| c as u64 * h as u64)
-                .sum::<u64>();
-
-            if value >= constraint.threshold as u64 {
-                tracing::info!(
-                    "trace heights {:?} violate linear constraint {} ({} >= {})",
-                    trace_heights,
-                    i,
-                    value,
-                    constraint.threshold
-                );
-                return Err(GenerationError::TraceHeightsLimitExceeded);
-            }
-        }
+#[derive(thiserror::Error, Debug)]
+pub enum AirInventoryError {
+    #[error("AIR {name} not found")]
+    AirNotFound { name: String },
+}
 
-        #[cfg(feature = "bench-metrics")]
-        self.finalize_metrics(metrics);
-
-        let has_pv_chip = self.public_values_chip_idx().is_some();
-        // ATTENTION: The order of AIR proof input generation MUST be consistent with `airs`.
-        let mut builder = VmProofInputBuilder::new();
-        let SystemBase {
-            range_checker_chip,
-            memory_controller,
-            connector_chip,
-            program_chip,
-            ..
-        } = self.base;
-
-        // System: Program Chip
-        debug_assert_eq!(builder.curr_air_id, PROGRAM_AIR_ID);
-        builder.add_air_proof_input(program_chip.generate_air_proof_input(cached_program));
-        // System: Connector Chip
-        debug_assert_eq!(builder.curr_air_id, CONNECTOR_AIR_ID);
-        builder.add_air_proof_input(connector_chip.generate_air_proof_input());
-
-        // Go through all chips in inventory in reverse order they were added (to resolve
-        // dependencies) Important Note: for air_id ordering reasons, we want to
-        // generate_air_proof_input for public values and memory chips **last** but include
-        // them into the `builder` **first**.
-        let mut public_values_input = None;
-        let mut insertion_order = self.inventory.insertion_order;
-        insertion_order.reverse();
-        let mut non_sys_inputs = Vec::with_capacity(insertion_order.len());
-        for chip_id in insertion_order {
-            let mut height = None;
-            if let Some(overridden_heights) = self.overridden_inventory_heights.as_ref() {
-                height = overridden_heights.chips.get(&chip_id).copied();
-            }
-            let air_proof_input = match chip_id {
-                ChipId::Executor(id) => {
-                    let chip = self.inventory.executors.pop().unwrap();
-                    assert_eq!(id, self.inventory.executors.len());
-                    generate_air_proof_input(chip, height)
-                }
-                ChipId::Periphery(id) => {
-                    let chip = self.inventory.periphery.pop().unwrap();
-                    assert_eq!(id, self.inventory.periphery.len());
-                    generate_air_proof_input(chip, height)
-                }
-            };
-            if has_pv_chip && chip_id == ChipId::Executor(Self::PV_EXECUTOR_IDX) {
-                public_values_input = Some(air_proof_input);
-            } else {
-                non_sys_inputs.push(air_proof_input);
-            }
-        }
+#[derive(thiserror::Error, Debug)]
+pub enum ChipInventoryError {
+    #[error("Air {name} not found")]
+    AirNotFound { name: String },
+    #[error("Chip {name} not found")]
+    ChipNotFound { name: String },
+    #[error("Adding prover extension without execution extension. Number of execution extensions is {0}")]
+    MissingExecutionExtension(usize),
+    #[error(
+        "Adding prover extension without circuit extension. Number of circuit extensions is {0}"
+    )]
+    MissingCircuitExtension(usize),
+    #[error("Missing chip. Number of chips is {actual}, expected number is {expected}")]
+    MissingChip { actual: usize, expected: usize },
+    #[error("Missing executor chip. Number of executors with associated chips is {actual}, expected number is {expected}")]
+    MissingExecutor { actual: usize, expected: usize },
+}
 
-        if let Some(input) = public_values_input {
-            debug_assert_eq!(builder.curr_air_id, PUBLIC_VALUES_AIR_ID);
-            builder.add_air_proof_input(input);
-        }
-        // System: Memory Controller
-        {
-            // memory
-            let air_proof_inputs = memory_controller.generate_air_proof_inputs();
-            for air_proof_input in air_proof_inputs {
-                builder.add_air_proof_input(air_proof_input);
-            }
-        }
-        // Non-system chips
-        non_sys_inputs
-            .into_iter()
-            .for_each(|input| builder.add_air_proof_input(input));
-        // System: Range Checker Chip
-        builder.add_air_proof_input(range_checker_chip.generate_air_proof_input());
+// ======================= VM Chip Complex Implementation =============================
 
-        Ok(builder.build())
+impl<SC, RA, PB, SCC> VmChipComplex<SC, RA, PB, SCC>
+where
+    SC: StarkGenericConfig,
+    RA: Arena,
+    PB: ProverBackend,
+    SCC: SystemChipComplex<RA, PB>,
+{
+    pub fn system_config(&self) -> &SystemConfig {
+        self.inventory.config()
     }
 
-    #[cfg(feature = "bench-metrics")]
-    fn finalize_metrics(&self, metrics: &mut VmMetrics)
-    where
-        E: ChipUsageGetter,
-        P: ChipUsageGetter,
-    {
-        tracing::info!(metrics.cycle_count);
-        counter!("total_cycles").absolute(metrics.cycle_count as u64);
-        counter!("main_cells_used")
-            .absolute(self.current_trace_cells().into_iter().sum::<usize>() as u64);
-
-        if self.config.profiling {
-            metrics.chip_heights =
-                itertools::izip!(self.air_names(), self.current_trace_heights()).collect();
-            metrics.emit();
-        }
+    /// `record_arenas` is expected to have length equal to the number of AIRs in the verifying key
+    /// and in the same order as the AIRs appearing in the verifying key, even though some chips may
+    /// not require a record arena.
+    pub(crate) fn generate_proving_ctx(
+        &mut self,
+        system_records: SystemRecords<PB::Val>,
+        record_arenas: Vec<RA>,
+        // trace_height_constraints: &[LinearConstraint],
+    ) -> Result<ProvingContext<PB>, GenerationError> {
+        // ATTENTION: The order of AIR proving context generation MUST be consistent with
+        // `AirInventory::into_airs`.
+
+        // Execution has finished at this point.
+        // ASSUMPTION WHICH MUST HOLD: non-system chips do not have a dependency on the system chips
+        // during trace generation. Given this assumption, we can generate trace on the system chips
+        // first.
+        let num_sys_airs = self.system_config().num_airs();
+        let num_airs = num_sys_airs + self.inventory.chips.len();
+        if num_airs != record_arenas.len() {
+            return Err(GenerationError::UnexpectedNumArenas {
+                actual: record_arenas.len(),
+                expected: num_airs,
+            });
+        }
+        let mut _record_arenas = record_arenas;
+        let record_arenas = _record_arenas.split_off(num_sys_airs);
+        let sys_record_arenas = _record_arenas;
+
+        // First go through all system chips
+        // Then go through all other chips in inventory in **reverse** order they were added (to
+        // resolve dependencies)
+        //
+        // Perf[jpw]: currently we call tracegen on each chip **serially** (although tracegen per
+        // chip is parallelized). We could introduce more parallelism, while potentially increasing
+        // the peak memory usage, by keeping a dependency tree and generating traces at the same
+        // layer of the tree in parallel.
+        let ctx_without_empties: Vec<(usize, AirProvingContext<_>)> = iter::empty()
+            .chain(info_span!("system_trace_gen").in_scope(|| {
+                self.system
+                    .generate_proving_ctx(system_records, sys_record_arenas)
+            }))
+            .chain(
+                zip(self.inventory.chips.iter().enumerate().rev(), record_arenas).map(
+                    |((insertion_idx, chip), records)| {
+                        // Only create a span if record is not empty:
+                        let _span = (!records.is_empty()).then(|| {
+                            let air_name = self.inventory.airs.ext_airs[insertion_idx].name();
+                            info_span!("single_trace_gen", air = air_name).entered()
+                        });
+                        chip.generate_proving_ctx(records)
+                    },
+                ),
+            )
+            .enumerate()
+            .filter(|(_air_id, ctx)| {
+                (!ctx.cached_mains.is_empty() || ctx.common_main.is_some())
+                    && ctx.main_trace_height() > 0
+            })
+            .collect();
+
+        Ok(ProvingContext {
+            per_air: ctx_without_empties,
+        })
     }
 }
 
-struct VmProofInputBuilder<SC: StarkGenericConfig> {
-    curr_air_id: usize,
-    proof_input_per_air: Vec<(usize, AirProofInput<SC>)>,
-}
+// ============ Blanket implementation of VM extension traits for Option<E> ===========
 
-impl<SC: StarkGenericConfig> VmProofInputBuilder<SC> {
-    fn new() -> Self {
-        Self {
-            curr_air_id: 0,
-            proof_input_per_air: vec![],
-        }
-    }
-    /// Adds air proof input if one of the main trace matrices is non-empty.
-    /// Always increments the internal `curr_air_id` regardless of whether a new air proof input was
-    /// added or not.
-    fn add_air_proof_input(&mut self, air_proof_input: AirProofInput<SC>) {
-        let h = if !air_proof_input.raw.cached_mains.is_empty() {
-            air_proof_input.raw.cached_mains[0].height()
-        } else {
-            air_proof_input
-                .raw
-                .common_main
-                .as_ref()
-                .map(|trace| trace.height())
-                .unwrap()
-        };
-        if h > 0 {
-            self.proof_input_per_air
-                .push((self.curr_air_id, air_proof_input));
-        }
-        self.curr_air_id += 1;
-    }
+impl<F, EXT: VmExecutionExtension<F>> VmExecutionExtension<F> for Option<EXT> {
+    type Executor = EXT::Executor;
 
-    fn build(self) -> ProofInput<SC> {
-        ProofInput {
-            per_air: self.proof_input_per_air,
+    fn extend_execution(
+        &self,
+        inventory: &mut ExecutorInventoryBuilder<F, Self::Executor>,
+    ) -> Result<(), ExecutorInventoryError> {
+        if let Some(extension) = self {
+            extension.extend_execution(inventory)
+        } else {
+            Ok(())
         }
     }
 }
 
-/// Generates an AIR proof input of the chip with the given height, if any.
-///
-/// Assumption: an all-0 row is a valid dummy row for `chip`.
-pub fn generate_air_proof_input<SC: StarkGenericConfig, C: Chip<SC>>(
-    chip: C,
-    height: Option<usize>,
-) -> AirProofInput<SC> {
-    let mut proof_input = chip.generate_air_proof_input();
-    if let Some(height) = height {
-        let height = height.next_power_of_two();
-        let main = proof_input.raw.common_main.as_mut().unwrap();
-        assert!(
-            height >= main.height(),
-            "Overridden height must be greater than or equal to the used height"
-        );
-        main.pad_to_height(height, FieldAlgebra::ZERO);
+impl<SC: StarkGenericConfig, EXT: VmCircuitExtension<SC>> VmCircuitExtension<SC> for Option<EXT> {
+    fn extend_circuit(&self, inventory: &mut AirInventory<SC>) -> Result<(), AirInventoryError> {
+        if let Some(extension) = self {
+            extension.extend_circuit(inventory)
+        } else {
+            Ok(())
+        }
     }
-    proof_input
 }
 
 /// A helper trait for downcasting types that may be enums.
@@ -1219,57 +763,13 @@ impl AnyEnum for () {
     }
 }
 
-impl AnyEnum for SharedVariableRangeCheckerChip {
-    fn as_any_kind(&self) -> &dyn Any {
-        self
-    }
-    fn as_any_kind_mut(&mut self) -> &mut dyn Any {
-        self
-    }
-}
-
-pub(crate) enum Either<E, P> {
-    Executor(E),
-    Periphery(P),
-}
-
-impl<'a, E, P> ChipUsageGetter for Either<&'a E, &'a P>
-where
-    E: ChipUsageGetter,
-    P: ChipUsageGetter,
-{
-    fn air_name(&self) -> String {
-        match self {
-            Either::Executor(chip) => chip.air_name(),
-            Either::Periphery(chip) => chip.air_name(),
-        }
-    }
-    fn current_trace_height(&self) -> usize {
-        match self {
-            Either::Executor(chip) => chip.current_trace_height(),
-            Either::Periphery(chip) => chip.current_trace_height(),
-        }
-    }
-    fn trace_width(&self) -> usize {
-        match self {
-            Either::Executor(chip) => chip.trace_width(),
-            Either::Periphery(chip) => chip.trace_width(),
-        }
-    }
-    fn current_trace_cells(&self) -> usize {
-        match self {
-            Either::Executor(chip) => chip.current_trace_cells(),
-            Either::Periphery(chip) => chip.current_trace_cells(),
-        }
-    }
-}
-
 #[cfg(test)]
 mod tests {
-    use p3_baby_bear::BabyBear;
+    use openvm_circuit_derive::AnyEnum;
+    use openvm_stark_sdk::config::baby_bear_poseidon2::BabyBearPoseidon2Config;
 
     use super::*;
-    use crate::system::memory::interface::MemoryInterface;
+    use crate::{arch::VmCircuitConfig, system::memory::interface::MemoryInterfaceAirs};
 
     #[allow(dead_code)]
     #[derive(Copy, Clone)]
@@ -1342,16 +842,18 @@ mod tests {
 
     #[test]
     fn test_system_bus_indices() {
-        let config = SystemConfig::default().with_continuations();
-        let complex = SystemComplex::<BabyBear>::new(config);
-        assert_eq!(complex.base.execution_bus().index(), 0);
-        assert_eq!(complex.base.memory_bus().index(), 1);
-        assert_eq!(complex.base.program_bus().index(), 2);
-        assert_eq!(complex.base.range_checker_bus().index(), 3);
-        match &complex.memory_controller().interface_chip {
-            MemoryInterface::Persistent { boundary_chip, .. } => {
-                assert_eq!(boundary_chip.air.merkle_bus.index, 4);
-                assert_eq!(boundary_chip.air.compression_bus.index, 5);
+        let config = SystemConfig::default();
+        let inventory: AirInventory<BabyBearPoseidon2Config> = config.create_airs().unwrap();
+        let system = inventory.system();
+        let port = system.port();
+        assert_eq!(port.execution_bus.index(), 0);
+        assert_eq!(port.memory_bridge.memory_bus().index(), 1);
+        assert_eq!(port.program_bus.index(), 2);
+        assert_eq!(port.memory_bridge.range_bus().index(), 3);
+        match &system.memory.interface {
+            MemoryInterfaceAirs::Persistent { boundary, .. } => {
+                assert_eq!(boundary.merkle_bus.index, 4);
+                assert_eq!(boundary.compression_bus.index, 5);
             }
             _ => unreachable!(),
         };
diff --git a/crates/vm/src/arch/hasher/mod.rs b/crates/vm/src/arch/hasher/mod.rs
index df90a55e4b..e858da25f9 100644
--- a/crates/vm/src/arch/hasher/mod.rs
+++ b/crates/vm/src/arch/hasher/mod.rs
@@ -24,10 +24,10 @@ pub trait Hasher<const CHUNK: usize, F: Field> {
         leaves[0]
     }
 }
-pub trait HasherChip<const CHUNK: usize, F: Field>: Hasher<CHUNK, F> {
+pub trait HasherChip<const CHUNK: usize, F: Field>: Hasher<CHUNK, F> + Send + Sync {
     /// Stateful version of `hash` for recording the event in the chip.
-    fn compress_and_record(&mut self, left: &[F; CHUNK], right: &[F; CHUNK]) -> [F; CHUNK];
-    fn hash_and_record(&mut self, values: &[F; CHUNK]) -> [F; CHUNK] {
+    fn compress_and_record(&self, left: &[F; CHUNK], right: &[F; CHUNK]) -> [F; CHUNK];
+    fn hash_and_record(&self, values: &[F; CHUNK]) -> [F; CHUNK] {
         self.compress_and_record(values, &[F::ZERO; CHUNK])
     }
 }
diff --git a/crates/vm/src/arch/integration_api.rs b/crates/vm/src/arch/integration_api.rs
index b1116d8c48..1105cb40a8 100644
--- a/crates/vm/src/arch/integration_api.rs
+++ b/crates/vm/src/arch/integration_api.rs
@@ -1,28 +1,23 @@
-use std::{
-    array::from_fn,
-    borrow::Borrow,
-    marker::PhantomData,
-    sync::{Arc, Mutex},
-};
+use std::{array::from_fn, borrow::Borrow, marker::PhantomData, sync::Arc};
 
-use openvm_circuit_primitives::utils::next_power_of_two_or_zero;
 use openvm_circuit_primitives_derive::AlignedBorrow;
 use openvm_instructions::{instruction::Instruction, LocalOpcode};
 use openvm_stark_backend::{
-    air_builders::{debug::DebugConstraintBuilder, symbolic::SymbolicRapBuilder},
     config::{StarkGenericConfig, Val},
     p3_air::{Air, AirBuilder, BaseAir},
-    p3_field::{FieldAlgebra, PrimeField32},
+    p3_field::FieldAlgebra,
     p3_matrix::{dense::RowMajorMatrix, Matrix},
     p3_maybe_rayon::prelude::*,
-    prover::types::AirProofInput,
-    rap::{get_air_name, BaseAirWithPublicValues, PartitionedBaseAir},
-    AirRef, Chip, ChipUsageGetter,
+    prover::{cpu::CpuBackend, types::AirProvingContext},
+    rap::{BaseAirWithPublicValues, PartitionedBaseAir},
+    Chip,
 };
-use serde::{de::DeserializeOwned, Deserialize, Serialize};
+use serde::{Deserialize, Serialize};
 
-use super::{ExecutionState, InstructionExecutor, Result};
-use crate::system::memory::{MemoryController, OfflineMemory};
+use crate::{
+    arch::RowMajorMatrixArena,
+    system::memory::{online::TracingMemory, MemoryAuxColsFactory, SharedMemoryHelper},
+};
 
 /// The interface between primitive AIR and machine adapter AIR.
 pub trait VmAdapterInterface<T> {
@@ -37,60 +32,6 @@ pub trait VmAdapterInterface<T> {
     type ProcessedInstruction;
 }
 
-/// The adapter owns all memory accesses and timestamp changes.
-/// The adapter AIR should also own `ExecutionBridge` and `MemoryBridge`.
-pub trait VmAdapterChip<F> {
-    /// Records generated by adapter before main instruction execution
-    type ReadRecord: Send + Serialize + DeserializeOwned;
-    /// Records generated by adapter after main instruction execution
-    type WriteRecord: Send + Serialize + DeserializeOwned;
-    /// AdapterAir should not have public values
-    type Air: BaseAir<F> + Clone;
-
-    type Interface: VmAdapterInterface<F>;
-
-    /// Given instruction, perform memory reads and return only the read data that the integrator
-    /// needs to use. This is called at the start of instruction execution.
-    ///
-    /// The implementer may choose to store data in the `Self::ReadRecord` struct, for example in
-    /// an [Option], which will later be sent to the `postprocess` method.
-    #[allow(clippy::type_complexity)]
-    fn preprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-    ) -> Result<(
-        <Self::Interface as VmAdapterInterface<F>>::Reads,
-        Self::ReadRecord,
-    )>;
-
-    /// Given instruction and the data to write, perform memory writes and return the `(record,
-    /// next_timestamp)` of the full adapter record for this instruction. This is guaranteed to
-    /// be called after `preprocess`.
-    fn postprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-        output: AdapterRuntimeContext<F, Self::Interface>,
-        read_record: &Self::ReadRecord,
-    ) -> Result<(ExecutionState<u32>, Self::WriteRecord)>;
-
-    /// Populates `row_slice` with values corresponding to `record`.
-    /// The provided `row_slice` will have length equal to `self.air().width()`.
-    /// This function will be called for each row in the trace which is being used, and all other
-    /// rows in the trace will be filled with zeroes.
-    fn generate_trace_row(
-        &self,
-        row_slice: &mut [F],
-        read_record: Self::ReadRecord,
-        write_record: Self::WriteRecord,
-        memory: &OfflineMemory<F>,
-    );
-
-    fn air(&self) -> &Self::Air;
-}
-
 pub trait VmAdapterAir<AB: AirBuilder>: BaseAir<AB::F> {
     type Interface: VmAdapterInterface<AB::Expr>;
 
@@ -111,47 +52,6 @@ pub trait VmAdapterAir<AB: AirBuilder>: BaseAir<AB::F> {
     fn get_from_pc(&self, local: &[AB::Var]) -> AB::Var;
 }
 
-/// Trait to be implemented on primitive chip to integrate with the machine.
-pub trait VmCoreChip<F, I: VmAdapterInterface<F>> {
-    /// Minimum data that must be recorded to be able to generate trace for one row of
-    /// `PrimitiveAir`.
-    type Record: Send + Serialize + DeserializeOwned;
-    /// The primitive AIR with main constraints that do not depend on memory and other
-    /// architecture-specifics.
-    type Air: BaseAirWithPublicValues<F> + Clone;
-
-    #[allow(clippy::type_complexity)]
-    fn execute_instruction(
-        &self,
-        instruction: &Instruction<F>,
-        from_pc: u32,
-        reads: I::Reads,
-    ) -> Result<(AdapterRuntimeContext<F, I>, Self::Record)>;
-
-    fn get_opcode_name(&self, opcode: usize) -> String;
-
-    /// Populates `row_slice` with values corresponding to `record`.
-    /// The provided `row_slice` will have length equal to `self.air().width()`.
-    /// This function will be called for each row in the trace which is being used, and all other
-    /// rows in the trace will be filled with zeroes.
-    fn generate_trace_row(&self, row_slice: &mut [F], record: Self::Record);
-
-    /// Returns a list of public values to publish.
-    fn generate_public_values(&self) -> Vec<F> {
-        vec![]
-    }
-
-    fn air(&self) -> &Self::Air;
-
-    /// Finalize the trace, especially the padded rows if the all-zero rows don't satisfy the
-    /// constraints. This is done **after** records are consumed and the trace matrix is
-    /// generated. Most implementations should just leave the default implementation if padding
-    /// with rows of all 0s satisfies the constraints.
-    fn finalize(&self, _trace: &mut RowMajorMatrix<F>, _num_records: usize) {
-        // do nothing by default
-    }
-}
-
 pub trait VmCoreAir<AB, I>: BaseAirWithPublicValues<AB::F>
 where
     AB: AirBuilder,
@@ -183,22 +83,6 @@ where
     }
 }
 
-pub struct AdapterRuntimeContext<T, I: VmAdapterInterface<T>> {
-    /// Leave as `None` to allow the adapter to decide the `to_pc` automatically.
-    pub to_pc: Option<u32>,
-    pub writes: I::Writes,
-}
-
-impl<T, I: VmAdapterInterface<T>> AdapterRuntimeContext<T, I> {
-    /// Leave `to_pc` as `None` to allow the adapter to decide the `to_pc` automatically.
-    pub fn without_pc(writes: impl Into<I::Writes>) -> Self {
-        Self {
-            to_pc: None,
-            writes: writes.into(),
-        }
-    }
-}
-
 pub struct AdapterAirContext<T, I: VmAdapterInterface<T>> {
     /// Leave as `None` to allow the adapter to decide the `to_pc` automatically.
     pub to_pc: Option<T>,
@@ -207,140 +91,125 @@ pub struct AdapterAirContext<T, I: VmAdapterInterface<T>> {
     pub instruction: I::ProcessedInstruction,
 }
 
-pub struct VmChipWrapper<F, A: VmAdapterChip<F>, C: VmCoreChip<F, A::Interface>> {
-    pub adapter: A,
-    pub core: C,
-    pub records: Vec<(A::ReadRecord, A::WriteRecord, C::Record)>,
-    offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-}
-
-const DEFAULT_RECORDS_CAPACITY: usize = 1 << 20;
+/// Helper trait for CPU tracegen.
+pub trait TraceFiller<F>: Send + Sync {
+    /// Populates `trace`. This function will always be called after
+    /// [`TraceExecutor::execute`], so the `trace` should already contain the records necessary to
+    /// fill in the rest of it.
+    fn fill_trace(
+        &self,
+        mem_helper: &MemoryAuxColsFactory<F>,
+        trace: &mut RowMajorMatrix<F>,
+        rows_used: usize,
+    ) where
+        F: Send + Sync + Clone,
+    {
+        let width = trace.width();
+        trace.values[..rows_used * width]
+            .par_chunks_exact_mut(width)
+            .for_each(|row_slice| {
+                self.fill_trace_row(mem_helper, row_slice);
+            });
+        trace.values[rows_used * width..]
+            .par_chunks_exact_mut(width)
+            .for_each(|row_slice| {
+                self.fill_dummy_trace_row(row_slice);
+            });
+    }
 
-impl<F, A, C> VmChipWrapper<F, A, C>
-where
-    A: VmAdapterChip<F>,
-    C: VmCoreChip<F, A::Interface>,
-{
-    pub fn new(adapter: A, core: C, offline_memory: Arc<Mutex<OfflineMemory<F>>>) -> Self {
-        Self {
-            adapter,
-            core,
-            records: Vec::with_capacity(DEFAULT_RECORDS_CAPACITY),
-            offline_memory,
-        }
+    /// Populates `row_slice`. This function will always be called after
+    /// [`TraceExecutor::execute`], so the `row_slice` should already contain context necessary to
+    /// fill in the rest of the row. This function is called once for each used row of the trace;
+    /// all other (padding) rows of the trace are handled by `fill_dummy_trace_row` instead.
+    ///
+    /// The provided `row_slice` will have length equal to the width of the AIR.
+    fn fill_trace_row(&self, _mem_helper: &MemoryAuxColsFactory<F>, _row_slice: &mut [F]) {
+        unreachable!("fill_trace_row is not implemented")
     }
-}
 
-impl<F, A, M> InstructionExecutor<F> for VmChipWrapper<F, A, M>
-where
-    F: PrimeField32,
-    A: VmAdapterChip<F> + Send + Sync,
-    M: VmCoreChip<F, A::Interface> + Send + Sync,
-{
-    fn execute(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-    ) -> Result<ExecutionState<u32>> {
-        let (reads, read_record) = self.adapter.preprocess(memory, instruction)?;
-        let (output, core_record) =
-            self.core
-                .execute_instruction(instruction, from_state.pc, reads)?;
-        let (to_state, write_record) =
-            self.adapter
-                .postprocess(memory, instruction, from_state, output, &read_record)?;
-        self.records.push((read_record, write_record, core_record));
-        Ok(to_state)
+    /// Populates `row_slice`. This function will be called on dummy rows.
+    /// By default the trace is padded with empty (all 0) rows to make the height a power of 2.
+    ///
+    /// The provided `row_slice` will have length equal to the width of the AIR.
+    fn fill_dummy_trace_row(&self, _row_slice: &mut [F]) {
+        // By default, the row is filled with zeroes
     }
 
-    fn get_opcode_name(&self, opcode: usize) -> String {
-        self.core.get_opcode_name(opcode)
+    /// Returns a list of public values to publish.
+    fn generate_public_values(&self) -> Vec<F> {
+        vec![]
     }
 }
 
-// Note[jpw]: the statement we want is:
-// - when A::Air is an AdapterAir for all AirBuilders needed by stark-backend
-// - and when M::Air is an CoreAir for all AirBuilders needed by stark-backend,
-// then VmAirWrapper<A::Air, M::Air> is an Air for all AirBuilders needed
-// by stark-backend, which is equivalent to saying it implements AirRef<SC>
-// The where clauses to achieve this statement is unfortunately really verbose.
-impl<SC, A, C> Chip<SC> for VmChipWrapper<Val<SC>, A, C>
+/// We want a blanket implementation of `Chip<RA, CpuBackend<SC>>` for any struct that implements
+/// [TraceFiller] (and any `RA: RowMajorMatrixArena`), but Rust orphan rules require a wrapper.
+// @dev: You could make a macro, but it's hard to handle generics in the struct definition.
+#[derive(derive_new::new)]
+pub struct VmChipWrapper<F, FILLER> {
+    pub inner: FILLER,
+    pub mem_helper: SharedMemoryHelper<F>,
+}
+
+impl<SC, FILLER, RA> Chip<RA, CpuBackend<SC>> for VmChipWrapper<Val<SC>, FILLER>
 where
     SC: StarkGenericConfig,
-    Val<SC>: PrimeField32,
-    A: VmAdapterChip<Val<SC>> + Send + Sync,
-    C: VmCoreChip<Val<SC>, A::Interface> + Send + Sync,
-    A::Air: Send + Sync + 'static,
-    A::Air: VmAdapterAir<SymbolicRapBuilder<Val<SC>>>,
-    A::Air: for<'a> VmAdapterAir<DebugConstraintBuilder<'a, SC>>,
-    C::Air: Send + Sync + 'static,
-    C::Air: VmCoreAir<
-        SymbolicRapBuilder<Val<SC>>,
-        <A::Air as VmAdapterAir<SymbolicRapBuilder<Val<SC>>>>::Interface,
-    >,
-    C::Air: for<'a> VmCoreAir<
-        DebugConstraintBuilder<'a, SC>,
-        <A::Air as VmAdapterAir<DebugConstraintBuilder<'a, SC>>>::Interface,
-    >,
+    FILLER: TraceFiller<Val<SC>>,
+    RA: RowMajorMatrixArena<Val<SC>>,
 {
-    fn air(&self) -> AirRef<SC> {
-        let air: VmAirWrapper<A::Air, C::Air> = VmAirWrapper {
-            adapter: self.adapter.air().clone(),
-            core: self.core.air().clone(),
-        };
-        Arc::new(air)
+    fn generate_proving_ctx(&self, arena: RA) -> AirProvingContext<CpuBackend<SC>> {
+        let rows_used = arena.trace_offset() / arena.width();
+        let mut trace = arena.into_matrix();
+        let mem_helper = self.mem_helper.as_borrowed();
+        self.inner.fill_trace(&mem_helper, &mut trace, rows_used);
+
+        AirProvingContext::simple(Arc::new(trace), self.inner.generate_public_values())
     }
+}
 
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
-        let num_records = self.records.len();
-        let height = next_power_of_two_or_zero(num_records);
-        let core_width = self.core.air().width();
-        let adapter_width = self.adapter.air().width();
-        let width = core_width + adapter_width;
-        let mut values = Val::<SC>::zero_vec(height * width);
-
-        let memory = self.offline_memory.lock().unwrap();
-
-        // This zip only goes through records.
-        // The padding rows between records.len()..height are filled with zeros.
-        values
-            .par_chunks_mut(width)
-            .zip(self.records.into_par_iter())
-            .for_each(|(row_slice, record)| {
-                let (adapter_row, core_row) = row_slice.split_at_mut(adapter_width);
-                self.adapter
-                    .generate_trace_row(adapter_row, record.0, record.1, &memory);
-                self.core.generate_trace_row(core_row, record.2);
-            });
+/// A helper trait for expressing generic state accesses within the implementation of
+/// [TraceExecutor]. Note that this is only a helper trait when the same interface of state access
+/// is reused or shared by multiple implementations. It is not required to implement this trait if
+/// it is easier to implement the [TraceExecutor] trait directly without this trait.
+pub trait AdapterTraceExecutor<F>: Clone {
+    const WIDTH: usize;
+    type ReadData;
+    type WriteData;
+    // @dev This can either be a &mut _ type or a struct with &mut _ fields.
+    // The latter is helpful if we want to directly write certain values in place into a trace
+    // matrix.
+    type RecordMut<'a>
+    where
+        Self: 'a;
 
-        let mut trace = RowMajorMatrix::new(values, width);
-        self.core.finalize(&mut trace, num_records);
+    fn start(pc: u32, memory: &TracingMemory, record: &mut Self::RecordMut<'_>);
 
-        AirProofInput::simple(trace, self.core.generate_public_values())
-    }
+    fn read(
+        &self,
+        memory: &mut TracingMemory,
+        instruction: &Instruction<F>,
+        record: &mut Self::RecordMut<'_>,
+    ) -> Self::ReadData;
+
+    fn write(
+        &self,
+        memory: &mut TracingMemory,
+        instruction: &Instruction<F>,
+        data: Self::WriteData,
+        record: &mut Self::RecordMut<'_>,
+    );
 }
 
-impl<F, A, M> ChipUsageGetter for VmChipWrapper<F, A, M>
-where
-    A: VmAdapterChip<F> + Sync,
-    M: VmCoreChip<F, A::Interface> + Sync,
-{
-    fn air_name(&self) -> String {
-        format!(
-            "<{},{}>",
-            get_air_name(self.adapter.air()),
-            get_air_name(self.core.air())
-        )
-    }
-    fn current_trace_height(&self) -> usize {
-        self.records.len()
-    }
-    fn trace_width(&self) -> usize {
-        self.adapter.air().width() + self.core.air().width()
-    }
+// NOTE[jpw]: cannot reuse `TraceSubRowGenerator` trait because we need associated constant
+// `WIDTH`.
+pub trait AdapterTraceFiller<F>: Send + Sync {
+    const WIDTH: usize;
+    /// Post-execution filling of rest of adapter row.
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, adapter_row: &mut [F]);
 }
 
+// ============================== Adapter|Core Air Wrapper ===============================
+
+#[derive(Clone, Copy, derive_new::new)]
 pub struct VmAirWrapper<A, C> {
     pub adapter: A,
     pub core: C,
@@ -455,40 +324,6 @@ impl<
     type ProcessedInstruction = MinimalInstruction<T>;
 }
 
-pub struct VecHeapTwoReadsAdapterInterface<
-    T,
-    const BLOCKS_PER_READ1: usize,
-    const BLOCKS_PER_READ2: usize,
-    const BLOCKS_PER_WRITE: usize,
-    const READ_SIZE: usize,
-    const WRITE_SIZE: usize,
->(PhantomData<T>);
-
-impl<
-        T,
-        const BLOCKS_PER_READ1: usize,
-        const BLOCKS_PER_READ2: usize,
-        const BLOCKS_PER_WRITE: usize,
-        const READ_SIZE: usize,
-        const WRITE_SIZE: usize,
-    > VmAdapterInterface<T>
-    for VecHeapTwoReadsAdapterInterface<
-        T,
-        BLOCKS_PER_READ1,
-        BLOCKS_PER_READ2,
-        BLOCKS_PER_WRITE,
-        READ_SIZE,
-        WRITE_SIZE,
-    >
-{
-    type Reads = (
-        [[T; READ_SIZE]; BLOCKS_PER_READ1],
-        [[T; READ_SIZE]; BLOCKS_PER_READ2],
-    );
-    type Writes = [[T; WRITE_SIZE]; BLOCKS_PER_WRITE];
-    type ProcessedInstruction = MinimalInstruction<T>;
-}
-
 /// Similar to `BasicAdapterInterface`, but it flattens the reads and writes into a single flat
 /// array for each
 pub struct FlatInterface<T, PI, const READ_CELLS: usize, const WRITE_CELLS: usize>(
@@ -608,49 +443,6 @@ mod conversions {
         }
     }
 
-    // AdapterRuntimeContext: VecHeapAdapterInterface -> DynInterface
-    impl<
-            T,
-            const NUM_READS: usize,
-            const BLOCKS_PER_READ: usize,
-            const BLOCKS_PER_WRITE: usize,
-            const READ_SIZE: usize,
-            const WRITE_SIZE: usize,
-        >
-        From<
-            AdapterRuntimeContext<
-                T,
-                VecHeapAdapterInterface<
-                    T,
-                    NUM_READS,
-                    BLOCKS_PER_READ,
-                    BLOCKS_PER_WRITE,
-                    READ_SIZE,
-                    WRITE_SIZE,
-                >,
-            >,
-        > for AdapterRuntimeContext<T, DynAdapterInterface<T>>
-    {
-        fn from(
-            ctx: AdapterRuntimeContext<
-                T,
-                VecHeapAdapterInterface<
-                    T,
-                    NUM_READS,
-                    BLOCKS_PER_READ,
-                    BLOCKS_PER_WRITE,
-                    READ_SIZE,
-                    WRITE_SIZE,
-                >,
-            >,
-        ) -> Self {
-            AdapterRuntimeContext {
-                to_pc: ctx.to_pc,
-                writes: ctx.writes.into(),
-            }
-        }
-    }
-
     // AdapterAirContext: DynInterface -> VecHeapAdapterInterface
     impl<
             T,
@@ -682,155 +474,6 @@ mod conversions {
         }
     }
 
-    // AdapterRuntimeContext: DynInterface -> VecHeapAdapterInterface
-    impl<
-            T,
-            const NUM_READS: usize,
-            const BLOCKS_PER_READ: usize,
-            const BLOCKS_PER_WRITE: usize,
-            const READ_SIZE: usize,
-            const WRITE_SIZE: usize,
-        > From<AdapterRuntimeContext<T, DynAdapterInterface<T>>>
-        for AdapterRuntimeContext<
-            T,
-            VecHeapAdapterInterface<
-                T,
-                NUM_READS,
-                BLOCKS_PER_READ,
-                BLOCKS_PER_WRITE,
-                READ_SIZE,
-                WRITE_SIZE,
-            >,
-        >
-    {
-        fn from(ctx: AdapterRuntimeContext<T, DynAdapterInterface<T>>) -> Self {
-            AdapterRuntimeContext {
-                to_pc: ctx.to_pc,
-                writes: ctx.writes.into(),
-            }
-        }
-    }
-
-    // AdapterAirContext: DynInterface -> VecHeapTwoReadsAdapterInterface
-    impl<
-            T: Clone,
-            const BLOCKS_PER_READ1: usize,
-            const BLOCKS_PER_READ2: usize,
-            const BLOCKS_PER_WRITE: usize,
-            const READ_SIZE: usize,
-            const WRITE_SIZE: usize,
-        > From<AdapterAirContext<T, DynAdapterInterface<T>>>
-        for AdapterAirContext<
-            T,
-            VecHeapTwoReadsAdapterInterface<
-                T,
-                BLOCKS_PER_READ1,
-                BLOCKS_PER_READ2,
-                BLOCKS_PER_WRITE,
-                READ_SIZE,
-                WRITE_SIZE,
-            >,
-        >
-    {
-        fn from(ctx: AdapterAirContext<T, DynAdapterInterface<T>>) -> Self {
-            AdapterAirContext {
-                to_pc: ctx.to_pc,
-                reads: ctx.reads.into(),
-                writes: ctx.writes.into(),
-                instruction: ctx.instruction.into(),
-            }
-        }
-    }
-
-    // AdapterRuntimeContext: DynInterface -> VecHeapAdapterInterface
-    impl<
-            T,
-            const BLOCKS_PER_READ1: usize,
-            const BLOCKS_PER_READ2: usize,
-            const BLOCKS_PER_WRITE: usize,
-            const READ_SIZE: usize,
-            const WRITE_SIZE: usize,
-        > From<AdapterRuntimeContext<T, DynAdapterInterface<T>>>
-        for AdapterRuntimeContext<
-            T,
-            VecHeapTwoReadsAdapterInterface<
-                T,
-                BLOCKS_PER_READ1,
-                BLOCKS_PER_READ2,
-                BLOCKS_PER_WRITE,
-                READ_SIZE,
-                WRITE_SIZE,
-            >,
-        >
-    {
-        fn from(ctx: AdapterRuntimeContext<T, DynAdapterInterface<T>>) -> Self {
-            AdapterRuntimeContext {
-                to_pc: ctx.to_pc,
-                writes: ctx.writes.into(),
-            }
-        }
-    }
-
-    // AdapterRuntimeContext: BasicInterface -> VecHeapAdapterInterface
-    impl<
-            T,
-            PI,
-            const BASIC_NUM_READS: usize,
-            const BASIC_NUM_WRITES: usize,
-            const NUM_READS: usize,
-            const BLOCKS_PER_READ: usize,
-            const BLOCKS_PER_WRITE: usize,
-            const READ_SIZE: usize,
-            const WRITE_SIZE: usize,
-        >
-        From<
-            AdapterRuntimeContext<
-                T,
-                BasicAdapterInterface<
-                    T,
-                    PI,
-                    BASIC_NUM_READS,
-                    BASIC_NUM_WRITES,
-                    READ_SIZE,
-                    WRITE_SIZE,
-                >,
-            >,
-        >
-        for AdapterRuntimeContext<
-            T,
-            VecHeapAdapterInterface<
-                T,
-                NUM_READS,
-                BLOCKS_PER_READ,
-                BLOCKS_PER_WRITE,
-                READ_SIZE,
-                WRITE_SIZE,
-            >,
-        >
-    {
-        fn from(
-            ctx: AdapterRuntimeContext<
-                T,
-                BasicAdapterInterface<
-                    T,
-                    PI,
-                    BASIC_NUM_READS,
-                    BASIC_NUM_WRITES,
-                    READ_SIZE,
-                    WRITE_SIZE,
-                >,
-            >,
-        ) -> Self {
-            assert_eq!(BASIC_NUM_WRITES, BLOCKS_PER_WRITE);
-            let mut writes_it = ctx.writes.into_iter();
-            let writes = from_fn(|_| writes_it.next().unwrap());
-            AdapterRuntimeContext {
-                to_pc: ctx.to_pc,
-                writes,
-            }
-        }
-    }
-
     // AdapterAirContext: BasicInterface -> VecHeapAdapterInterface
     impl<
             T,
@@ -985,79 +628,6 @@ mod conversions {
         }
     }
 
-    // AdapterRuntimeContext: BasicInterface -> FlatInterface
-    impl<
-            T,
-            PI,
-            const NUM_READS: usize,
-            const NUM_WRITES: usize,
-            const READ_SIZE: usize,
-            const WRITE_SIZE: usize,
-            const READ_CELLS: usize,
-            const WRITE_CELLS: usize,
-        >
-        From<
-            AdapterRuntimeContext<
-                T,
-                BasicAdapterInterface<T, PI, NUM_READS, NUM_WRITES, READ_SIZE, WRITE_SIZE>,
-            >,
-        > for AdapterRuntimeContext<T, FlatInterface<T, PI, READ_CELLS, WRITE_CELLS>>
-    {
-        /// ## Panics
-        /// If `WRITE_CELLS != NUM_WRITES * WRITE_SIZE`.
-        /// This is a runtime assertion until Rust const generics expressions are stabilized.
-        fn from(
-            ctx: AdapterRuntimeContext<
-                T,
-                BasicAdapterInterface<T, PI, NUM_READS, NUM_WRITES, READ_SIZE, WRITE_SIZE>,
-            >,
-        ) -> AdapterRuntimeContext<T, FlatInterface<T, PI, READ_CELLS, WRITE_CELLS>> {
-            assert_eq!(WRITE_CELLS, NUM_WRITES * WRITE_SIZE);
-            let mut writes_it = ctx.writes.into_iter().flatten();
-            let writes = from_fn(|_| writes_it.next().unwrap());
-            AdapterRuntimeContext {
-                to_pc: ctx.to_pc,
-                writes,
-            }
-        }
-    }
-
-    // AdapterRuntimeContext: FlatInterface -> BasicInterface
-    impl<
-            T: FieldAlgebra,
-            PI,
-            const NUM_READS: usize,
-            const NUM_WRITES: usize,
-            const READ_SIZE: usize,
-            const WRITE_SIZE: usize,
-            const READ_CELLS: usize,
-            const WRITE_CELLS: usize,
-        > From<AdapterRuntimeContext<T, FlatInterface<T, PI, READ_CELLS, WRITE_CELLS>>>
-        for AdapterRuntimeContext<
-            T,
-            BasicAdapterInterface<T, PI, NUM_READS, NUM_WRITES, READ_SIZE, WRITE_SIZE>,
-        >
-    {
-        /// ## Panics
-        /// If `WRITE_CELLS != NUM_WRITES * WRITE_SIZE`.
-        /// This is a runtime assertion until Rust const generics expressions are stabilized.
-        fn from(
-            ctx: AdapterRuntimeContext<T, FlatInterface<T, PI, READ_CELLS, WRITE_CELLS>>,
-        ) -> AdapterRuntimeContext<
-            T,
-            BasicAdapterInterface<T, PI, NUM_READS, NUM_WRITES, READ_SIZE, WRITE_SIZE>,
-        > {
-            assert_eq!(WRITE_CELLS, NUM_WRITES * WRITE_SIZE);
-            let mut writes_it = ctx.writes.into_iter();
-            let writes: [[T; WRITE_SIZE]; NUM_WRITES] =
-                from_fn(|_| from_fn(|_| writes_it.next().unwrap()));
-            AdapterRuntimeContext {
-                to_pc: ctx.to_pc,
-                writes,
-            }
-        }
-    }
-
     impl<T> From<Vec<T>> for DynArray<T> {
         fn from(v: Vec<T>) -> Self {
             Self(v)
@@ -1169,35 +739,6 @@ mod conversions {
         }
     }
 
-    // AdapterRuntimeContext: BasicInterface -> DynInterface
-    impl<
-            T,
-            PI,
-            const NUM_READS: usize,
-            const NUM_WRITES: usize,
-            const READ_SIZE: usize,
-            const WRITE_SIZE: usize,
-        >
-        From<
-            AdapterRuntimeContext<
-                T,
-                BasicAdapterInterface<T, PI, NUM_READS, NUM_WRITES, READ_SIZE, WRITE_SIZE>,
-            >,
-        > for AdapterRuntimeContext<T, DynAdapterInterface<T>>
-    {
-        fn from(
-            ctx: AdapterRuntimeContext<
-                T,
-                BasicAdapterInterface<T, PI, NUM_READS, NUM_WRITES, READ_SIZE, WRITE_SIZE>,
-            >,
-        ) -> Self {
-            AdapterRuntimeContext {
-                to_pc: ctx.to_pc,
-                writes: ctx.writes.into(),
-            }
-        }
-    }
-
     // AdapterAirContext: DynInterface -> BasicInterface
     impl<
             T,
@@ -1224,28 +765,6 @@ mod conversions {
         }
     }
 
-    // AdapterRuntimeContext: DynInterface -> BasicInterface
-    impl<
-            T,
-            PI,
-            const NUM_READS: usize,
-            const NUM_WRITES: usize,
-            const READ_SIZE: usize,
-            const WRITE_SIZE: usize,
-        > From<AdapterRuntimeContext<T, DynAdapterInterface<T>>>
-        for AdapterRuntimeContext<
-            T,
-            BasicAdapterInterface<T, PI, NUM_READS, NUM_WRITES, READ_SIZE, WRITE_SIZE>,
-        >
-    {
-        fn from(ctx: AdapterRuntimeContext<T, DynAdapterInterface<T>>) -> Self {
-            AdapterRuntimeContext {
-                to_pc: ctx.to_pc,
-                writes: ctx.writes.into(),
-            }
-        }
-    }
-
     // AdapterAirContext: FlatInterface -> DynInterface
     impl<T: Clone, PI: Into<DynArray<T>>, const READ_CELLS: usize, const WRITE_CELLS: usize>
         From<AdapterAirContext<T, FlatInterface<T, PI, READ_CELLS, WRITE_CELLS>>>
@@ -1261,21 +780,6 @@ mod conversions {
         }
     }
 
-    // AdapterRuntimeContext: FlatInterface -> DynInterface
-    impl<T: Clone, PI, const READ_CELLS: usize, const WRITE_CELLS: usize>
-        From<AdapterRuntimeContext<T, FlatInterface<T, PI, READ_CELLS, WRITE_CELLS>>>
-        for AdapterRuntimeContext<T, DynAdapterInterface<T>>
-    {
-        fn from(
-            ctx: AdapterRuntimeContext<T, FlatInterface<T, PI, READ_CELLS, WRITE_CELLS>>,
-        ) -> Self {
-            AdapterRuntimeContext {
-                to_pc: ctx.to_pc,
-                writes: ctx.writes.to_vec().into(),
-            }
-        }
-    }
-
     impl<T> From<MinimalInstruction<T>> for DynArray<T> {
         fn from(m: MinimalInstruction<T>) -> Self {
             Self(vec![m.is_valid, m.opcode])
diff --git a/crates/vm/src/arch/interpreter.rs b/crates/vm/src/arch/interpreter.rs
new file mode 100644
index 0000000000..10bb981804
--- /dev/null
+++ b/crates/vm/src/arch/interpreter.rs
@@ -0,0 +1,638 @@
+use std::{
+    alloc::{alloc, dealloc, handle_alloc_error, Layout},
+    borrow::{Borrow, BorrowMut},
+    ptr::NonNull,
+};
+
+use itertools::Itertools;
+use openvm_circuit_primitives_derive::AlignedBytesBorrow;
+use openvm_instructions::{
+    exe::{SparseMemoryImage, VmExe},
+    instruction::Instruction,
+    program::{Program, DEFAULT_PC_STEP},
+    LocalOpcode, SystemOpcode,
+};
+use openvm_stark_backend::p3_field::PrimeField32;
+use tracing::info_span;
+
+use crate::{
+    arch::{
+        execution_mode::{
+            ExecutionCtx, ExecutionCtxTrait, MeteredCostCtx, MeteredCostExecutionOutput,
+            MeteredCtx, MeteredExecutionCtxTrait, Segment,
+        },
+        ExecuteFunc, ExecutionError, Executor, ExecutorInventory, ExitCode, MeteredExecutor,
+        StaticProgramError, Streams, SystemConfig, VmExecState, VmState,
+    },
+    system::memory::online::GuestMemory,
+};
+
+/// VM pure executor (E1/E2 executor) which doesn't consider trace generation.
+/// Note: This executor doesn't hold any VM state and can be used for multiple executions.
+///
+/// The generic `Ctx` and the constructor used determine whether this supports pure execution or
+/// metered execution.
+// NOTE: the lifetime 'a represents the lifetime of the borrowed ExecutorInventory, which must
+// outlive the InterpretedInstance because `pre_compute_buf` may contain pointers to references
+// held by executors.
+pub struct InterpretedInstance<'a, F, Ctx> {
+    system_config: SystemConfig,
+    // NOTE: this is not actually dead code: the `pre_compute` slices stored in `pre_compute_insns`
+    // point into this buffer, so it must be kept alive for as long as they are used.
+    #[allow(dead_code)]
+    pre_compute_buf: AlignedBuf,
+    /// Instruction table of function pointers and pointers to the pre-computed buffer. Indexed by
+    /// `pc_index = (pc - pc_base) / DEFAULT_PC_STEP`.
+    pre_compute_insns: Vec<PreComputeInstruction<'a, F, Ctx>>,
+
+    pc_base: u32,
+    pc_start: u32,
+
+    init_memory: SparseMemoryImage,
+}
+
+struct PreComputeInstruction<'a, F, Ctx> {
+    pub handler: ExecuteFunc<F, Ctx>, // called as `handler(pre_compute, vm_state)`
+    pub pre_compute: &'a [u8], // this instruction's slot of the pre-compute buffer
+}
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct TerminatePreCompute {
+    exit_code: u32,
+}
+
+macro_rules! execute_with_metrics {
+    ($span:literal, $pc_base:expr, $exec_state:expr, $pre_compute_insts:expr) => {{
+        #[cfg(feature = "metrics")]
+        let start = std::time::Instant::now();
+        #[cfg(feature = "metrics")]
+        let start_instret = $exec_state.instret;
+
+        info_span!($span).in_scope(|| unsafe {
+            execute_trampoline($pc_base, $exec_state, $pre_compute_insts);
+        });
+
+        #[cfg(feature = "metrics")]
+        {
+            let elapsed = start.elapsed();
+            let insns = $exec_state.instret - start_instret;
+            tracing::info!("instructions_executed={insns}");
+            metrics::counter!(concat!($span, "_insns")).absolute(insns);
+            metrics::gauge!(concat!($span, "_insn_mi/s"))
+                .set(insns as f64 / elapsed.as_micros() as f64);
+        }
+    }};
+}
+
+// Constructors for E1 and E2 respectively, which generate pre-computed buffers and function
+// pointers
+// - Generic in `Ctx`
+
+impl<'a, F, Ctx> InterpretedInstance<'a, F, Ctx>
+where
+    F: PrimeField32,
+    Ctx: ExecutionCtxTrait,
+{
+    /// Creates a new interpreter instance for pure execution.
+    // (E1 execution)
+    pub fn new<E>(
+        inventory: &'a ExecutorInventory<E>,
+        exe: &VmExe<F>,
+    ) -> Result<Self, StaticProgramError>
+    where
+        E: Executor<F>,
+    {
+        let program = &exe.program;
+        let pre_compute_max_size = get_pre_compute_max_size(program, inventory);
+        let mut pre_compute_buf = alloc_pre_compute_buf(program.len(), pre_compute_max_size);
+        let mut split_pre_compute_buf =
+            split_pre_compute_buf(program, &mut pre_compute_buf, pre_compute_max_size);
+        let pre_compute_insns = get_pre_compute_instructions::<F, Ctx, E>(
+            program,
+            inventory,
+            &mut split_pre_compute_buf,
+        )?;
+        let pc_base = program.pc_base;
+        let pc_start = exe.pc_start;
+        let init_memory = exe.init_memory.clone();
+
+        Ok(Self {
+            system_config: inventory.config().clone(),
+            pre_compute_buf,
+            pre_compute_insns,
+            pc_base,
+            pc_start,
+            init_memory,
+        })
+    }
+}
+
+impl<'a, F, Ctx> InterpretedInstance<'a, F, Ctx>
+where
+    F: PrimeField32,
+    Ctx: MeteredExecutionCtxTrait,
+{
+    /// Creates a new interpreter instance for metered execution.
+    // (E2 execution)
+    pub fn new_metered<E>(
+        inventory: &'a ExecutorInventory<E>,
+        exe: &VmExe<F>,
+        executor_idx_to_air_idx: &[usize],
+    ) -> Result<Self, StaticProgramError>
+    where
+        E: MeteredExecutor<F>,
+    {
+        let program = &exe.program;
+        let pre_compute_max_size = get_metered_pre_compute_max_size(program, inventory);
+        let mut pre_compute_buf = alloc_pre_compute_buf(program.len(), pre_compute_max_size);
+        let mut split_pre_compute_buf =
+            split_pre_compute_buf(program, &mut pre_compute_buf, pre_compute_max_size);
+        let pre_compute_insns = get_metered_pre_compute_instructions::<F, Ctx, E>(
+            program,
+            inventory,
+            executor_idx_to_air_idx,
+            &mut split_pre_compute_buf,
+        )?;
+
+        let pc_base = program.pc_base;
+        let pc_start = exe.pc_start;
+        let init_memory = exe.init_memory.clone();
+
+        Ok(Self {
+            system_config: inventory.config().clone(),
+            pre_compute_buf,
+            pre_compute_insns,
+            pc_base,
+            pc_start,
+            init_memory,
+        })
+    }
+}
+
+// Execute functions are specialized to the relevant Ctx types to provide more streamlined APIs
+
+impl<F> InterpretedInstance<'_, F, ExecutionCtx>
+where
+    F: PrimeField32,
+{
+    /// Pure execution, without metering, for the given `inputs`. Execution begins from the initial
+    /// state specified by the `VmExe`. This function executes the program until either termination
+    /// if `num_insns` is `None` or for exactly `num_insns` instructions if `num_insns` is `Some`.
+    ///
+    /// Returns the final VM state when execution stops.
+    pub fn execute(
+        &self,
+        inputs: impl Into<Streams<F>>,
+        num_insns: Option<u64>,
+    ) -> Result<VmState<F, GuestMemory>, ExecutionError> {
+        let vm_state = VmState::initial(
+            &self.system_config,
+            &self.init_memory,
+            self.pc_start,
+            inputs,
+        );
+        self.execute_from_state(vm_state, num_insns)
+    }
+
+    /// Pure execution, without metering, from the given `VmState`. This function executes the
+    /// program until either termination if `num_insns` is `None` or for exactly `num_insns`
+    /// instructions if `num_insns` is `Some`.
+    ///
+    /// Returns the final VM state when execution stops.
+    pub fn execute_from_state(
+        &self,
+        from_state: VmState<F, GuestMemory>,
+        num_insns: Option<u64>,
+    ) -> Result<VmState<F, GuestMemory>, ExecutionError> {
+        let ctx = ExecutionCtx::new(num_insns);
+        let mut exec_state = VmExecState::new(from_state, ctx);
+        // Start execution
+        execute_with_metrics!(
+            "execute_e1",
+            self.pc_base,
+            &mut exec_state,
+            &self.pre_compute_insns
+        );
+        if num_insns.is_some() {
+            check_exit_code(exec_state.exit_code)?;
+        } else {
+            check_termination(exec_state.exit_code)?;
+        }
+        Ok(exec_state.vm_state)
+    }
+}
+
+impl<F> InterpretedInstance<'_, F, MeteredCtx>
+where
+    F: PrimeField32,
+{
+    /// Metered execution for the given `inputs`. Execution begins from the initial
+    /// state specified by the `VmExe`. This function executes the program until termination.
+    ///
+    /// Returns the segmentation boundary data and the final VM state when execution stops.
+    pub fn execute_metered(
+        &self,
+        inputs: impl Into<Streams<F>>,
+        ctx: MeteredCtx,
+    ) -> Result<(Vec<Segment>, VmState<F, GuestMemory>), ExecutionError> {
+        let vm_state = VmState::initial(
+            &self.system_config,
+            &self.init_memory,
+            self.pc_start,
+            inputs,
+        );
+        self.execute_metered_from_state(vm_state, ctx)
+    }
+
+    /// Metered execution for the given `VmState`. This function executes the program until
+    /// termination.
+    ///
+    /// Returns the segmentation boundary data and the final VM state when execution stops.
+    ///
+    /// The [MeteredCtx] can be constructed using either
+    /// [VmExecutor::build_metered_ctx](super::VmExecutor::build_metered_ctx) or
+    /// [VirtualMachine::build_metered_ctx](super::VirtualMachine::build_metered_ctx).
+    pub fn execute_metered_from_state(
+        &self,
+        from_state: VmState<F, GuestMemory>,
+        ctx: MeteredCtx,
+    ) -> Result<(Vec<Segment>, VmState<F, GuestMemory>), ExecutionError> {
+        let mut exec_state = VmExecState::new(from_state, ctx);
+        // Start execution
+        execute_with_metrics!(
+            "execute_metered",
+            self.pc_base,
+            &mut exec_state,
+            &self.pre_compute_insns
+        );
+        check_termination(exec_state.exit_code)?;
+        let VmExecState { vm_state, ctx, .. } = exec_state;
+        Ok((ctx.into_segments(), vm_state))
+    }
+}
+
+impl<F> InterpretedInstance<'_, F, MeteredCostCtx>
+where
+    F: PrimeField32,
+{
+    /// Metered cost execution for the given `inputs`. Execution begins from the initial
+    /// state specified by the `VmExe`. This function executes the program until termination.
+    ///
+    /// Returns the trace cost when execution stops.
+    pub fn execute_metered_cost(
+        &self,
+        inputs: impl Into<Streams<F>>,
+        ctx: MeteredCostCtx,
+    ) -> Result<MeteredCostExecutionOutput, ExecutionError> {
+        let vm_state = VmState::initial(
+            &self.system_config,
+            &self.init_memory,
+            self.pc_start,
+            inputs,
+        );
+        self.execute_metered_cost_from_state(vm_state, ctx)
+    }
+
+    /// Metered cost execution for the given `VmState`. This function executes the program until
+    /// termination.
+    ///
+    /// Returns the trace cost when execution stops.
+    pub fn execute_metered_cost_from_state(
+        &self,
+        from_state: VmState<F, GuestMemory>,
+        ctx: MeteredCostCtx,
+    ) -> Result<MeteredCostExecutionOutput, ExecutionError> {
+        let mut exec_state = VmExecState::new(from_state, ctx);
+        // Start execution
+        execute_with_metrics!(
+            "execute_metered_cost",
+            self.pc_base,
+            &mut exec_state,
+            &self.pre_compute_insns
+        );
+        check_exit_code(exec_state.exit_code)?;
+        let VmExecState { ctx, vm_state, .. } = exec_state;
+        let output = MeteredCostExecutionOutput::new(vm_state.instret, ctx.cost);
+        Ok(output)
+    }
+}
+
+fn alloc_pre_compute_buf(program_len: usize, pre_compute_max_size: usize) -> AlignedBuf {
+    let buf_len = program_len * pre_compute_max_size;
+    AlignedBuf::uninit(buf_len, pre_compute_max_size)
+}
+
+fn split_pre_compute_buf<'a, F>(
+    program: &Program<F>,
+    pre_compute_buf: &'a mut AlignedBuf,
+    pre_compute_max_size: usize,
+) -> Vec<&'a mut [u8]> {
+    let program_len = program.instructions_and_debug_infos.len();
+    let buf_len = program_len * pre_compute_max_size;
+    let mut pre_compute_buf_ptr =
+        unsafe { std::slice::from_raw_parts_mut(pre_compute_buf.ptr, buf_len) };
+    let mut split_pre_compute_buf = Vec::with_capacity(program_len);
+    for _ in 0..program_len {
+        let (first, last) = pre_compute_buf_ptr.split_at_mut(pre_compute_max_size);
+        pre_compute_buf_ptr = last;
+        split_pre_compute_buf.push(first);
+    }
+    split_pre_compute_buf
+}
+
+/// Executes using function pointers with the trampoline (loop) approach.
+///
+/// # Safety
+/// Every `pre_compute` slice in `fn_ptrs` must point to a buffer that outlives this call.
+#[inline(always)]
+unsafe fn execute_trampoline<F: PrimeField32, Ctx: ExecutionCtxTrait>(
+    pc_base: u32,
+    vm_state: &mut VmExecState<F, GuestMemory, Ctx>,
+    fn_ptrs: &[PreComputeInstruction<F, Ctx>],
+) {
+    while vm_state
+        .exit_code
+        .as_ref()
+        .is_ok_and(|exit_code| exit_code.is_none())
+    {
+        if Ctx::should_suspend(vm_state) {
+            break;
+        }
+        let pc_index = get_pc_index(pc_base, vm_state.pc);
+        if let Some(inst) = fn_ptrs.get(pc_index) {
+            // SAFETY: pre_compute assumed to live long enough
+            unsafe { (inst.handler)(inst.pre_compute, vm_state) };
+        } else {
+            vm_state.exit_code = Err(ExecutionError::PcOutOfBounds {
+                pc: vm_state.pc,
+                pc_base,
+                program_len: fn_ptrs.len(),
+            });
+        }
+    }
+    if vm_state
+        .exit_code
+        .as_ref()
+        .is_ok_and(|exit_code| exit_code.is_some())
+    {
+        Ctx::on_terminate(vm_state);
+    }
+}
+
+#[inline(always)]
+pub fn get_pc_index(pc_base: u32, pc: u32) -> usize {
+    (pc.wrapping_sub(pc_base) / DEFAULT_PC_STEP) as usize // `pc < pc_base` wraps; no panic
+}
+
+/// Bytes allocated according to the given [Layout]; freed with the same layout on drop.
+// @dev: This is duplicated from the openvm crate, but it doesn't seem worth importing `openvm`
+// here just for this.
+pub struct AlignedBuf {
+    pub ptr: *mut u8, // start of the allocation, or a dangling pointer when `layout.size() == 0`
+    pub layout: Layout, // layout passed to `alloc`, reused for `dealloc`
+}
+
+impl AlignedBuf {
+    /// Allocate a new buffer whose start address is aligned to `align` bytes.
+    /// *NOTE* if `len` is zero, nothing is allocated and `ptr` is dangling (aligned for `u128`).
+    pub fn uninit(len: usize, align: usize) -> Self {
+        let layout = Layout::from_size_align(len, align).unwrap();
+        if layout.size() == 0 {
+            return Self {
+                ptr: NonNull::<u128>::dangling().as_ptr() as *mut u8,
+                layout,
+            };
+        }
+        // SAFETY: `layout` has nonzero size, as checked above
+        let ptr = unsafe { alloc(layout) };
+        if ptr.is_null() {
+            handle_alloc_error(layout);
+        }
+        AlignedBuf { ptr, layout }
+    }
+}
+
+impl Drop for AlignedBuf {
+    fn drop(&mut self) {
+        if self.layout.size() != 0 {
+            unsafe {
+                dealloc(self.ptr, self.layout);
+            }
+        }
+    }
+}
+
+unsafe fn terminate_execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &TerminatePreCompute = pre_compute.borrow();
+    vm_state.instret += 1;
+    vm_state.exit_code = Ok(Some(pre_compute.exit_code));
+}
+
+/// Size in bytes of one pre-compute slot for pure execution: the largest pre-compute size needed
+/// by any instruction in `program`, rounded up to the next power of two.
+///
+/// An opcode without an executor counts as 0 (instead of panicking) so that building the handlers
+/// can report it as `StaticProgramError::DisabledOperation`. An empty program yields 1 rather than
+/// a panic.
+fn get_pre_compute_max_size<F, E: Executor<F>>(
+    program: &Program<F>,
+    inventory: &ExecutorInventory<E>,
+) -> usize {
+    program
+        .instructions_and_debug_infos
+        .iter()
+        .flatten()
+        .map(|(inst, _)| {
+            system_opcode_pre_compute_size(inst).unwrap_or_else(|| {
+                inventory
+                    .get_executor(inst.opcode)
+                    .map_or(0, |executor| executor.pre_compute_size())
+            })
+        })
+        .max()
+        .unwrap_or(0)
+        .next_power_of_two()
+}
+
+/// Size in bytes of one pre-compute slot for metered execution: the largest metered pre-compute
+/// size needed by any instruction in `program`, rounded up to the next power of two.
+///
+/// An opcode without an executor counts as 0 (instead of panicking) so that building the handlers
+/// can report it as `StaticProgramError::DisabledOperation`. An empty program yields 1 rather than
+/// a panic.
+fn get_metered_pre_compute_max_size<F, E: MeteredExecutor<F>>(
+    program: &Program<F>,
+    inventory: &ExecutorInventory<E>,
+) -> usize {
+    program
+        .instructions_and_debug_infos
+        .iter()
+        .flatten()
+        .map(|(inst, _)| {
+            system_opcode_pre_compute_size(inst).unwrap_or_else(|| {
+                inventory
+                    .get_executor(inst.opcode)
+                    .map_or(0, |executor| executor.metered_pre_compute_size())
+            })
+        })
+        .max()
+        .unwrap_or(0)
+        .next_power_of_two()
+}
+
+fn system_opcode_pre_compute_size<F>(inst: &Instruction<F>) -> Option<usize> {
+    if inst.opcode == SystemOpcode::TERMINATE.global_opcode() {
+        return Some(size_of::<TerminatePreCompute>());
+    }
+    None
+}
+
+fn get_pre_compute_instructions<'a, F, Ctx, E>(
+    program: &Program<F>,
+    inventory: &'a ExecutorInventory<E>,
+    pre_compute: &mut [&mut [u8]],
+) -> Result<Vec<PreComputeInstruction<'a, F, Ctx>>, StaticProgramError>
+where
+    F: PrimeField32,
+    Ctx: ExecutionCtxTrait,
+    E: Executor<F>,
+{
+    program
+        .instructions_and_debug_infos
+        .iter()
+        .zip_eq(pre_compute.iter_mut())
+        .enumerate()
+        .map(|(i, (inst_opt, buf))| {
+            // SAFETY: we cast to raw pointer and then borrow to remove the lifetime. This is safe
+            // only in the current context because `buf` comes from `pre_compute_buf` which will
+            // outlive the returned `PreComputeInstruction`s.
+            let buf: &mut [u8] = unsafe { &mut *(*buf as *mut [u8]) };
+            let pre_inst = if let Some((inst, _)) = inst_opt {
+                tracing::trace!("get_pre_compute_instruction {inst:?}");
+                let pc = program.pc_base + i as u32 * DEFAULT_PC_STEP;
+                if let Some(handler) = get_system_opcode_handler(inst, buf) {
+                    PreComputeInstruction {
+                        handler,
+                        pre_compute: buf,
+                    }
+                } else if let Some(executor) = inventory.get_executor(inst.opcode) {
+                    PreComputeInstruction {
+                        handler: executor.pre_compute(pc, inst, buf)?,
+                        pre_compute: buf,
+                    }
+                } else {
+                    return Err(StaticProgramError::DisabledOperation {
+                        pc,
+                        opcode: inst.opcode,
+                    });
+                }
+            } else {
+                // Dead instruction at this pc
+                PreComputeInstruction {
+                    handler: |_, vm_state| {
+                        vm_state.exit_code = Err(ExecutionError::Unreachable(vm_state.pc));
+                    },
+                    pre_compute: buf,
+                }
+            };
+            Ok(pre_inst)
+        })
+        .collect::<Result<Vec<_>, _>>()
+}
+
+fn get_metered_pre_compute_instructions<'a, F, Ctx, E>(
+    program: &Program<F>,
+    inventory: &'a ExecutorInventory<E>,
+    executor_idx_to_air_idx: &[usize],
+    pre_compute: &mut [&mut [u8]],
+) -> Result<Vec<PreComputeInstruction<'a, F, Ctx>>, StaticProgramError>
+where
+    F: PrimeField32,
+    Ctx: MeteredExecutionCtxTrait,
+    E: MeteredExecutor<F>,
+{
+    program
+        .instructions_and_debug_infos
+        .iter()
+        .zip_eq(pre_compute.iter_mut())
+        .enumerate()
+        .map(|(i, (inst_opt, buf))| {
+            // SAFETY: we cast to raw pointer and then borrow to remove the lifetime. This is safe
+            // only in the current context because `buf` comes from `pre_compute_buf` which will
+            // outlive the returned `PreComputeInstruction`s.
+            let buf: &mut [u8] = unsafe { &mut *(*buf as *mut [u8]) };
+            let pre_inst = if let Some((inst, _)) = inst_opt {
+                tracing::trace!("get_metered_pre_compute_instruction {inst:?}");
+                let pc = program.pc_base + i as u32 * DEFAULT_PC_STEP;
+                if let Some(handler) = get_system_opcode_handler(inst, buf) {
+                    PreComputeInstruction {
+                        handler,
+                        pre_compute: buf,
+                    }
+                } else if let Some(&executor_idx) = inventory.instruction_lookup.get(&inst.opcode) {
+                    let executor_idx = executor_idx as usize;
+                    let executor = inventory
+                        .executors
+                        .get(executor_idx)
+                        .expect("ExecutorInventory ensures executor_idx is in bounds");
+                    let air_idx = executor_idx_to_air_idx[executor_idx];
+                    PreComputeInstruction {
+                        handler: executor.metered_pre_compute(air_idx, pc, inst, buf)?,
+                        pre_compute: buf,
+                    }
+                } else {
+                    return Err(StaticProgramError::DisabledOperation {
+                        pc,
+                        opcode: inst.opcode,
+                    });
+                }
+            } else {
+                PreComputeInstruction {
+                    handler: |_, vm_state| {
+                        vm_state.exit_code = Err(ExecutionError::Unreachable(vm_state.pc));
+                    },
+                    pre_compute: buf,
+                }
+            };
+            Ok(pre_inst)
+        })
+        .collect::<Result<Vec<_>, _>>()
+}
+
+fn get_system_opcode_handler<F: PrimeField32, Ctx: ExecutionCtxTrait>(
+    inst: &Instruction<F>,
+    buf: &mut [u8],
+) -> Option<ExecuteFunc<F, Ctx>> {
+    if inst.opcode == SystemOpcode::TERMINATE.global_opcode() {
+        let pre_compute: &mut TerminatePreCompute = buf.borrow_mut();
+        pre_compute.exit_code = inst.c.as_canonical_u32();
+        return Some(terminate_execute_e12_impl);
+    }
+    None
+}
+
+/// Errors if exit code is either error or terminated with non-successful exit code.
+fn check_exit_code(exit_code: Result<Option<u32>, ExecutionError>) -> Result<(), ExecutionError> {
+    let exit_code = exit_code?;
+    if let Some(exit_code) = exit_code {
+        // This means execution did terminate
+        if exit_code != ExitCode::Success as u32 {
+            return Err(ExecutionError::FailedWithExitCode(exit_code));
+        }
+    }
+    Ok(())
+}
+
+/// Like [check_exit_code], but also errors if the program did not terminate.
+fn check_termination(exit_code: Result<Option<u32>, ExecutionError>) -> Result<(), ExecutionError> {
+    let terminated = exit_code.as_ref().is_ok_and(|code| code.is_some());
+    check_exit_code(exit_code)?;
+    if !terminated {
+        return Err(ExecutionError::DidNotTerminate);
+    }
+    Ok(())
+}
diff --git a/crates/vm/src/arch/interpreter_preflight.rs b/crates/vm/src/arch/interpreter_preflight.rs
new file mode 100644
index 0000000000..7fb8006157
--- /dev/null
+++ b/crates/vm/src/arch/interpreter_preflight.rs
@@ -0,0 +1,265 @@
+use std::sync::Arc;
+
+use openvm_instructions::{instruction::Instruction, program::Program, LocalOpcode, SystemOpcode};
+use openvm_stark_backend::{
+    p3_field::{Field, PrimeField32},
+    p3_maybe_rayon::prelude::*,
+};
+
+use crate::{
+    arch::{
+        execution_mode::PreflightCtx, interpreter::get_pc_index, Arena, ExecutionError, ExecutorId,
+        ExecutorInventory, PreflightExecutor, StaticProgramError, VmExecState, VmStateMut,
+    },
+    system::memory::online::TracingMemory,
+};
+
+/// VM preflight executor (E3 executor) for use with trace generation.
+/// Note: apart from `execution_frequencies`, this holds no VM state and can be reused across runs.
+pub struct PreflightInterpretedInstance<F, E> {
+    // NOTE[jpw]: we use an Arc so that VmInstance can hold both VirtualMachine and
+    // PreflightInterpretedInstance. All we really need is to borrow `executors: &'a [E]`.
+    inventory: Arc<ExecutorInventory<E>>,
+
+    /// This is a map from (pc - pc_base) / pc_step -> [PcEntry].
+    /// We will set `executor_idx` to `u32::MAX` in the [PcEntry] if the program has no instruction
+    /// at that pc.
+    // PERF[jpw/ayush]: We could map directly to the raw pointer(u64) for executor, but storing the
+    // u32 may be better for cache efficiency.
+    pc_handler: Vec<PcEntry<F>>,
+    // Execution count per pc index. `pc_handler` and `execution_frequencies` have the same
+    // length, which equals `Program::len()`
+    execution_frequencies: Vec<u32>,
+    pc_base: u32,
+
+    pub(super) executor_idx_to_air_idx: Vec<usize>,
+}
+
+#[repr(C)]
+pub struct PcEntry<F> {
+    // NOTE[jpw]: revisit storing only smaller `precompute` for better cache locality. Currently
+    // VmOpcode is usize so align=8 and there are 7 u32 operands so we store ExecutorId(u32) after
+    // to avoid padding. This means PcEntry has align=8 and size=40 bytes, which is too big
+    pub insn: Instruction<F>,
+    pub executor_idx: ExecutorId,
+}
+
+impl<F: Field, E> PreflightInterpretedInstance<F, E> {
+    /// Creates a new interpreter instance for preflight execution.
+    /// Rewrites the program into an internal table specialized for enum dispatch.
+    ///
+    /// ## Assumption
+    /// The inventory holds at most `u32::MAX` executors (else `TooManyExecutors` is returned).
+    pub fn new(
+        program: &Program<F>,
+        inventory: Arc<ExecutorInventory<E>>,
+        executor_idx_to_air_idx: Vec<usize>,
+    ) -> Result<Self, StaticProgramError> {
+        if inventory.executors().len() > u32::MAX as usize {
+            // This would mean we cannot use u32::MAX as an "undefined" executor index
+            return Err(StaticProgramError::TooManyExecutors);
+        }
+        let len = program.instructions_and_debug_infos.len();
+        let mut pc_handler = Vec::with_capacity(len);
+        for insn_and_debug_info in &program.instructions_and_debug_infos {
+            if let Some((insn, _)) = insn_and_debug_info {
+                let insn = insn.clone();
+                let executor_idx = if insn.opcode == SystemOpcode::TERMINATE.global_opcode() {
+                    // The execution loop will always branch to terminate before using this executor
+                    0
+                } else {
+                    *inventory.instruction_lookup.get(&insn.opcode).ok_or(
+                        StaticProgramError::ExecutorNotFound {
+                            opcode: insn.opcode,
+                        },
+                    )?
+                };
+                assert!(
+                    (executor_idx as usize) < inventory.executors.len(),
+                    "ExecutorInventory ensures executor_idx is in bounds"
+                );
+                let pc_entry = PcEntry { insn, executor_idx };
+                pc_handler.push(pc_entry);
+            } else {
+                pc_handler.push(PcEntry::undefined());
+            }
+        }
+        Ok(Self {
+            inventory,
+            execution_frequencies: vec![0u32; len],
+            pc_handler,
+            pc_base: program.pc_base,
+            executor_idx_to_air_idx,
+        })
+    }
+
+    pub fn executors(&self) -> &[E] {
+        &self.inventory.executors
+    }
+
+    pub fn filtered_execution_frequencies(&self) -> Vec<u32>
+    where
+        E: Send + Sync,
+    {
+        self.pc_handler
+            .par_iter()
+            .enumerate()
+            .filter(|(_, entry)| entry.is_some())
+            .map(|(i, _)| self.execution_frequencies[i])
+            .collect()
+    }
+
+    pub fn reset_execution_frequencies(&mut self) {
+        self.execution_frequencies.fill(0);
+    }
+}
+
+impl<F: PrimeField32, E> PreflightInterpretedInstance<F, E> {
+    /// Stopping is triggered by should_stop() or if VM is terminated
+    pub fn execute_from_state<RA>(
+        &mut self,
+        state: &mut VmExecState<F, TracingMemory, PreflightCtx<RA>>,
+    ) -> Result<(), ExecutionError>
+    where
+        RA: Arena,
+        E: PreflightExecutor<F, RA>,
+    {
+        loop {
+            if let Ok(Some(_)) = state.exit_code {
+                // should terminate
+                break;
+            }
+            if state
+                .ctx
+                .instret_end
+                .is_some_and(|instret_end| state.instret >= instret_end)
+            {
+                // should suspend
+                break;
+            }
+
+            // Fetch, decode and execute single instruction
+            self.execute_instruction(state)?;
+            state.instret += 1;
+        }
+
+        Ok(())
+    }
+
+    /// Executes a single instruction and updates VM state
+    #[inline(always)]
+    fn execute_instruction<RA>(
+        &mut self,
+        state: &mut VmExecState<F, TracingMemory, PreflightCtx<RA>>,
+    ) -> Result<(), ExecutionError>
+    where
+        RA: Arena,
+        E: PreflightExecutor<F, RA>,
+    {
+        let pc = state.pc;
+        let pc_idx = get_pc_index(self.pc_base, pc);
+        let pc_entry =
+            self.pc_handler
+                .get(pc_idx)
+                .ok_or_else(|| ExecutionError::PcOutOfBounds {
+                    pc,
+                    pc_base: self.pc_base,
+                    program_len: self.pc_handler.len(),
+                })?;
+        // SAFETY: `execution_frequencies` has the same length as `pc_handler` so `get_pc_entry`
+        // already does the bounds check
+        unsafe {
+            *self.execution_frequencies.get_unchecked_mut(pc_idx) += 1;
+        };
+        // SAFETY: the `executor_idx` comes from ExecutorInventory, which ensures that
+        // `executor_idx` is within bounds
+        let executor = unsafe {
+            self.inventory
+                .executors
+                .get_unchecked(pc_entry.executor_idx as usize)
+        };
+        tracing::trace!("pc: {pc:#x} | {:?}", pc_entry.insn);
+
+        let opcode = pc_entry.insn.opcode;
+        let c = pc_entry.insn.c;
+        // Handle termination instruction
+        if opcode.as_usize() == SystemOpcode::CLASS_OFFSET + SystemOpcode::TERMINATE as usize {
+            state.exit_code = Ok(Some(c.as_canonical_u32()));
+            return Ok(());
+        }
+
+        // Execute the instruction using the control implementation
+        tracing::trace!(
+            "opcode: {} | timestamp: {}",
+            executor.get_opcode_name(pc_entry.insn.opcode.as_usize()),
+            state.memory.timestamp()
+        );
+        let arena = unsafe {
+            // SAFETY: executor_idx is guarantee to be within bounds by ProgramHandler constructor
+            let air_idx = *self
+                .executor_idx_to_air_idx
+                .get_unchecked(pc_entry.executor_idx as usize);
+            // SAFETY: air_idx is a valid AIR index in the vkey, and always construct arenas with
+            // length equal to num_airs
+            state.ctx.arenas.get_unchecked_mut(air_idx)
+        };
+        let state_mut = VmStateMut {
+            pc: &mut state.vm_state.pc,
+            memory: &mut state.vm_state.memory,
+            streams: &mut state.vm_state.streams,
+            rng: &mut state.vm_state.rng,
+            custom_pvs: &mut state.vm_state.custom_pvs,
+            ctx: arena,
+            #[cfg(feature = "metrics")]
+            metrics: &mut state.vm_state.metrics,
+        };
+        executor.execute(state_mut, &pc_entry.insn)?;
+
+        #[cfg(feature = "metrics")]
+        {
+            crate::metrics::update_instruction_metrics(state, executor, pc, pc_entry);
+        }
+
+        Ok(())
+    }
+}
+
+impl<F> PcEntry<F> {
+    pub fn is_some(&self) -> bool {
+        self.executor_idx != u32::MAX
+    }
+}
+
+impl<F: Default> PcEntry<F> {
+    fn undefined() -> Self {
+        Self {
+            insn: Instruction::default(),
+            executor_idx: u32::MAX,
+        }
+    }
+}
+
+/// Macro for executing and emitting metrics for instructions/s and number of instructions executed.
+/// Does not include any tracing span.
+#[macro_export]
+macro_rules! execute_spanned {
+    ($name:literal, $executor:expr, $state:expr) => {{
+        #[cfg(feature = "metrics")]
+        let start = std::time::Instant::now();
+        #[cfg(feature = "metrics")]
+        let start_instret = $state.instret;
+
+        let result = $executor.execute_from_state($state);
+
+        #[cfg(feature = "metrics")]
+        {
+            let elapsed = start.elapsed();
+            let insns = $state.instret - start_instret;
+            tracing::info!("instructions_executed={insns}");
+            metrics::counter!(concat!($name, "_insns")).absolute(insns);
+            metrics::gauge!(concat!($name, "_insn_mi/s"))
+                .set(insns as f64 / elapsed.as_micros() as f64);
+        }
+        result
+    }};
+}
diff --git a/crates/vm/src/arch/mod.rs b/crates/vm/src/arch/mod.rs
index 63ee5e6f8b..974b86008e 100644
--- a/crates/vm/src/arch/mod.rs
+++ b/crates/vm/src/arch/mod.rs
@@ -1,26 +1,36 @@
 mod config;
 /// Instruction execution traits and types.
 /// Execution bus and interface.
-mod execution;
+pub mod execution;
+/// Execution context types for different execution modes.
+pub mod execution_mode;
 /// Traits and builders to compose collections of chips into a virtual machine.
 mod extensions;
 /// Traits and wrappers to facilitate VM chip integration
 mod integration_api;
-/// Runtime execution and segmentation
-pub mod segment;
-/// Top level [VirtualMachine] constructor and API.
+/// [RecordArena] trait definitions and implementations. Currently there are two concrete
+/// implementations: [MatrixRecordArena] and [DenseRecordArena].
+mod record_arena;
+/// VM state definitions
+mod state;
+/// Top level [VmExecutor] and [VirtualMachine] constructor and API.
 pub mod vm;
 
-pub use openvm_instructions as instructions;
-
 pub mod hasher;
+/// Interpreter for pure and metered VM execution
+pub mod interpreter;
+/// Interpreter for preflight VM execution, for trace generation purposes.
+pub mod interpreter_preflight;
 /// Testing framework
 #[cfg(any(test, feature = "test-utils"))]
 pub mod testing;
 
 pub use config::*;
 pub use execution::*;
+pub use execution_mode::{ExecutionCtxTrait, MeteredExecutionCtxTrait};
 pub use extensions::*;
 pub use integration_api::*;
-pub use segment::*;
+pub use openvm_instructions as instructions;
+pub use record_arena::*;
+pub use state::*;
 pub use vm::*;
diff --git a/crates/vm/src/arch/record_arena.rs b/crates/vm/src/arch/record_arena.rs
new file mode 100644
index 0000000000..cfb269d095
--- /dev/null
+++ b/crates/vm/src/arch/record_arena.rs
@@ -0,0 +1,667 @@
+use std::{
+    borrow::BorrowMut,
+    io::Cursor,
+    marker::PhantomData,
+    ptr::{copy_nonoverlapping, slice_from_raw_parts_mut},
+};
+
+use openvm_circuit_primitives::utils::next_power_of_two_or_zero;
+use openvm_stark_backend::{
+    p3_field::{Field, PrimeField32},
+    p3_matrix::dense::RowMajorMatrix,
+};
+
+/// Common interface of the record arenas in this module: construction with a given capacity and
+/// an emptiness check.
+pub trait Arena {
+    /// Creates an arena with capacity for `height` rows of `width` columns.
+    ///
+    /// Currently `width` always refers to the main trace width.
+    fn with_capacity(height: usize, width: usize) -> Self;
+
+    /// Returns `true` if nothing has been allocated from the arena.
+    fn is_empty(&self) -> bool;
+
+    /// Only used for metric collection purposes. Intended usage is that for a record arena that
+    /// corresponds to a single trace matrix, this function can extract the current number of used
+    /// rows of the corresponding trace matrix. This is currently expected to work only for
+    /// [MatrixRecordArena].
+    ///
+    /// The default implementation returns `0`.
+    #[cfg(feature = "metrics")]
+    fn current_trace_height(&self) -> usize {
+        0
+    }
+}
+
+/// Given some minimum layout of type `Layout`, the `RecordArena` should allocate a buffer, of
+/// size possibly larger than the record, and then return mutable pointers to the record within the
+/// buffer.
+///
+/// `RecordMut` is the handle type returned to the caller; the `'a` lifetime ties it to the
+/// mutable borrow of the arena taken by [`alloc`](RecordArena::alloc).
+pub trait RecordArena<'a, Layout, RecordMut> {
+    /// Allocates underlying buffer and returns a mutable reference `RecordMut`.
+    /// Note that calling this function may not call an underlying memory allocation as the record
+    /// arena may be virtual.
+    fn alloc(&'a mut self, layout: Layout) -> RecordMut;
+}
+
+/// Helper trait for arenas backed by row-major matrices.
+pub trait RowMajorMatrixArena<F>: Arena {
+    /// Set the arena's capacity based on the projected trace height.
+    fn set_capacity(&mut self, trace_height: usize);
+    /// Width of the backing matrix, i.e. the number of `F` elements per row.
+    fn width(&self) -> usize;
+    /// Current allocation offset into the backing buffer.
+    // NOTE(review): for `MatrixRecordArena` this is counted in field elements, not bytes —
+    // confirm other implementors follow the same unit.
+    fn trace_offset(&self) -> usize;
+    /// Consumes the arena and returns its contents as a [RowMajorMatrix].
+    fn into_matrix(self) -> RowMajorMatrix<F>;
+}
+
+/// `SizedRecord` is a trait that provides additional information about the size and alignment
+/// requirements of a record. It should be implemented on `RecordMut` types.
+///
+/// Both functions are associated functions (no `self`): the answer is derived from the layout
+/// alone, before any record exists.
+pub trait SizedRecord<Layout> {
+    /// The minimal size in bytes that the RecordMut requires to be properly constructed
+    /// given the layout.
+    fn size(layout: &Layout) -> usize;
+    /// The minimal alignment required for the RecordMut to be properly constructed
+    /// given the layout.
+    fn alignment(layout: &Layout) -> usize;
+}
+
+/// A plain mutable reference to a `Sized` record needs exactly the size and alignment of the
+/// record type itself; the layout is not consulted.
+impl<Layout, Record> SizedRecord<Layout> for &mut Record
+where
+    Record: Sized,
+{
+    fn size(_: &Layout) -> usize {
+        std::mem::size_of::<Record>()
+    }
+
+    fn alignment(_: &Layout) -> usize {
+        std::mem::align_of::<Record>()
+    }
+}
+
+// =================== Arena Implementations =========================
+
+/// Record arena whose storage is the row-major trace buffer itself: records are written directly
+/// into rows of `trace_buffer`, which is later handed to a [RowMajorMatrix].
+#[derive(Default)]
+pub struct MatrixRecordArena<F> {
+    /// Row-major backing storage of the trace.
+    pub trace_buffer: Vec<F>,
+    /// Number of field elements per row.
+    pub width: usize,
+    /// Index (in field elements) into `trace_buffer` of the next unallocated row.
+    pub trace_offset: usize,
+    /// The arena is created with a specified capacity, but may be truncated before being converted
+    /// into a [RowMajorMatrix] if `allow_truncate == true`. If `allow_truncate == false`, then the
+    /// matrix will never be truncated. The latter is used if the trace matrix must have fixed
+    /// dimensions (e.g., for a static verifier).
+    pub(super) allow_truncate: bool,
+}
+
+impl<F: Field> MatrixRecordArena<F> {
+    /// Reserves exactly one trace row and returns it viewed as raw bytes.
+    pub fn alloc_single_row(&mut self) -> &mut [u8] {
+        self.alloc_buffer(1)
+    }
+
+    /// Reserves the next `num_rows` rows of the trace buffer and returns them viewed as raw bytes.
+    ///
+    /// Advances `trace_offset` by `num_rows * width` field elements. Panics if the reserved range
+    /// extends past the end of `trace_buffer`.
+    pub fn alloc_buffer(&mut self, num_rows: usize) -> &mut [u8] {
+        let begin = self.trace_offset;
+        self.trace_offset = begin + num_rows * self.width;
+        let end = self.trace_offset;
+        let rows: &mut [F] = &mut self.trace_buffer[begin..end];
+        let num_bytes = size_of_val(rows);
+        let byte_ptr = rows.as_mut_ptr().cast::<u8>();
+        // SAFETY:
+        // - `byte_ptr` is derived from a live `&mut [F]`, so it is non-null
+        // - `num_bytes` is the exact byte length of that slice
+        // - `u8` has alignment 1, so every address is suitably aligned
+        unsafe { std::slice::from_raw_parts_mut(byte_ptr, num_bytes) }
+    }
+
+    /// Disables truncation so the matrix keeps the full dimensions of `trace_buffer`.
+    pub fn force_matrix_dimensions(&mut self) {
+        self.allow_truncate = false;
+    }
+}
+
+impl<F: Field> Arena for MatrixRecordArena<F> {
+    /// Allocates a zeroed buffer of `next_power_of_two_or_zero(height) * width` field elements,
+    /// with nothing allocated yet and truncation allowed.
+    fn with_capacity(height: usize, width: usize) -> Self {
+        let height = next_power_of_two_or_zero(height);
+        let trace_buffer = F::zero_vec(height * width);
+        Self {
+            trace_buffer,
+            width,
+            trace_offset: 0,
+            allow_truncate: true,
+        }
+    }
+
+    /// The arena is empty while no row has been allocated.
+    fn is_empty(&self) -> bool {
+        self.trace_offset == 0
+    }
+
+    /// Number of rows allocated so far.
+    #[cfg(feature = "metrics")]
+    fn current_trace_height(&self) -> usize {
+        // `width` is zero for a `Default`-constructed arena (or one created with zero width);
+        // report zero rows instead of panicking on a division by zero.
+        self.trace_offset.checked_div(self.width).unwrap_or(0)
+    }
+}
+
+impl<F: Field> RowMajorMatrixArena<F> for MatrixRecordArena<F> {
+    /// Resizes `trace_buffer` to exactly `trace_height * width` field elements. Growing fills the
+    /// new elements with `F::ZERO`. The height is used as given (no rounding to a power of two).
+    fn set_capacity(&mut self, trace_height: usize) {
+        let size = trace_height * self.width;
+        // PERF: use memset
+        self.trace_buffer.resize(size, F::ZERO);
+    }
+
+    fn width(&self) -> usize {
+        self.width
+    }
+
+    fn trace_offset(&self) -> usize {
+        self.trace_offset
+    }
+
+    /// Converts the arena into a [RowMajorMatrix] of width `self.width`.
+    ///
+    /// If `allow_truncate` is set, the buffer is cut down to the used rows rounded up by
+    /// `next_power_of_two_or_zero`; otherwise the full buffer is kept and its height is asserted
+    /// to be zero or a power of two.
+    ///
+    /// # Panics
+    /// - if `width` is zero (the modulo/division below),
+    /// - if `trace_offset` is not a multiple of `width`,
+    /// - if the rounded-up height does not fit in `trace_buffer`,
+    /// - if truncation is disallowed and the buffer does not have the required dimensions.
+    fn into_matrix(mut self) -> RowMajorMatrix<F> {
+        let width = self.width();
+        assert_eq!(self.trace_offset() % width, 0);
+        let rows_used = self.trace_offset() / width;
+        let height = next_power_of_two_or_zero(rows_used);
+        // This should be automatic since trace_buffer's height is a power of two:
+        assert!(height.checked_mul(width).unwrap() <= self.trace_buffer.len());
+        if self.allow_truncate {
+            self.trace_buffer.truncate(height * width);
+        } else {
+            // Fixed-dimension mode: keep every row, but check the buffer shape is valid.
+            assert_eq!(self.trace_buffer.len() % width, 0);
+            let height = self.trace_buffer.len() / width;
+            assert!(height.is_power_of_two() || height == 0);
+        }
+        RowMajorMatrix::new(self.trace_buffer, self.width)
+    }
+}
+
+/// Record arena that packs records back to back as raw bytes.
+pub struct DenseRecordArena {
+    /// Backing byte buffer. The cursor position marks the end of the allocated region: every
+    /// allocation advances it by the number of bytes handed out.
+    pub records_buffer: Cursor<Vec<u8>>,
+}
+
+/// Alignment (in bytes) of the first usable byte of a [DenseRecordArena] buffer. Record
+/// alignments used with the arena are debug-asserted to divide this value.
+const MAX_ALIGNMENT: usize = 32;
+
+impl DenseRecordArena {
+    /// Number of bytes to skip from `ptr` to reach the next address that is a multiple of
+    /// `alignment` (zero if `ptr` is already aligned).
+    #[inline(always)]
+    fn padding_to_align(ptr: *const u8, alignment: usize) -> usize {
+        (alignment - (ptr as usize % alignment)) % alignment
+    }
+
+    /// Offset into the backing buffer of its first `MAX_ALIGNMENT`-aligned byte.
+    #[inline(always)]
+    fn base_offset(&self) -> usize {
+        Self::padding_to_align(self.records_buffer.get_ref().as_ptr(), MAX_ALIGNMENT)
+    }
+
+    /// Creates a new [DenseRecordArena] with the given capacity in bytes.
+    ///
+    /// The backing buffer is zeroed and over-allocated by `MAX_ALIGNMENT` bytes, and the cursor
+    /// starts at the first `MAX_ALIGNMENT`-aligned byte of the buffer.
+    pub fn with_byte_capacity(size_bytes: usize) -> Self {
+        let buffer = vec![0u8; size_bytes + MAX_ALIGNMENT];
+        let offset = Self::padding_to_align(buffer.as_ptr(), MAX_ALIGNMENT);
+        let mut cursor = Cursor::new(buffer);
+        cursor.set_position(offset as u64);
+        Self {
+            records_buffer: cursor,
+        }
+    }
+
+    /// Replaces the backing buffer with a fresh zeroed one of the given capacity in bytes.
+    /// Everything previously allocated is discarded.
+    pub fn set_byte_capacity(&mut self, size_bytes: usize) {
+        *self = Self::with_byte_capacity(size_bytes);
+    }
+
+    /// Returns the allocated size of the arena in bytes.
+    ///
+    /// **Note**: This may include additional bytes for alignment.
+    pub fn capacity(&self) -> usize {
+        self.records_buffer.get_ref().len()
+    }
+
+    /// Allocates `count` bytes and returns as a mutable slice.
+    ///
+    /// The returned lifetime `'a` is chosen by the caller and is not tied to `&mut self`; the
+    /// caller must not use the slice after the backing buffer has been replaced or dropped.
+    ///
+    /// # Panics
+    /// If fewer than `count` bytes remain in the buffer.
+    pub fn alloc_bytes<'a>(&mut self, count: usize) -> &'a mut [u8] {
+        let begin = self.records_buffer.position() as usize;
+        let capacity = self.records_buffer.get_ref().len();
+        // This check must also run in release builds: the slice below is produced with raw
+        // pointer arithmetic, so an unchecked overrun would be undefined behaviour reachable
+        // from safe code. `checked_add` additionally guards against `begin + count` wrapping.
+        let end = match begin.checked_add(count) {
+            Some(end) if end <= capacity => end,
+            _ => panic!(
+                "failed to allocate {count} bytes from {begin} when the capacity is {capacity}"
+            ),
+        };
+        self.records_buffer.set_position(end as u64);
+        // SAFETY: `begin..end` was just checked to lie within the backing `Vec<u8>`.
+        unsafe {
+            std::slice::from_raw_parts_mut(
+                self.records_buffer.get_mut().as_mut_ptr().add(begin),
+                count,
+            )
+        }
+    }
+
+    /// Returns the bytes between the first `MAX_ALIGNMENT`-aligned byte of the buffer and the
+    /// current cursor position.
+    pub fn allocated(&self) -> &[u8] {
+        let size = self.records_buffer.position() as usize;
+        let offset = self.base_offset();
+        &self.records_buffer.get_ref()[offset..size]
+    }
+
+    /// Mutable counterpart of [`allocated`](Self::allocated).
+    pub fn allocated_mut(&mut self) -> &mut [u8] {
+        let size = self.records_buffer.position() as usize;
+        let offset = self.base_offset();
+        &mut self.records_buffer.get_mut()[offset..size]
+    }
+
+    /// Moves the cursor to the first `alignment`-aligned byte of the backing buffer.
+    ///
+    /// The new position is computed from the start of the buffer, not from the current cursor
+    /// position, so bytes handed out earlier are no longer counted as allocated.
+    // NOTE(review): presumably meant to be called before the first allocation — confirm.
+    pub fn align_to(&mut self, alignment: usize) {
+        debug_assert!(MAX_ALIGNMENT % alignment == 0);
+        let offset = Self::padding_to_align(self.records_buffer.get_ref().as_ptr(), alignment);
+        self.records_buffer.set_position(offset as u64);
+    }
+
+    /// Returns a [RecordSeeker] on the allocated buffer
+    pub fn get_record_seeker<R, L>(&mut self) -> RecordSeeker<DenseRecordArena, R, L> {
+        RecordSeeker::new(self.allocated_mut())
+    }
+}
+
+impl Arena for DenseRecordArena {
+    /// Sizes the arena at `size_of::<u32>()` bytes per cell of a `height` x `width` trace.
+    // TODO[jpw]: treat `width` as AIR width in number of columns for now
+    fn with_capacity(height: usize, width: usize) -> Self {
+        let row_bytes = width * size_of::<u32>();
+        Self::with_byte_capacity(height * row_bytes)
+    }
+
+    /// The arena is empty while the allocated region has length zero.
+    fn is_empty(&self) -> bool {
+        let used = self.allocated();
+        used.is_empty()
+    }
+}
+
+// =================== Helper Functions =================================
+
+/// Converts a field element slice into a record type.
+/// This function reinterprets the `&mut [F]` as raw bytes,
+/// then uses the `CustomBorrow` trait to obtain the desired record type `T`.
+///
+/// `layout` is forwarded unchanged to `CustomBorrow::custom_borrow`.
+///
+/// ## Safety
+/// `slice` must satisfy the requirements of the `CustomBorrow` trait.
+pub unsafe fn get_record_from_slice<'a, T, F, L>(slice: &mut &'a mut [F], layout: L) -> T
+where
+    [u8]: CustomBorrow<'a, T, L>,
+{
+    // The alignment of `[u8]` is always satisfied
+    let record_buffer =
+        &mut *slice_from_raw_parts_mut(slice.as_mut_ptr() as *mut u8, size_of_val::<[F]>(*slice));
+    let record: T = record_buffer.custom_borrow(layout);
+    record
+}
+
+/// A trait that allows for custom implementation of `borrow` given the necessary information.
+/// This is useful for record structs that have dynamic size.
+///
+/// In this module it is implemented on `[u8]`: `T` is the record handle produced from the bytes
+/// and `L` is the layout describing how to interpret them.
+pub trait CustomBorrow<'a, T, L> {
+    /// Reinterprets `self` as a record of type `T` according to `layout`.
+    fn custom_borrow(&'a mut self, layout: L) -> T;
+
+    /// Given `&self` as a valid starting pointer of a reference that has already been previously
+    /// allocated and written to, extracts and returns the corresponding layout.
+    /// This must work even if `T` is not sized.
+    ///
+    /// # Safety
+    /// - `&self` must be a valid starting pointer on which `custom_borrow` has already been called
+    /// - The data underlying `&self` has already been written to and is self-describing, so layout
+    ///   can be extracted
+    unsafe fn extract_layout(&self) -> L;
+}
+
+/// Helper for walking over records that were previously written into a byte buffer.
+///
+/// The type parameters are markers only (held in `PhantomData`): `RA` is the arena type the
+/// buffer came from, `RecordMut` the record handle type and `Layout` the layout type. They select
+/// which of the specialised `impl` blocks applies.
+pub struct RecordSeeker<'a, RA, RecordMut, Layout> {
+    pub buffer: &'a mut [u8], // The buffer that the records are written to
+    _phantom: PhantomData<(RA, RecordMut, Layout)>,
+}
+
+impl<'a, RA, RecordMut, Layout> RecordSeeker<'a, RA, RecordMut, Layout> {
+    /// Wraps `record_buffer` in a seeker without inspecting its contents.
+    pub fn new(record_buffer: &'a mut [u8]) -> Self {
+        let buffer = record_buffer;
+        let _phantom = PhantomData;
+        Self { buffer, _phantom }
+    }
+}
+
+// `RecordSeeker` implementation for [DenseRecordArena], with [MultiRowLayout]
+// **NOTE** Assumes that `layout` can be extracted from the record alone
+impl<'a, R, M> RecordSeeker<'a, DenseRecordArena, R, MultiRowLayout<M>>
+where
+    [u8]: CustomBorrow<'a, R, MultiRowLayout<M>>,
+    R: SizedRecord<MultiRowLayout<M>>,
+    M: MultiRowMetadata + Clone,
+{
+    // Returns the layout at the given offset in the buffer
+    // **SAFETY**: `offset` has to be a valid offset, pointing to the start of a record
+    //
+    // `offset` is only read here; it is not advanced.
+    pub fn get_layout_at(offset: &mut usize, buffer: &[u8]) -> MultiRowLayout<M> {
+        let buffer = &buffer[*offset..];
+        unsafe { buffer.extract_layout() }
+    }
+
+    // Returns a record at the given offset in the buffer
+    // **SAFETY**: `offset` has to be a valid offset, pointing to the start of a record
+    //
+    // On return `offset` has been advanced by the record size rounded up to the record
+    // alignment, i.e. it points at the start of the next record.
+    pub fn get_record_at(offset: &mut usize, buffer: &'a mut [u8]) -> R {
+        let layout = Self::get_layout_at(offset, buffer);
+        let buffer = &mut buffer[*offset..];
+        let record_size = R::size(&layout);
+        let record_alignment = R::alignment(&layout);
+        let aligned_record_size = record_size.next_multiple_of(record_alignment);
+        let record: R = buffer.custom_borrow(layout);
+        *offset += aligned_record_size;
+        record
+    }
+
+    // Returns a vector of all the records in the buffer
+    //
+    // Records are read back to back from offset 0 until the end of the buffer is reached.
+    pub fn extract_records(&'a mut self) -> Vec<R> {
+        let mut records = Vec::new();
+        let len = self.buffer.len();
+        let buff = &mut self.buffer[..];
+        let mut offset = 0;
+        while offset < len {
+            let record: R = {
+                // Re-create a `&'a mut` view of the whole buffer for every record, since each
+                // returned record keeps its borrow alive for `'a`.
+                // NOTE(review): this hands out several `&mut` views over the same buffer; it
+                // relies on the records covering disjoint byte ranges — confirm.
+                let buff = unsafe { &mut *slice_from_raw_parts_mut(buff.as_mut_ptr(), len) };
+                Self::get_record_at(&mut offset, buff)
+            };
+            records.push(record);
+        }
+        records
+    }
+
+    // Transfers the records in the buffer to a [MatrixRecordArena], used in testing
+    //
+    // The arena's `trace_offset` is reset to 0 first, so existing arena contents are
+    // overwritten. Each record is copied into `get_num_rows()` freshly allocated rows.
+    pub fn transfer_to_matrix_arena<F: PrimeField32>(
+        &'a mut self,
+        arena: &mut MatrixRecordArena<F>,
+    ) {
+        let len = self.buffer.len();
+        arena.trace_offset = 0;
+        let mut offset = 0;
+        while offset < len {
+            let layout = Self::get_layout_at(&mut offset, self.buffer);
+            let record_size = R::size(&layout);
+            let record_alignment = R::alignment(&layout);
+            let aligned_record_size = record_size.next_multiple_of(record_alignment);
+            let src_ptr = unsafe { self.buffer.as_ptr().add(offset) };
+            let dst_ptr = arena
+                .alloc_buffer(layout.metadata.get_num_rows())
+                .as_mut_ptr();
+            // NOTE(review): assumes `aligned_record_size` bytes fit both in the remaining source
+            // buffer and in the rows just allocated; neither is checked here — confirm.
+            unsafe { copy_nonoverlapping(src_ptr, dst_ptr, aligned_record_size) };
+            offset += aligned_record_size;
+        }
+    }
+}
+
+// `RecordSeeker` implementation for [DenseRecordArena], with [AdapterCoreLayout]
+// **NOTE** Assumes that `layout` is the same for all the records, so it is expected to be passed as
+// a parameter
+impl<'a, A, C, M> RecordSeeker<'a, DenseRecordArena, (A, C), AdapterCoreLayout<M>>
+where
+    [u8]: CustomBorrow<'a, A, AdapterCoreLayout<M>> + CustomBorrow<'a, C, AdapterCoreLayout<M>>,
+    A: SizedRecord<AdapterCoreLayout<M>>,
+    C: SizedRecord<AdapterCoreLayout<M>>,
+    M: AdapterCoreMetadata + Clone,
+{
+    // Returns the aligned sizes of the adapter and core records given their layout
+    //
+    // The adapter size is rounded up to the core alignment (so the core record that follows
+    // starts aligned), and the core size is padded so that adapter + core together end on a
+    // multiple of the adapter alignment (so the next adapter record starts aligned).
+    pub fn get_aligned_sizes(layout: &AdapterCoreLayout<M>) -> (usize, usize) {
+        let adapter_alignment = A::alignment(layout);
+        let core_alignment = C::alignment(layout);
+        let adapter_size = A::size(layout);
+        let aligned_adapter_size = adapter_size.next_multiple_of(core_alignment);
+        let core_size = C::size(layout);
+        let aligned_core_size = (aligned_adapter_size + core_size)
+            .next_multiple_of(adapter_alignment)
+            - aligned_adapter_size;
+        (aligned_adapter_size, aligned_core_size)
+    }
+
+    // Returns the aligned size of a single record given its layout
+    pub fn get_aligned_record_size(layout: &AdapterCoreLayout<M>) -> usize {
+        let (adapter_size, core_size) = Self::get_aligned_sizes(layout);
+        adapter_size + core_size
+    }
+
+    // Returns a record at the given offset in the buffer
+    // **SAFETY**: `offset` has to be a valid offset, pointing to the start of a record
+    //
+    // On return `offset` has been advanced past the (aligned) adapter and core parts.
+    pub fn get_record_at(
+        offset: &mut usize,
+        buffer: &'a mut [u8],
+        layout: AdapterCoreLayout<M>,
+    ) -> (A, C) {
+        let buffer = &mut buffer[*offset..];
+        let (adapter_size, core_size) = Self::get_aligned_sizes(&layout);
+        // NOTE(review): the unchecked split requires `adapter_size <= buffer.len()`, which
+        // follows only if `offset` really points at a complete record — confirm callers.
+        let (adapter_buffer, core_buffer) = unsafe { buffer.split_at_mut_unchecked(adapter_size) };
+        let adapter_record: A = adapter_buffer.custom_borrow(layout.clone());
+        let core_record: C = core_buffer.custom_borrow(layout);
+        *offset += adapter_size + core_size;
+        (adapter_record, core_record)
+    }
+
+    // Returns a vector of all the records in the buffer
+    //
+    // Records are read back to back from offset 0 until the end of the buffer is reached.
+    pub fn extract_records(&'a mut self, layout: AdapterCoreLayout<M>) -> Vec<(A, C)> {
+        let mut records = Vec::new();
+        let len = self.buffer.len();
+        let buff = &mut self.buffer[..];
+        let mut offset = 0;
+        while offset < len {
+            let record: (A, C) = {
+                // Re-create a `&'a mut` view of the whole buffer for every record, since each
+                // returned record keeps its borrow alive for `'a`.
+                // NOTE(review): this hands out several `&mut` views over the same buffer; it
+                // relies on the records covering disjoint byte ranges — confirm.
+                let buff = unsafe { &mut *slice_from_raw_parts_mut(buff.as_mut_ptr(), len) };
+                Self::get_record_at(&mut offset, buff, layout.clone())
+            };
+            records.push(record);
+        }
+        records
+    }
+
+    // Transfers the records in the buffer to a [MatrixRecordArena], used in testing
+    //
+    // The arena's `trace_offset` is reset to 0 first, so existing arena contents are
+    // overwritten. Each record goes into one freshly allocated row: the adapter part at the
+    // start of the row and the core part at byte offset `M::get_adapter_width()`.
+    pub fn transfer_to_matrix_arena<F: PrimeField32>(
+        &'a mut self,
+        arena: &mut MatrixRecordArena<F>,
+        layout: AdapterCoreLayout<M>,
+    ) {
+        let len = self.buffer.len();
+        arena.trace_offset = 0;
+        let mut offset = 0;
+        let (adapter_size, core_size) = Self::get_aligned_sizes(&layout);
+        while offset < len {
+            let dst_buffer = arena.alloc_single_row();
+            let (adapter_buf, core_buf) =
+                unsafe { dst_buffer.split_at_mut_unchecked(M::get_adapter_width()) };
+            // NOTE(review): the copies use the *aligned* sizes; this assumes they fit within
+            // the adapter and core parts of the row and within the source buffer — confirm.
+            unsafe {
+                let src_ptr = self.buffer.as_ptr().add(offset);
+                copy_nonoverlapping(src_ptr, adapter_buf.as_mut_ptr(), adapter_size);
+                copy_nonoverlapping(src_ptr.add(adapter_size), core_buf.as_mut_ptr(), core_size);
+            }
+            offset += adapter_size + core_size;
+        }
+    }
+}
+
+// ============================== MultiRowLayout =======================================
+
+/// Minimal layout information that [RecordArena] requires for record allocation
+/// in scenarios involving chips that:
+/// - can have multiple rows per record, and
+/// - have possibly variable length records
+///
+/// **NOTE**: `M` is the metadata type that implements `MultiRowMetadata`
+#[derive(Debug, Clone, Default, derive_new::new)]
+pub struct MultiRowLayout<M> {
+    /// Per-record metadata; the arenas query it for the number of rows to allocate.
+    pub metadata: M,
+}
+
+/// `Metadata` types need to implement this trait to be used with `MultiRowLayout`
+pub trait MultiRowMetadata {
+    /// Number of trace rows the record described by this metadata occupies.
+    fn get_num_rows(&self) -> usize;
+}
+
+/// Empty metadata that implements `MultiRowMetadata` with `get_num_rows` always returning 1,
+/// i.e. every record occupies exactly one row.
+#[derive(Debug, Clone, Default, derive_new::new)]
+pub struct EmptyMultiRowMetadata {}
+
+impl MultiRowMetadata for EmptyMultiRowMetadata {
+    /// Always `1`.
+    #[inline(always)]
+    fn get_num_rows(&self) -> usize {
+        1
+    }
+}
+
+/// [MultiRowLayout] with [EmptyMultiRowMetadata], i.e. a layout for records that occupy a single
+/// row.
+pub type EmptyMultiRowLayout = MultiRowLayout<EmptyMultiRowMetadata>;
+
+/// Whenever `[u8]` implements `BorrowMut<T>`, that implementation doubles as
+/// `CustomBorrow::custom_borrow` for any layout type: the layout value is ignored, and
+/// `extract_layout` simply returns the layout's default.
+impl<'a, T: Sized, L: Default> CustomBorrow<'a, &'a mut T, L> for [u8]
+where
+    [u8]: BorrowMut<T>,
+{
+    fn custom_borrow(&'a mut self, _: L) -> &'a mut T {
+        BorrowMut::<T>::borrow_mut(self)
+    }
+
+    unsafe fn extract_layout(&self) -> L {
+        Default::default()
+    }
+}
+
+/// [RecordArena] implementation for [MatrixRecordArena], with [MultiRowLayout]
+/// **NOTE**: `R` is the RecordMut type
+impl<'a, F: Field, M: MultiRowMetadata, R> RecordArena<'a, MultiRowLayout<M>, R>
+    for MatrixRecordArena<F>
+where
+    [u8]: CustomBorrow<'a, R, MultiRowLayout<M>>,
+{
+    /// Reserves as many trace rows as the layout's metadata asks for and borrows the record from
+    /// those bytes.
+    fn alloc(&'a mut self, layout: MultiRowLayout<M>) -> R {
+        let num_rows = layout.metadata.get_num_rows();
+        let row_bytes = self.alloc_buffer(num_rows);
+        row_bytes.custom_borrow(layout)
+    }
+}
+
+/// [RecordArena] implementation for [DenseRecordArena], with [MultiRowLayout]
+/// **NOTE**: `R` is the RecordMut type
+impl<'a, R, M> RecordArena<'a, MultiRowLayout<M>, R> for DenseRecordArena
+where
+    [u8]: CustomBorrow<'a, R, MultiRowLayout<M>>,
+    R: SizedRecord<MultiRowLayout<M>>,
+{
+    /// Reserves the record's size rounded up to its alignment and borrows the record from those
+    /// bytes.
+    fn alloc(&'a mut self, layout: MultiRowLayout<M>) -> R {
+        let size = R::size(&layout);
+        let alignment = R::alignment(&layout);
+        let padded_size = size.next_multiple_of(alignment);
+        let bytes = self.alloc_bytes(padded_size);
+        bytes.custom_borrow(layout)
+    }
+}
+
+// ============================== AdapterCoreLayout =======================================
+// This is for integration_api usage
+
+/// Minimal layout information that [RecordArena] requires for record allocation
+/// in scenarios involving chips that:
+/// - have a single row per record, and
+/// - have trace row = [adapter_row, core_row]
+///
+/// **NOTE**: `M` is the metadata type that implements `AdapterCoreMetadata`
+#[derive(Debug, Clone, Default)]
+pub struct AdapterCoreLayout<M> {
+    /// Metadata value carried alongside the layout.
+    pub metadata: M,
+}
+
+/// `Metadata` types need to implement this trait to be used with `AdapterCoreLayout`
+/// **NOTE**: get_adapter_width returns the size in bytes
+pub trait AdapterCoreMetadata {
+    /// Size in bytes of the adapter part of a trace row; the core part starts at this byte
+    /// offset within the row.
+    fn get_adapter_width() -> usize;
+}
+
+impl<M> AdapterCoreLayout<M> {
+    /// Creates a layout whose metadata is `M::default()`.
+    pub fn new() -> Self
+    where
+        M: Default,
+    {
+        Self::with_metadata(M::default())
+    }
+
+    /// Creates a layout carrying the given metadata.
+    pub fn with_metadata(metadata: M) -> Self {
+        AdapterCoreLayout { metadata }
+    }
+}
+
+/// Empty metadata that implements `AdapterCoreMetadata`
+/// **NOTE**: `AS` is the adapter type that implements `AdapterTraceExecutor`
+/// **WARNING**: `AS::WIDTH` is the number of field elements, not the size in bytes
+pub struct AdapterCoreEmptyMetadata<F, AS> {
+    _phantom: PhantomData<(F, AS)>,
+}
+
+// `Clone` and `Default` are written by hand so that they do not require `F: Clone`/`AS: Clone`
+// (resp. `Default`), which the derives would add.
+impl<F, AS> Clone for AdapterCoreEmptyMetadata<F, AS> {
+    fn clone(&self) -> Self {
+        Self::new()
+    }
+}
+
+impl<F, AS> AdapterCoreEmptyMetadata<F, AS> {
+    /// Creates the (zero-sized) metadata marker.
+    pub fn new() -> Self {
+        let _phantom = PhantomData;
+        Self { _phantom }
+    }
+}
+
+impl<F, AS> Default for AdapterCoreEmptyMetadata<F, AS> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<F, AS> AdapterCoreMetadata for AdapterCoreEmptyMetadata<F, AS>
+where
+    AS: super::AdapterTraceExecutor<F>,
+{
+    /// Adapter width in bytes: `AS::WIDTH` field elements of `size_of::<F>()` bytes each.
+    #[inline(always)]
+    fn get_adapter_width() -> usize {
+        let element_bytes = size_of::<F>();
+        AS::WIDTH * element_bytes
+    }
+}
+
+/// AdapterCoreLayout with empty metadata that can be used by chips that have record type
+/// (&mut A, &mut C) where `A` and `C` are `Sized`
+pub type EmptyAdapterCoreLayout<F, AS> = AdapterCoreLayout<AdapterCoreEmptyMetadata<F, AS>>;
+
+/// [RecordArena] implementation for [MatrixRecordArena], with [AdapterCoreLayout]
+/// **NOTE**: `A` is the adapter RecordMut type and `C` is the core RecordMut type
+impl<'a, F: Field, A, C, M: AdapterCoreMetadata> RecordArena<'a, AdapterCoreLayout<M>, (A, C)>
+    for MatrixRecordArena<F>
+where
+    [u8]: CustomBorrow<'a, A, AdapterCoreLayout<M>> + CustomBorrow<'a, C, AdapterCoreLayout<M>>,
+    M: Clone,
+{
+    /// Reserves one trace row and splits its bytes at `M::get_adapter_width()`: the first part
+    /// backs the adapter record, the remainder backs the core record.
+    fn alloc(&'a mut self, layout: AdapterCoreLayout<M>) -> (A, C) {
+        let adapter_width = M::get_adapter_width();
+        let buffer = self.alloc_single_row();
+        // Doing a unchecked split here for perf
+        // NOTE(review): this requires `adapter_width` to be at most the row size in bytes,
+        // which is not checked here — confirm it holds for every chip using this layout.
+        let (adapter_buffer, core_buffer) = unsafe { buffer.split_at_mut_unchecked(adapter_width) };
+
+        let adapter_record: A = adapter_buffer.custom_borrow(layout.clone());
+        let core_record: C = core_buffer.custom_borrow(layout);
+
+        (adapter_record, core_record)
+    }
+}
+
+/// [RecordArena] implementation for [DenseRecordArena], with [AdapterCoreLayout]
+/// **NOTE**: `A` is the adapter RecordMut type and `C` is the core RecordMut type
+impl<'a, A, C, M> RecordArena<'a, AdapterCoreLayout<M>, (A, C)> for DenseRecordArena
+where
+    [u8]: CustomBorrow<'a, A, AdapterCoreLayout<M>> + CustomBorrow<'a, C, AdapterCoreLayout<M>>,
+    M: Clone,
+    A: SizedRecord<AdapterCoreLayout<M>>,
+    C: SizedRecord<AdapterCoreLayout<M>>,
+{
+    /// Reserves space for one adapter record followed by one core record and borrows both.
+    fn alloc(&'a mut self, layout: AdapterCoreLayout<M>) -> (A, C) {
+        let adapter_alignment = A::alignment(&layout);
+        let core_alignment = C::alignment(&layout);
+        let adapter_size = A::size(&layout);
+        // Pad the adapter part so the core record that follows it starts on a multiple of the
+        // core alignment.
+        let aligned_adapter_size = adapter_size.next_multiple_of(core_alignment);
+        let core_size = C::size(&layout);
+        // Pad the core part so that adapter + core together end on a multiple of the adapter
+        // alignment, i.e. the next adapter record starts aligned.
+        let aligned_core_size = (aligned_adapter_size + core_size)
+            .next_multiple_of(adapter_alignment)
+            - aligned_adapter_size;
+        debug_assert_eq!(MAX_ALIGNMENT % adapter_alignment, 0);
+        debug_assert_eq!(MAX_ALIGNMENT % core_alignment, 0);
+        let buffer = self.alloc_bytes(aligned_adapter_size + aligned_core_size);
+        // Doing an unchecked split here for perf
+        // (`aligned_adapter_size` never exceeds the length requested from `alloc_bytes` above.)
+        let (adapter_buffer, core_buffer) =
+            unsafe { buffer.split_at_mut_unchecked(aligned_adapter_size) };
+
+        let adapter_record: A = adapter_buffer.custom_borrow(layout.clone());
+        let core_record: C = core_buffer.custom_borrow(layout);
+
+        (adapter_record, core_record)
+    }
+}
diff --git a/crates/vm/src/arch/segment.rs b/crates/vm/src/arch/segment.rs
deleted file mode 100644
index 634632ce2b..0000000000
--- a/crates/vm/src/arch/segment.rs
+++ /dev/null
@@ -1,387 +0,0 @@
-use std::sync::Arc;
-
-use backtrace::Backtrace;
-use openvm_instructions::{
-    exe::FnBounds,
-    instruction::{DebugInfo, Instruction},
-    program::Program,
-};
-use openvm_stark_backend::{
-    config::{Domain, StarkGenericConfig},
-    keygen::types::LinearConstraint,
-    p3_commit::PolynomialSpace,
-    p3_field::PrimeField32,
-    prover::types::{CommittedTraceData, ProofInput},
-    utils::metrics_span,
-    Chip,
-};
-
-use super::{
-    ExecutionError, GenerationError, Streams, SystemBase, SystemConfig, VmChipComplex,
-    VmComplexTraceHeights, VmConfig,
-};
-#[cfg(feature = "bench-metrics")]
-use crate::metrics::VmMetrics;
-use crate::{
-    arch::{instructions::*, ExecutionState, InstructionExecutor},
-    system::memory::MemoryImage,
-};
-
-/// Check segment every 100 instructions.
-const SEGMENT_CHECK_INTERVAL: usize = 100;
-
-const DEFAULT_MAX_SEGMENT_LEN: usize = (1 << 22) - 100;
-// a heuristic number for the maximum number of cells per chip in a segment
-// a few reasons for this number:
-//  1. `VmAirWrapper<Rv32BaseAluAdapterAir, BaseAluCoreAir<4, 8>` is
-//    the chip with the most cells in a segment from the reth-benchmark.
-//  2. `VmAirWrapper<Rv32BaseAluAdapterAir, BaseAluCoreAir<4, 8>`:
-//    its trace width is 36 and its after challenge trace width is 80.
-const DEFAULT_MAX_CELLS_PER_CHIP_IN_SEGMENT: usize = DEFAULT_MAX_SEGMENT_LEN * 120;
-
-pub trait SegmentationStrategy:
-    std::fmt::Debug + Send + Sync + std::panic::UnwindSafe + std::panic::RefUnwindSafe
-{
-    /// Whether the execution should segment based on the trace heights and cells.
-    ///
-    /// Air names are provided for debugging purposes.
-    fn should_segment(
-        &self,
-        air_names: &[String],
-        trace_heights: &[usize],
-        trace_cells: &[usize],
-    ) -> bool;
-
-    /// A strategy that segments more aggressively than the current one.
-    ///
-    /// Called when `should_segment` results in a segment that is infeasible. Execution will be
-    /// re-run with the stricter segmentation strategy.
-    fn stricter_strategy(&self) -> Arc<dyn SegmentationStrategy>;
-}
-
-/// Default segmentation strategy: segment if any chip's height or cells exceed the limits.
-#[derive(Debug, Clone)]
-pub struct DefaultSegmentationStrategy {
-    max_segment_len: usize,
-    max_cells_per_chip_in_segment: usize,
-}
-
-impl Default for DefaultSegmentationStrategy {
-    fn default() -> Self {
-        Self {
-            max_segment_len: DEFAULT_MAX_SEGMENT_LEN,
-            max_cells_per_chip_in_segment: DEFAULT_MAX_CELLS_PER_CHIP_IN_SEGMENT,
-        }
-    }
-}
-
-impl DefaultSegmentationStrategy {
-    pub fn new_with_max_segment_len(max_segment_len: usize) -> Self {
-        Self {
-            max_segment_len,
-            max_cells_per_chip_in_segment: max_segment_len * 120,
-        }
-    }
-
-    pub fn new(max_segment_len: usize, max_cells_per_chip_in_segment: usize) -> Self {
-        Self {
-            max_segment_len,
-            max_cells_per_chip_in_segment,
-        }
-    }
-
-    pub fn max_segment_len(&self) -> usize {
-        self.max_segment_len
-    }
-}
-
-const SEGMENTATION_BACKOFF_FACTOR: usize = 4;
-
-impl SegmentationStrategy for DefaultSegmentationStrategy {
-    fn should_segment(
-        &self,
-        air_names: &[String],
-        trace_heights: &[usize],
-        trace_cells: &[usize],
-    ) -> bool {
-        for (i, &height) in trace_heights.iter().enumerate() {
-            if height > self.max_segment_len {
-                tracing::info!(
-                    "Should segment because chip {} (name: {}) has height {}",
-                    i,
-                    air_names[i],
-                    height
-                );
-                return true;
-            }
-        }
-        for (i, &num_cells) in trace_cells.iter().enumerate() {
-            if num_cells > self.max_cells_per_chip_in_segment {
-                tracing::info!(
-                    "Should segment because chip {} (name: {}) has {} cells",
-                    i,
-                    air_names[i],
-                    num_cells
-                );
-                return true;
-            }
-        }
-        false
-    }
-
-    fn stricter_strategy(&self) -> Arc<dyn SegmentationStrategy> {
-        Arc::new(Self {
-            max_segment_len: self.max_segment_len / SEGMENTATION_BACKOFF_FACTOR,
-            max_cells_per_chip_in_segment: self.max_cells_per_chip_in_segment
-                / SEGMENTATION_BACKOFF_FACTOR,
-        })
-    }
-}
-
-pub struct ExecutionSegment<F, VC>
-where
-    F: PrimeField32,
-    VC: VmConfig<F>,
-{
-    pub chip_complex: VmChipComplex<F, VC::Executor, VC::Periphery>,
-    /// Memory image after segment was executed. Not used in trace generation.
-    pub final_memory: Option<MemoryImage<F>>,
-
-    pub since_last_segment_check: usize,
-    pub trace_height_constraints: Vec<LinearConstraint>,
-
-    /// Air names for debug purposes only.
-    pub(crate) air_names: Vec<String>,
-    /// Metrics collected for this execution segment alone.
-    #[cfg(feature = "bench-metrics")]
-    pub metrics: VmMetrics,
-}
-
-pub struct ExecutionSegmentState {
-    pub pc: u32,
-    pub is_terminated: bool,
-}
-
-impl<F: PrimeField32, VC: VmConfig<F>> ExecutionSegment<F, VC> {
-    /// Creates a new execution segment from a program and initial state, using parent VM config
-    pub fn new(
-        config: &VC,
-        program: Program<F>,
-        init_streams: Streams<F>,
-        initial_memory: Option<MemoryImage<F>>,
-        trace_height_constraints: Vec<LinearConstraint>,
-        #[allow(unused_variables)] fn_bounds: FnBounds,
-    ) -> Self {
-        let mut chip_complex = config.create_chip_complex().unwrap();
-        chip_complex.set_streams(init_streams);
-        let program = if !config.system().profiling {
-            program.strip_debug_infos()
-        } else {
-            program
-        };
-        chip_complex.set_program(program);
-
-        if let Some(initial_memory) = initial_memory {
-            chip_complex.set_initial_memory(initial_memory);
-        }
-        let air_names = chip_complex.air_names();
-
-        Self {
-            chip_complex,
-            final_memory: None,
-            air_names,
-            trace_height_constraints,
-            #[cfg(feature = "bench-metrics")]
-            metrics: VmMetrics {
-                fn_bounds,
-                ..Default::default()
-            },
-            since_last_segment_check: 0,
-        }
-    }
-
-    pub fn system_config(&self) -> &SystemConfig {
-        self.chip_complex.config()
-    }
-
-    pub fn set_override_trace_heights(&mut self, overridden_heights: VmComplexTraceHeights) {
-        self.chip_complex
-            .set_override_system_trace_heights(overridden_heights.system);
-        self.chip_complex
-            .set_override_inventory_trace_heights(overridden_heights.inventory);
-    }
-
-    /// Stopping is triggered by should_segment()
-    pub fn execute_from_pc(
-        &mut self,
-        mut pc: u32,
-    ) -> Result<ExecutionSegmentState, ExecutionError> {
-        let mut timestamp = self.chip_complex.memory_controller().timestamp();
-        let mut prev_backtrace: Option<Backtrace> = None;
-
-        self.chip_complex
-            .connector_chip_mut()
-            .begin(ExecutionState::new(pc, timestamp));
-
-        let mut did_terminate = false;
-
-        loop {
-            #[allow(unused_variables)]
-            let (opcode, dsl_instr) = {
-                let Self {
-                    chip_complex,
-                    #[cfg(feature = "bench-metrics")]
-                    metrics,
-                    ..
-                } = self;
-                let SystemBase {
-                    program_chip,
-                    memory_controller,
-                    ..
-                } = &mut chip_complex.base;
-
-                let (instruction, debug_info) = program_chip.get_instruction(pc)?;
-                tracing::trace!("pc: {pc:#x} | time: {timestamp} | {:?}", instruction);
-
-                #[allow(unused_variables)]
-                let (dsl_instr, trace) = debug_info.as_ref().map_or(
-                    (None, None),
-                    |DebugInfo {
-                         dsl_instruction,
-                         trace,
-                     }| (Some(dsl_instruction), trace.as_ref()),
-                );
-
-                let &Instruction { opcode, c, .. } = instruction;
-                if opcode == SystemOpcode::TERMINATE.global_opcode() {
-                    did_terminate = true;
-                    self.chip_complex.connector_chip_mut().end(
-                        ExecutionState::new(pc, timestamp),
-                        Some(c.as_canonical_u32()),
-                    );
-                    break;
-                }
-
-                // Some phantom instruction handling is more convenient to do here than in
-                // PhantomChip.
-                if opcode == SystemOpcode::PHANTOM.global_opcode() {
-                    // Note: the discriminant is the lower 16 bits of the c operand.
-                    let discriminant = c.as_canonical_u32() as u16;
-                    let phantom = SysPhantom::from_repr(discriminant);
-                    tracing::trace!("pc: {pc:#x} | system phantom: {phantom:?}");
-                    match phantom {
-                        Some(SysPhantom::DebugPanic) => {
-                            if let Some(mut backtrace) = prev_backtrace {
-                                backtrace.resolve();
-                                eprintln!("openvm program failure; backtrace:\n{:?}", backtrace);
-                            } else {
-                                eprintln!("openvm program failure; no backtrace");
-                            }
-                            return Err(ExecutionError::Fail { pc });
-                        }
-                        Some(SysPhantom::CtStart) =>
-                        {
-                            #[cfg(feature = "bench-metrics")]
-                            metrics
-                                .cycle_tracker
-                                .start(dsl_instr.cloned().unwrap_or("Default".to_string()))
-                        }
-                        Some(SysPhantom::CtEnd) =>
-                        {
-                            #[cfg(feature = "bench-metrics")]
-                            metrics
-                                .cycle_tracker
-                                .end(dsl_instr.cloned().unwrap_or("Default".to_string()))
-                        }
-                        _ => {}
-                    }
-                }
-                prev_backtrace = trace.cloned();
-
-                if let Some(executor) = chip_complex.inventory.get_mut_executor(&opcode) {
-                    let next_state = InstructionExecutor::execute(
-                        executor,
-                        memory_controller,
-                        instruction,
-                        ExecutionState::new(pc, timestamp),
-                    )?;
-                    assert!(next_state.timestamp > timestamp);
-                    pc = next_state.pc;
-                    timestamp = next_state.timestamp;
-                } else {
-                    return Err(ExecutionError::DisabledOperation { pc, opcode });
-                };
-                (opcode, dsl_instr.cloned())
-            };
-
-            #[cfg(feature = "bench-metrics")]
-            self.update_instruction_metrics(pc, opcode, dsl_instr);
-
-            if self.should_segment() {
-                self.chip_complex
-                    .connector_chip_mut()
-                    .end(ExecutionState::new(pc, timestamp), None);
-                break;
-            }
-        }
-        self.final_memory = Some(
-            self.chip_complex
-                .base
-                .memory_controller
-                .memory_image()
-                .clone(),
-        );
-
-        Ok(ExecutionSegmentState {
-            pc,
-            is_terminated: did_terminate,
-        })
-    }
-
-    /// Generate ProofInput to prove the segment. Should be called after ::execute
-    pub fn generate_proof_input<SC: StarkGenericConfig>(
-        #[allow(unused_mut)] mut self,
-        cached_program: Option<CommittedTraceData<SC>>,
-    ) -> Result<ProofInput<SC>, GenerationError>
-    where
-        Domain<SC>: PolynomialSpace<Val = F>,
-        VC::Executor: Chip<SC>,
-        VC::Periphery: Chip<SC>,
-    {
-        metrics_span("trace_gen_time_ms", || {
-            self.chip_complex.generate_proof_input(
-                cached_program,
-                &self.trace_height_constraints,
-                #[cfg(feature = "bench-metrics")]
-                &mut self.metrics,
-            )
-        })
-    }
-
-    /// Returns bool of whether to switch to next segment or not. This is called every clock cycle
-    /// inside of Core trace generation.
-    fn should_segment(&mut self) -> bool {
-        if !self.system_config().continuation_enabled {
-            return false;
-        }
-        // Avoid checking segment too often.
-        if self.since_last_segment_check != SEGMENT_CHECK_INTERVAL {
-            self.since_last_segment_check += 1;
-            return false;
-        }
-        self.since_last_segment_check = 0;
-        let segmentation_strategy = &self.system_config().segmentation_strategy;
-        segmentation_strategy.should_segment(
-            &self.air_names,
-            &self
-                .chip_complex
-                .dynamic_trace_heights()
-                .collect::<Vec<_>>(),
-            &self.chip_complex.current_trace_cells(),
-        )
-    }
-
-    pub fn current_trace_cells(&self) -> Vec<usize> {
-        self.chip_complex.current_trace_cells()
-    }
-}
diff --git a/crates/vm/src/arch/state.rs b/crates/vm/src/arch/state.rs
new file mode 100644
index 0000000000..a9c2ea3925
--- /dev/null
+++ b/crates/vm/src/arch/state.rs
@@ -0,0 +1,192 @@
+use std::{
+    fmt::Debug,
+    ops::{Deref, DerefMut},
+};
+
+use openvm_instructions::exe::SparseMemoryImage;
+use rand::{rngs::StdRng, SeedableRng};
+
+use super::{create_memory_image, ExecutionError, Streams};
+#[cfg(feature = "metrics")]
+use crate::metrics::VmMetrics;
+use crate::{
+    arch::{execution_mode::ExecutionCtxTrait, SystemConfig},
+    system::memory::online::GuestMemory,
+};
+
+/// Represents the core state of a VM.
+pub struct VmState<F, MEM = GuestMemory> {
+    pub instret: u64, // presumably the number of instructions retired so far; TODO confirm
+    pub pc: u32, // program counter
+    pub memory: MEM, // guest memory; the type defaults to `GuestMemory`
+    pub streams: Streams<F>,
+    pub rng: StdRng, // `VmState::new` seeds it with `StdRng::seed_from_u64`
+    /// The public values of the PublicValuesAir when it exists
+    pub(crate) custom_pvs: Vec<Option<F>>, // `VmState::new` fills every slot with `None`
+    #[cfg(feature = "metrics")]
+    pub metrics: VmMetrics, // only compiled in with the `metrics` feature
+}
+
+pub(super) const DEFAULT_RNG_SEED: u64 = 0; // seed `VmState::initial` and `VmState::reset` use
+
+impl<F: Clone, MEM> VmState<F, MEM> {
+    /// `num_custom_pvs` should only be nonzero when the PublicValuesAir exists.
+    pub fn new(
+        instret: u64,
+        pc: u32,
+        memory: MEM,
+        streams: impl Into<Streams<F>>,
+        seed: u64,
+        num_custom_pvs: usize,
+    ) -> Self {
+        Self {
+            instret,
+            pc,
+            memory,
+            streams: streams.into(),
+            rng: StdRng::seed_from_u64(seed), // the RNG state is derived from `seed` alone
+            custom_pvs: vec![None; num_custom_pvs], // one unset slot per custom public value
+            #[cfg(feature = "metrics")]
+            metrics: VmMetrics::default(),
+        }
+    }
+}
+
+impl<F: Clone> VmState<F, GuestMemory> {
+    /// Builds the state execution starts from: zero `instret`, `pc_start`, default RNG seed.
+    pub fn initial(
+        system_config: &SystemConfig,
+        init_memory: &SparseMemoryImage,
+        pc_start: u32,
+        inputs: impl Into<Streams<F>>,
+    ) -> Self {
+        let memory_image = create_memory_image(&system_config.memory_config, init_memory);
+        let num_custom_pvs = match system_config.has_public_values_chip() {
+            true => system_config.num_public_values,
+            false => 0, // no public values chip, so no slots to reserve
+        };
+        Self::new(
+            0,
+            pc_start,
+            memory_image,
+            inputs,
+            DEFAULT_RNG_SEED,
+            num_custom_pvs,
+        )
+    }
+
+    pub fn reset(
+        &mut self,
+        init_memory: &SparseMemoryImage,
+        pc_start: u32,
+        streams: impl Into<Streams<F>>,
+    ) {
+        (self.instret, self.pc) = (0, pc_start);
+        self.memory.memory.fill_zero(); // wipe guest memory before loading the new image
+        self.memory.memory.set_from_sparse(init_memory);
+        self.streams = streams.into();
+        self.rng = StdRng::seed_from_u64(DEFAULT_RNG_SEED); // same seed as `VmState::initial`
+        self.custom_pvs.fill(None); // unset public values left over from the previous run
+    }
+}
+
+/// Represents the full execution state of a VM during execution.
+/// The global state is generic in guest memory `MEM` and additional context `CTX`.
+/// The host state is execution context specific.
+// @dev: Do not confuse with `ExecutionState` struct.
+pub struct VmExecState<F, MEM, CTX> {
+    /// Core VM state; also reachable through the `Deref`/`DerefMut` impls of this type
+    pub vm_state: VmState<F, MEM>,
+    /// Execution outcome; `VmExecState::new` initialises it to `Ok(None)`
+    pub exit_code: Result<Option<u32>, ExecutionError>,
+    pub ctx: CTX, // execution-specific context supplied by the caller
+}
+
+impl<F, MEM, CTX> VmExecState<F, MEM, CTX> {
+    pub fn new(vm_state: VmState<F, MEM>, ctx: CTX) -> Self {
+        Self {
+            vm_state,
+            ctx,
+            exit_code: Ok(None), // neither an exit code nor an error recorded yet
+        }
+    }
+}
+
+impl<F, MEM, CTX> Deref for VmExecState<F, MEM, CTX> {
+    type Target = VmState<F, MEM>; // `VmState` fields are readable directly on `VmExecState`
+
+    fn deref(&self) -> &Self::Target {
+        &self.vm_state
+    }
+}
+
+impl<F, MEM, CTX> DerefMut for VmExecState<F, MEM, CTX> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.vm_state // mutable access to the wrapped `VmState`
+    }
+}
+
+impl<F, CTX> VmExecState<F, GuestMemory, CTX>
+where
+    CTX: ExecutionCtxTrait,
+{
+    /// Reports a `BLOCK_SIZE`-element access to `ctx`, then reads the block from guest memory
+    #[inline(always)]
+    pub fn vm_read<T: Copy + Debug, const BLOCK_SIZE: usize>(
+        &mut self,
+        addr_space: u32,
+        ptr: u32,
+    ) -> [T; BLOCK_SIZE] {
+        let block_len = BLOCK_SIZE as u32;
+        self.ctx.on_memory_operation(addr_space, ptr, block_len);
+        self.host_read::<T, BLOCK_SIZE>(addr_space, ptr)
+    }
+
+    /// Reports a `BLOCK_SIZE`-element access to `ctx`, then writes the block to guest memory
+    #[inline(always)]
+    pub fn vm_write<T: Copy + Debug, const BLOCK_SIZE: usize>(
+        &mut self,
+        addr_space: u32,
+        ptr: u32,
+        data: &[T; BLOCK_SIZE],
+    ) {
+        let block_len = BLOCK_SIZE as u32;
+        self.ctx.on_memory_operation(addr_space, ptr, block_len);
+        self.host_write::<T, BLOCK_SIZE>(addr_space, ptr, data)
+    }
+
+    #[inline(always)]
+    pub fn vm_read_slice<T: Copy + Debug>(
+        &mut self,
+        addr_space: u32,
+        ptr: u32,
+        len: usize,
+    ) -> &[T] {
+        self.ctx.on_memory_operation(addr_space, ptr, len as u32); // NOTE: `as` truncates `len`
+        self.host_read_slice::<T>(addr_space, ptr, len)
+    }
+
+    #[inline(always)]
+    pub fn host_read<T: Copy + Debug, const BLOCK_SIZE: usize>(
+        &self,
+        addr_space: u32,
+        ptr: u32,
+    ) -> [T; BLOCK_SIZE] {
+        unsafe { self.vm_state.memory.read(addr_space, ptr) } // no `ctx` notification
+    }
+
+    #[inline(always)]
+    pub fn host_write<T: Copy + Debug, const BLOCK_SIZE: usize>(
+        &mut self,
+        addr_space: u32,
+        ptr: u32,
+        data: &[T; BLOCK_SIZE],
+    ) {
+        unsafe { self.vm_state.memory.write(addr_space, ptr, *data) } // no `ctx` notification
+    }
+
+    #[inline(always)]
+    pub fn host_read_slice<T: Copy + Debug>(&self, addr_space: u32, ptr: u32, len: usize) -> &[T] {
+        unsafe { self.vm_state.memory.get_slice(addr_space, ptr, len) } // no `ctx` notification
+    }
+}
diff --git a/crates/vm/src/arch/testing/execution/mod.rs b/crates/vm/src/arch/testing/execution/mod.rs
index c0fdb71c71..3177e7250b 100644
--- a/crates/vm/src/arch/testing/execution/mod.rs
+++ b/crates/vm/src/arch/testing/execution/mod.rs
@@ -1,12 +1,12 @@
 use std::{borrow::BorrowMut, mem::size_of, sync::Arc};
 
-use air::{DummyExecutionInteractionCols, ExecutionDummyAir};
+use air::DummyExecutionInteractionCols;
 use openvm_stark_backend::{
     config::{StarkGenericConfig, Val},
     p3_field::{Field, FieldAlgebra, PrimeField32},
     p3_matrix::dense::RowMajorMatrix,
-    prover::types::AirProofInput,
-    AirRef, Chip, ChipUsageGetter,
+    prover::{cpu::CpuBackend, types::AirProvingContext},
+    Chip, ChipUsageGetter,
 };
 
 use crate::arch::{ExecutionBus, ExecutionState};
@@ -48,24 +48,20 @@ impl<F: PrimeField32> ExecutionTester<F> {
     }
 }
 
-impl<SC: StarkGenericConfig> Chip<SC> for ExecutionTester<Val<SC>>
+impl<SC: StarkGenericConfig, RA> Chip<RA, CpuBackend<SC>> for ExecutionTester<Val<SC>>
 where
     Val<SC>: Field,
 {
-    fn air(&self) -> AirRef<SC> {
-        Arc::new(ExecutionDummyAir::new(self.bus))
-    }
-
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
+    fn generate_proving_ctx(&self, _: RA) -> AirProvingContext<CpuBackend<SC>> { // `RA` unused
         let height = self.records.len().next_power_of_two();
         let width = self.trace_width();
         let mut values = Val::<SC>::zero_vec(height * width);
         // This zip only goes through records. The padding rows between records.len()..height
         // are filled with zeros - in particular count = 0 so nothing is added to bus.
-        for (row, record) in values.chunks_mut(width).zip(self.records) {
-            *row.borrow_mut() = record;
+        for (row, record) in values.chunks_mut(width).zip(&self.records) {
+            *row.borrow_mut() = *record; // copy the record into its trace row
         }
-        AirProofInput::simple_no_pis(RowMajorMatrix::new(values, width))
+        AirProvingContext::simple_no_pis(Arc::new(RowMajorMatrix::new(values, width)))
     }
 }
 impl<F: Field> ChipUsageGetter for ExecutionTester<F> {
diff --git a/crates/vm/src/arch/testing/memory/air.rs b/crates/vm/src/arch/testing/memory/air.rs
index 8a394c0cce..efca131ae8 100644
--- a/crates/vm/src/arch/testing/memory/air.rs
+++ b/crates/vm/src/arch/testing/memory/air.rs
@@ -1,46 +1,153 @@
-use std::{borrow::Borrow, mem::size_of};
+use std::{mem::size_of, sync::Arc};
 
-use openvm_circuit_primitives_derive::AlignedBorrow;
 use openvm_stark_backend::{
+    config::{StarkGenericConfig, Val},
     interaction::InteractionBuilder,
     p3_air::{Air, BaseAir},
-    p3_matrix::Matrix,
+    p3_field::{FieldAlgebra, PrimeField32},
+    p3_matrix::{dense::RowMajorMatrix, Matrix},
+    prover::{cpu::CpuBackend, types::AirProvingContext},
     rap::{BaseAirWithPublicValues, PartitionedBaseAir},
+    Chip, ChipUsageGetter,
 };
 
 use crate::system::memory::{offline_checker::MemoryBus, MemoryAddress};
 
-#[derive(Clone, Copy, Debug, AlignedBorrow, derive_new::new)]
 #[repr(C)]
-pub struct DummyMemoryInteractionCols<T, const BLOCK_SIZE: usize> {
-    pub address: MemoryAddress<T, T>,
-    pub data: [T; BLOCK_SIZE],
-    pub timestamp: T,
+#[derive(Clone, Copy)]
+pub struct DummyMemoryInteractionColsRef<'a, T> {
+    pub address: MemoryAddress<&'a T, &'a T>, // the leading columns of the row
+    pub data: &'a [T], // the columns between the address and `timestamp`
+    pub timestamp: &'a T, // the second-to-last column
     /// The send frequency. Send corresponds to write. To read, set to negative.
-    pub count: T,
+    pub count: &'a T, // the last column
+}
+
+#[repr(C)]
+pub struct DummyMemoryInteractionColsMut<'a, T> {
+    pub address: MemoryAddress<&'a mut T, &'a mut T>, // columns 0 and 1 of the row
+    pub data: &'a mut [T], // the columns between the address and `timestamp`
+    pub timestamp: &'a mut T, // the second-to-last column
+    /// The send frequency. Send corresponds to write. To read, set to negative.
+    pub count: &'a mut T, // the last column
+}
+
+impl<'a, T> DummyMemoryInteractionColsRef<'a, T> {
+    pub fn from_slice(slice: &'a [T]) -> Self {
+        let (head, rest) = slice.split_at(size_of::<MemoryAddress<u8, u8>>()); // address columns
+        let (count, rest) = rest.split_last().unwrap(); // last column
+        let (timestamp, data) = rest.split_last().unwrap(); // what is left over is `data`
+        Self {
+            address: MemoryAddress::new(&head[0], &head[1]),
+            data,
+            timestamp,
+            count,
+        }
+    }
+}
+
+impl<'a, T> DummyMemoryInteractionColsMut<'a, T> {
+    pub fn from_mut_slice(slice: &'a mut [T]) -> Self {
+        let (count, rest) = slice.split_last_mut().unwrap(); // last column
+        let (timestamp, rest) = rest.split_last_mut().unwrap();
+        let (addr_space, rest) = rest.split_first_mut().unwrap(); // first column
+        let (ptr, data) = rest.split_first_mut().unwrap(); // what is left over is `data`
+        Self {
+            address: MemoryAddress::new(addr_space, ptr),
+            data,
+            timestamp,
+            count,
+        }
+    }
 }
 
 #[derive(Clone, Copy, Debug, derive_new::new)]
-pub struct MemoryDummyAir<const BLOCK_SIZE: usize> {
+pub struct MemoryDummyAir {
     pub bus: MemoryBus,
+    pub block_size: usize, // number of `data` columns; the AIR is `block_size + 4` wide
 }
 
-impl<const BLOCK_SIZE: usize, F> BaseAirWithPublicValues<F> for MemoryDummyAir<BLOCK_SIZE> {}
-impl<const BLOCK_SIZE: usize, F> PartitionedBaseAir<F> for MemoryDummyAir<BLOCK_SIZE> {}
-impl<const BLOCK_SIZE: usize, F> BaseAir<F> for MemoryDummyAir<BLOCK_SIZE> {
+impl<F> BaseAirWithPublicValues<F> for MemoryDummyAir {} // trait defaults only
+impl<F> PartitionedBaseAir<F> for MemoryDummyAir {} // trait defaults only
+impl<F> BaseAir<F> for MemoryDummyAir {
     fn width(&self) -> usize {
-        size_of::<DummyMemoryInteractionCols<u8, BLOCK_SIZE>>()
+        self.block_size + 4 // data plus address_space, pointer, timestamp and count
     }
 }
 
-impl<const BLOCK_SIZE: usize, AB: InteractionBuilder> Air<AB> for MemoryDummyAir<BLOCK_SIZE> {
+impl<AB: InteractionBuilder> Air<AB> for MemoryDummyAir {
     fn eval(&self, builder: &mut AB) {
         let main = builder.main();
         let local = main.row_slice(0);
-        let local: &DummyMemoryInteractionCols<AB::Var, BLOCK_SIZE> = (*local).borrow();
+        let local = DummyMemoryInteractionColsRef::from_slice(&local); // typed view of the row
 
         self.bus
-            .send(local.address, local.data.to_vec(), local.timestamp)
-            .eval(builder, local.count);
+            .send(
+                MemoryAddress::new(*local.address.address_space, *local.address.pointer),
+                local.data.to_vec(),
+                *local.timestamp,
+            )
+            .eval(builder, *local.count); // `count` is the send frequency; negative for a read
+    }
+}
+
+#[derive(Clone)]
+pub struct MemoryDummyChip<F> {
+    pub air: MemoryDummyAir,
+    pub trace: Vec<F>, // flattened rows, each `air.block_size + 4` cells, appended by `push`
+}
+
+impl<F> MemoryDummyChip<F> {
+    pub fn new(air: MemoryDummyAir) -> Self {
+        Self {
+            air,
+            trace: Vec::new(), // starts with no rows
+        }
+    }
+}
+
+impl<F: PrimeField32> MemoryDummyChip<F> {
+    pub fn send(&mut self, addr_space: u32, ptr: u32, data: &[F], timestamp: u32) {
+        self.push(addr_space, ptr, data, timestamp, F::ONE); // count = +1
+    }
+
+    pub fn receive(&mut self, addr_space: u32, ptr: u32, data: &[F], timestamp: u32) {
+        self.push(addr_space, ptr, data, timestamp, F::NEG_ONE); // count = -1
+    }
+
+    /// Appends one row: address space, pointer, `block_size` data cells, timestamp, count.
+    pub fn push(&mut self, addr_space: u32, ptr: u32, data: &[F], timestamp: u32, count: F) {
+        assert_eq!(data.len(), self.air.block_size);
+        let cells = &mut self.trace;
+        cells.extend([addr_space, ptr].map(F::from_canonical_u32));
+        cells.extend_from_slice(data);
+        cells.extend([F::from_canonical_u32(timestamp), count]);
+    }
+}
+
+impl<SC: StarkGenericConfig, RA> Chip<RA, CpuBackend<SC>> for MemoryDummyChip<Val<SC>>
+where
+    Val<SC>: PrimeField32,
+{
+    fn generate_proving_ctx(&self, _: RA) -> AirProvingContext<CpuBackend<SC>> {
+        let width = self.trace_width();
+        let padded_height = self.current_trace_height().next_power_of_two();
+        // Work on a copy (`&self` keeps the log) and zero-pad it to `padded_height` rows.
+        let mut cells = self.trace.clone();
+        cells.resize(padded_height * width, Val::<SC>::ZERO);
+        let matrix = RowMajorMatrix::new(cells, width);
+        AirProvingContext::simple_no_pis(Arc::new(matrix))
+    }
+}
+
+impl<F: PrimeField32> ChipUsageGetter for MemoryDummyChip<F> {
+    fn air_name(&self) -> String {
+        format!("MemoryDummyAir<{}>", self.air.block_size)
+    }
+    fn current_trace_height(&self) -> usize {
+        self.trace.len() / self.trace_width() // rows pushed so far = cells / cells per row
+    }
+    fn trace_width(&self) -> usize {
+        BaseAir::<F>::width(&self.air) // `block_size + 4`, see `BaseAir for MemoryDummyAir`
     }
 }
diff --git a/crates/vm/src/arch/testing/memory/mod.rs b/crates/vm/src/arch/testing/memory/mod.rs
index ae1136bc7f..a16adc7e2d 100644
--- a/crates/vm/src/arch/testing/memory/mod.rs
+++ b/crates/vm/src/arch/testing/memory/mod.rs
@@ -1,138 +1,91 @@
-use std::{array::from_fn, borrow::BorrowMut as _, cell::RefCell, mem::size_of, rc::Rc, sync::Arc};
+use std::collections::HashMap;
 
-use air::{DummyMemoryInteractionCols, MemoryDummyAir};
-use openvm_circuit::system::memory::MemoryController;
-use openvm_stark_backend::{
-    config::{StarkGenericConfig, Val},
-    p3_field::{FieldAlgebra, PrimeField32},
-    p3_matrix::dense::RowMajorMatrix,
-    prover::types::AirProofInput,
-    AirRef, Chip, ChipUsageGetter,
-};
-use rand::{seq::SliceRandom, Rng};
+use air::{MemoryDummyAir, MemoryDummyChip};
+use openvm_stark_backend::p3_field::{Field, PrimeField32};
+use rand::Rng;
 
-use crate::system::memory::{offline_checker::MemoryBus, MemoryAddress, RecordId};
+use crate::system::memory::{online::TracingMemory, MemoryController};
 
 pub mod air;
 
-const WORD_SIZE: usize = 1;
-
 /// A dummy testing chip that will add unconstrained messages into the [MemoryBus].
 /// Stores a log of raw messages to send/receive to the [MemoryBus].
 ///
 /// It will create a [air::MemoryDummyAir] to add messages to MemoryBus.
-pub struct MemoryTester<F> {
-    pub bus: MemoryBus,
-    pub controller: Rc<RefCell<MemoryController<F>>>,
-    /// Log of record ids
-    pub records: Vec<RecordId>,
+pub struct MemoryTester<F: Field> {
+    /// Map from `block_size` to [MemoryDummyChip] of that block size
+    pub chip_for_block: HashMap<usize, MemoryDummyChip<F>>,
+    pub memory: TracingMemory,
+    pub(super) controller: MemoryController<F>,
 }
 
 impl<F: PrimeField32> MemoryTester<F> {
-    pub fn new(controller: Rc<RefCell<MemoryController<F>>>) -> Self {
-        let bus = controller.borrow().memory_bus;
+    pub fn new(controller: MemoryController<F>, memory: TracingMemory) -> Self {
+        let bus = controller.memory_bus;
+        let mut chip_for_block = HashMap::new();
+        for log_block_size in 0..6 {
+            let block_size = 1 << log_block_size;
+            let chip = MemoryDummyChip::new(MemoryDummyAir::new(bus, block_size));
+            chip_for_block.insert(block_size, chip);
+        }
         Self {
-            bus,
+            chip_for_block,
+            memory,
             controller,
-            records: Vec::new(),
         }
     }
 
-    /// Returns the cell value at the current timestamp according to `MemoryController`.
-    pub fn read_cell(&mut self, address_space: usize, pointer: usize) -> F {
-        let [addr_space, pointer] = [address_space, pointer].map(F::from_canonical_usize);
-        // core::BorrowMut confuses compiler
-        let (record_id, value) =
-            RefCell::borrow_mut(&self.controller).read_cell(addr_space, pointer);
-        self.records.push(record_id);
-        value
-    }
-
-    pub fn write_cell(&mut self, address_space: usize, pointer: usize, value: F) {
-        let [addr_space, pointer] = [address_space, pointer].map(F::from_canonical_usize);
-        let (record_id, _) =
-            RefCell::borrow_mut(&self.controller).write_cell(addr_space, pointer, value);
-        self.records.push(record_id);
+    pub fn read<const N: usize>(&mut self, addr_space: usize, ptr: usize) -> [F; N] {
+        let memory = &mut self.memory;
+        let t = memory.timestamp();
+        // TODO: this could be improved if we added a TracingMemory::get_f function
+        let (t_prev, data) = if addr_space <= 3 {
+            let (t_prev, data) = unsafe { memory.read::<u8, N, 4>(addr_space as u32, ptr as u32) };
+            (t_prev, data.map(F::from_canonical_u8))
+        } else {
+            unsafe { memory.read::<F, N, 1>(addr_space as u32, ptr as u32) }
+        };
+        self.chip_for_block.get_mut(&N).unwrap().receive(
+            addr_space as u32,
+            ptr as u32,
+            &data,
+            t_prev,
+        );
+        self.chip_for_block
+            .get_mut(&N)
+            .unwrap()
+            .send(addr_space as u32, ptr as u32, &data, t);
+
+        data
     }
 
-    pub fn read<const N: usize>(&mut self, address_space: usize, pointer: usize) -> [F; N] {
-        from_fn(|i| self.read_cell(address_space, pointer + i))
-    }
-
-    pub fn write<const N: usize>(
-        &mut self,
-        address_space: usize,
-        mut pointer: usize,
-        cells: [F; N],
-    ) {
-        for cell in cells {
-            self.write_cell(address_space, pointer, cell);
-            pointer += 1;
-        }
-    }
-}
-
-impl<SC: StarkGenericConfig> Chip<SC> for MemoryTester<Val<SC>>
-where
-    Val<SC>: PrimeField32,
-{
-    fn air(&self) -> AirRef<SC> {
-        Arc::new(MemoryDummyAir::<WORD_SIZE>::new(self.bus))
-    }
-
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
-        let offline_memory = self.controller.borrow().offline_memory();
-        let offline_memory = offline_memory.lock().unwrap();
-
-        let height = self.records.len().next_power_of_two();
-        let width = self.trace_width();
-        let mut values = Val::<SC>::zero_vec(2 * height * width);
-        // This zip only goes through records. The padding rows between records.len()..height
-        // are filled with zeros - in particular count = 0 so nothing is added to bus.
-        for (row, id) in values.chunks_mut(2 * width).zip(self.records) {
-            let (first, second) = row.split_at_mut(width);
-            let row: &mut DummyMemoryInteractionCols<Val<SC>, WORD_SIZE> = first.borrow_mut();
-            let record = offline_memory.record_by_id(id);
-            row.address = MemoryAddress {
-                address_space: record.address_space,
-                pointer: record.pointer,
+    pub fn write<const N: usize>(&mut self, addr_space: usize, ptr: usize, data: [F; N]) {
+        let memory = &mut self.memory;
+        let t = memory.timestamp();
+        // TODO: this could be improved if we added a TracingMemory::write_f function
+        let (t_prev, data_prev) = if addr_space <= 3 {
+            let (t_prev, data_prev) = unsafe {
+                memory.write::<u8, N, 4>(
+                    addr_space as u32,
+                    ptr as u32,
+                    data.map(|x| x.as_canonical_u32() as u8),
+                )
             };
-            row.data
-                .copy_from_slice(record.prev_data_slice().unwrap_or(record.data_slice()));
-            row.timestamp = Val::<SC>::from_canonical_u32(record.prev_timestamp);
-            row.count = -Val::<SC>::ONE;
-
-            let row: &mut DummyMemoryInteractionCols<Val<SC>, WORD_SIZE> = second.borrow_mut();
-            row.address = MemoryAddress {
-                address_space: record.address_space,
-                pointer: record.pointer,
-            };
-            row.data.copy_from_slice(record.data_slice());
-            row.timestamp = Val::<SC>::from_canonical_u32(record.timestamp);
-            row.count = Val::<SC>::ONE;
-        }
-        AirProofInput::simple_no_pis(RowMajorMatrix::new(values, width))
-    }
-}
-
-impl<F: PrimeField32> ChipUsageGetter for MemoryTester<F> {
-    fn air_name(&self) -> String {
-        "MemoryDummyAir".to_string()
-    }
-    fn current_trace_height(&self) -> usize {
-        self.records.len()
+            (t_prev, data_prev.map(F::from_canonical_u8))
+        } else {
+            unsafe { memory.write::<F, N, 1>(addr_space as u32, ptr as u32, data) }
+        };
+        self.chip_for_block.get_mut(&N).unwrap().receive(
+            addr_space as u32,
+            ptr as u32,
+            &data_prev,
+            t_prev,
+        );
+        self.chip_for_block
+            .get_mut(&N)
+            .unwrap()
+            .send(addr_space as u32, ptr as u32, &data, t);
     }
-
-    fn trace_width(&self) -> usize {
-        size_of::<DummyMemoryInteractionCols<u8, WORD_SIZE>>()
-    }
-}
-
-pub fn gen_address_space<R>(rng: &mut R) -> usize
-where
-    R: Rng + ?Sized,
-{
-    *[1, 2].choose(rng).unwrap()
 }
 
 pub fn gen_pointer<R>(rng: &mut R, len: usize) -> usize
diff --git a/crates/vm/src/arch/testing/mod.rs b/crates/vm/src/arch/testing/mod.rs
index 44b19177be..7c4d5f1672 100644
--- a/crates/vm/src/arch/testing/mod.rs
+++ b/crates/vm/src/arch/testing/mod.rs
@@ -1,21 +1,26 @@
-use std::{
-    cell::RefCell,
-    iter::zip,
-    rc::Rc,
-    sync::{Arc, Mutex},
-};
+use std::{marker::PhantomData, sync::Arc};
 
-use openvm_circuit_primitives::var_range::{
-    SharedVariableRangeCheckerChip, VariableRangeCheckerBus,
+use itertools::zip_eq;
+use openvm_circuit_primitives::{
+    utils::next_power_of_two_or_zero,
+    var_range::{
+        SharedVariableRangeCheckerChip, VariableRangeCheckerBus, VariableRangeCheckerChip,
+    },
 };
-use openvm_instructions::instruction::Instruction;
+use openvm_instructions::{instruction::Instruction, riscv::RV32_REGISTER_AS, NATIVE_AS};
 use openvm_stark_backend::{
     config::{StarkGenericConfig, Val},
     engine::VerificationData,
-    interaction::BusIndex,
-    p3_field::PrimeField32,
-    p3_matrix::dense::{DenseMatrix, RowMajorMatrix},
-    prover::types::AirProofInput,
+    interaction::{BusIndex, PermutationCheckBus},
+    p3_air::BaseAir,
+    p3_field::{Field, PrimeField32},
+    p3_matrix::dense::RowMajorMatrix,
+    p3_util::log2_strict_usize,
+    prover::{
+        cpu::{CpuBackend, CpuDevice},
+        types::AirProvingContext,
+    },
+    rap::AnyRap,
     verifier::VerificationError,
     AirRef, Chip,
 };
@@ -32,27 +37,32 @@ use program::ProgramTester;
 use rand::{rngs::StdRng, RngCore, SeedableRng};
 use tracing::Level;
 
-use super::{ExecutionBus, InstructionExecutor, SystemPort};
+use super::{ExecutionBridge, ExecutionBus, PreflightExecutor};
 use crate::{
-    arch::{ExecutionState, MemoryConfig},
+    arch::{
+        testing::{execution::air::ExecutionDummyAir, program::air::ProgramDummyAir},
+        vm_poseidon2_config, Arena, ExecutionState, MatrixRecordArena, MemoryConfig, Streams,
+        VmStateMut,
+    },
     system::{
         memory::{
+            adapter::records::arena_size_bound,
             offline_checker::{MemoryBridge, MemoryBus},
-            MemoryController, OfflineMemory,
+            online::TracingMemory,
+            MemoryAirInventory, MemoryController, SharedMemoryHelper, CHUNK,
         },
         poseidon2::Poseidon2PeripheryChip,
         program::ProgramBus,
+        SystemPort,
     },
 };
 
 pub mod execution;
 pub mod memory;
 pub mod program;
-pub mod test_adapter;
 
 pub use execution::ExecutionTester;
 pub use memory::MemoryTester;
-pub use test_adapter::TestAdapterChip;
 
 pub const EXECUTION_BUS: BusIndex = 0;
 pub const MEMORY_BUS: BusIndex = 1;
@@ -63,79 +73,126 @@ pub const BYTE_XOR_BUS: BusIndex = 10;
 pub const RANGE_TUPLE_CHECKER_BUS: BusIndex = 11;
 pub const MEMORY_MERKLE_BUS: BusIndex = 12;
 
-const RANGE_CHECKER_BUS: BusIndex = 4;
+pub const RANGE_CHECKER_BUS: BusIndex = 4;
+
+pub type ArenaId = usize;
+
+pub struct TestChipHarness<F, E, A, C, RA = MatrixRecordArena<F>> {
+    pub executor: E,
+    pub air: A,
+    pub chip: C,
+    pub arena: RA,
+    phantom: PhantomData<F>,
+}
+
+impl<F, E, A, C, RA> TestChipHarness<F, E, A, C, RA>
+where
+    F: Field,
+    A: BaseAir<F>,
+    RA: Arena,
+{
+    pub fn with_capacity(executor: E, air: A, chip: C, height: usize) -> Self {
+        let width = air.width();
+        let height = next_power_of_two_or_zero(height);
+        let arena = RA::with_capacity(height, width);
+        Self {
+            executor,
+            air,
+            chip,
+            arena,
+            phantom: PhantomData,
+        }
+    }
+}
 
-pub struct VmChipTestBuilder<F: PrimeField32> {
+pub struct VmChipTestBuilder<F: Field> {
     pub memory: MemoryTester<F>,
+    pub streams: Streams<F>,
+    pub rng: StdRng,
     pub execution: ExecutionTester<F>,
     pub program: ProgramTester<F>,
-    rng: StdRng,
+    internal_rng: StdRng,
+    custom_pvs: Vec<Option<F>>,
     default_register: usize,
     default_pointer: usize,
 }
 
 impl<F: PrimeField32> VmChipTestBuilder<F> {
     pub fn new(
-        memory_controller: Rc<RefCell<MemoryController<F>>>,
+        controller: MemoryController<F>,
+        memory: TracingMemory,
+        streams: Streams<F>,
+        rng: StdRng,
         execution_bus: ExecutionBus,
         program_bus: ProgramBus,
-        rng: StdRng,
+        internal_rng: StdRng,
     ) -> Self {
         setup_tracing_with_log_level(Level::WARN);
         Self {
-            memory: MemoryTester::new(memory_controller),
+            memory: MemoryTester::new(controller, memory),
+            streams,
+            rng,
+            custom_pvs: Vec::new(),
             execution: ExecutionTester::new(execution_bus),
             program: ProgramTester::new(program_bus),
-            rng,
+            internal_rng,
             default_register: 0,
             default_pointer: 0,
         }
     }
 
     // Passthrough functions from ExecutionTester and MemoryTester for better dev-ex
-    pub fn execute<E: InstructionExecutor<F>>(
+    pub fn execute<E, A, C, RA>(
         &mut self,
-        executor: &mut E,
+        harness: &mut TestChipHarness<F, E, A, C, RA>,
         instruction: &Instruction<F>,
-    ) {
+    ) where
+        E: PreflightExecutor<F, RA>,
+    {
         let initial_pc = self.next_elem_size_u32();
-        self.execute_with_pc(executor, instruction, initial_pc);
+        self.execute_with_pc(harness, instruction, initial_pc);
     }
 
-    pub fn execute_with_pc<E: InstructionExecutor<F>>(
+    pub fn execute_with_pc<E, A, C, RA>(
         &mut self,
-        executor: &mut E,
+        harness: &mut TestChipHarness<F, E, A, C, RA>,
         instruction: &Instruction<F>,
         initial_pc: u32,
-    ) {
+    ) where
+        E: PreflightExecutor<F, RA>,
+    {
         let initial_state = ExecutionState {
             pc: initial_pc,
-            timestamp: self.memory.controller.borrow().timestamp(),
+            timestamp: self.memory.memory.timestamp(),
+        };
+        tracing::debug!("initial_timestamp={}", self.memory.memory.timestamp());
+
+        let mut pc = initial_pc;
+        let state_mut = VmStateMut {
+            pc: &mut pc,
+            memory: &mut self.memory.memory,
+            streams: &mut self.streams,
+            rng: &mut self.rng,
+            custom_pvs: &mut self.custom_pvs,
+            ctx: &mut harness.arena,
+            #[cfg(feature = "metrics")]
+            metrics: &mut Default::default(),
         };
-        tracing::debug!(?initial_state.timestamp);
-
-        let final_state = executor
-            .execute(
-                &mut *self.memory.controller.borrow_mut(),
-                instruction,
-                initial_state,
-            )
+        harness
+            .executor
+            .execute(state_mut, instruction)
             .expect("Expected the execution not to fail");
+        let final_state = ExecutionState {
+            pc,
+            timestamp: self.memory.memory.timestamp(),
+        };
 
         self.program.execute(instruction, &initial_state);
         self.execution.execute(initial_state, final_state);
     }
 
     fn next_elem_size_u32(&mut self) -> u32 {
-        self.rng.next_u32() % (1 << (F::bits() - 2))
-    }
-
-    pub fn read_cell(&mut self, address_space: usize, pointer: usize) -> F {
-        self.memory.read_cell(address_space, pointer)
-    }
-
-    pub fn write_cell(&mut self, address_space: usize, pointer: usize, value: F) {
-        self.memory.write_cell(address_space, pointer, value);
+        self.internal_rng.next_u32() % (1 << (F::bits() - 2))
     }
 
     pub fn read<const N: usize>(&mut self, address_space: usize, pointer: usize) -> [F; N] {
@@ -162,9 +219,22 @@ impl<F: PrimeField32> VmChipTestBuilder<F> {
         pointer: usize,
         writes: Vec<[F; NUM_LIMBS]>,
     ) {
-        self.write(1usize, register, [F::from_canonical_usize(pointer)]);
-        for (i, &write) in writes.iter().enumerate() {
-            self.write(2usize, pointer + i * NUM_LIMBS, write);
+        self.write(
+            1usize,
+            register,
+            pointer.to_le_bytes().map(F::from_canonical_u8),
+        );
+        if NUM_LIMBS.is_power_of_two() {
+            for (i, &write) in writes.iter().enumerate() {
+                self.write(2usize, pointer + i * NUM_LIMBS, write);
+            }
+        } else {
+            for (i, &write) in writes.iter().enumerate() {
+                let ptr = pointer + i * NUM_LIMBS;
+                for j in (0..NUM_LIMBS).step_by(4) {
+                    self.write::<4>(2usize, ptr + j, write[j..j + 4].try_into().unwrap());
+                }
+            }
         }
     }
 
@@ -176,6 +246,10 @@ impl<F: PrimeField32> VmChipTestBuilder<F> {
         }
     }
 
+    pub fn execution_bridge(&self) -> ExecutionBridge {
+        ExecutionBridge::new(self.execution.bus, self.program.bus)
+    }
+
     pub fn execution_bus(&self) -> ExecutionBus {
         self.execution.bus
     }
@@ -185,27 +259,23 @@ impl<F: PrimeField32> VmChipTestBuilder<F> {
     }
 
     pub fn memory_bus(&self) -> MemoryBus {
-        self.memory.bus
-    }
-
-    pub fn memory_controller(&self) -> Rc<RefCell<MemoryController<F>>> {
-        self.memory.controller.clone()
+        self.memory.controller.memory_bus
     }
 
     pub fn range_checker(&self) -> SharedVariableRangeCheckerChip {
-        self.memory.controller.borrow().range_checker.clone()
+        self.memory.controller.range_checker.clone()
     }
 
     pub fn memory_bridge(&self) -> MemoryBridge {
-        self.memory.controller.borrow().memory_bridge()
+        self.memory.controller.memory_bridge()
     }
 
-    pub fn address_bits(&self) -> usize {
-        self.memory.controller.borrow().mem_config.pointer_max_bits
+    pub fn memory_helper(&self) -> SharedMemoryHelper<F> {
+        self.memory.controller.helper()
     }
 
-    pub fn offline_memory_mutex_arc(&self) -> Arc<Mutex<OfflineMemory<F>>> {
-        self.memory_controller().borrow().offline_memory().clone()
+    pub fn address_bits(&self) -> usize {
+        self.memory.controller.memory_config().pointer_max_bits
     }
 
     pub fn get_default_register(&mut self, increment: usize) -> usize {
@@ -240,71 +310,135 @@ impl<F: PrimeField32> VmChipTestBuilder<F> {
         self.write_heap(register, pointer, writes);
         (register, pointer)
     }
+
+    pub fn set_num_public_values(&mut self, num_public_values: usize) {
+        self.custom_pvs.resize(num_public_values, None);
+    }
 }
 
 // Use Blake3 as hash for faster tests.
-type TestSC = BabyBearBlake3Config;
+pub(crate) type TestSC = BabyBearBlake3Config;
 
 impl VmChipTestBuilder<BabyBear> {
     pub fn build(self) -> VmChipTester<TestSC> {
-        self.memory
-            .controller
-            .borrow_mut()
-            .finalize(None::<&mut Poseidon2PeripheryChip<BabyBear>>);
         let tester = VmChipTester {
             memory: Some(self.memory),
             ..Default::default()
         };
-        let tester = tester.load(self.execution);
-        tester.load(self.program)
+        let tester =
+            tester.load_periphery((ExecutionDummyAir::new(self.execution.bus), self.execution));
+        tester.load_periphery((ProgramDummyAir::new(self.program.bus), self.program))
     }
     pub fn build_babybear_poseidon2(self) -> VmChipTester<BabyBearPoseidon2Config> {
-        self.memory
-            .controller
-            .borrow_mut()
-            .finalize(None::<&mut Poseidon2PeripheryChip<BabyBear>>);
         let tester = VmChipTester {
             memory: Some(self.memory),
             ..Default::default()
         };
-        let tester = tester.load(self.execution);
-        tester.load(self.program)
+        let tester =
+            tester.load_periphery((ExecutionDummyAir::new(self.execution.bus), self.execution));
+        tester.load_periphery((ProgramDummyAir::new(self.program.bus), self.program))
     }
 }
 
-impl<F: PrimeField32> Default for VmChipTestBuilder<F> {
-    fn default() -> Self {
-        let mem_config = MemoryConfig::default();
-        let range_checker = SharedVariableRangeCheckerChip::new(VariableRangeCheckerBus::new(
+impl<F: PrimeField32> VmChipTestBuilder<F> {
+    pub fn default_persistent() -> Self {
+        let mut mem_config = MemoryConfig::default();
+        mem_config.addr_spaces[RV32_REGISTER_AS as usize].num_cells = 1 << 29;
+        mem_config.addr_spaces[NATIVE_AS as usize].num_cells = 0;
+        Self::persistent(mem_config)
+    }
+
+    pub fn default_native() -> Self {
+        Self::volatile(MemoryConfig::aggregation())
+    }
+
+    fn range_checker_and_memory(
+        mem_config: &MemoryConfig,
+        init_block_size: usize,
+    ) -> (SharedVariableRangeCheckerChip, TracingMemory) {
+        let range_checker = Arc::new(VariableRangeCheckerChip::new(VariableRangeCheckerBus::new(
             RANGE_CHECKER_BUS,
             mem_config.decomp,
+        )));
+        let max_access_adapter_n = log2_strict_usize(mem_config.max_access_adapter_n);
+        let arena_size_bound = arena_size_bound(&vec![1 << 16; max_access_adapter_n]);
+        let memory = TracingMemory::new(mem_config, init_block_size, arena_size_bound);
+
+        (range_checker, memory)
+    }
+
+    pub fn persistent(mem_config: MemoryConfig) -> Self {
+        setup_tracing_with_log_level(Level::INFO);
+        let (range_checker, memory) = Self::range_checker_and_memory(&mem_config, CHUNK);
+        let hasher_chip = Arc::new(Poseidon2PeripheryChip::new(
+            vm_poseidon2_config(),
+            POSEIDON2_DIRECT_BUS,
+            3,
         ));
-        let memory_controller = MemoryController::with_volatile_memory(
+        let memory_controller = MemoryController::with_persistent_memory(
             MemoryBus::new(MEMORY_BUS),
             mem_config,
             range_checker,
+            PermutationCheckBus::new(MEMORY_MERKLE_BUS),
+            PermutationCheckBus::new(POSEIDON2_DIRECT_BUS),
+            hasher_chip,
         );
         Self {
-            memory: MemoryTester::new(Rc::new(RefCell::new(memory_controller))),
+            memory: MemoryTester::new(memory_controller, memory),
+            streams: Default::default(),
+            rng: StdRng::seed_from_u64(0),
+            custom_pvs: Vec::new(),
             execution: ExecutionTester::new(ExecutionBus::new(EXECUTION_BUS)),
             program: ProgramTester::new(ProgramBus::new(READ_INSTRUCTION_BUS)),
+            internal_rng: StdRng::seed_from_u64(0),
+            default_register: 0,
+            default_pointer: 0,
+        }
+    }
+
+    pub fn volatile(mem_config: MemoryConfig) -> Self {
+        setup_tracing_with_log_level(Level::INFO);
+        let (range_checker, memory) = Self::range_checker_and_memory(&mem_config, 1);
+        let memory_controller = MemoryController::with_volatile_memory(
+            MemoryBus::new(MEMORY_BUS),
+            mem_config,
+            range_checker,
+        );
+        Self {
+            memory: MemoryTester::new(memory_controller, memory),
+            streams: Default::default(),
             rng: StdRng::seed_from_u64(0),
+            custom_pvs: Vec::new(),
+            execution: ExecutionTester::new(ExecutionBus::new(EXECUTION_BUS)),
+            program: ProgramTester::new(ProgramBus::new(READ_INSTRUCTION_BUS)),
+            internal_rng: StdRng::seed_from_u64(0),
             default_register: 0,
             default_pointer: 0,
         }
     }
 }
 
+impl<F: PrimeField32> Default for VmChipTestBuilder<F> {
+    fn default() -> Self {
+        let mut mem_config = MemoryConfig::default();
+        // TODO[jpw]: this is because old tests use `gen_pointer` on address space 1; this can be
+        // removed when tests are updated.
+        mem_config.addr_spaces[RV32_REGISTER_AS as usize].num_cells = 1 << 29;
+        mem_config.addr_spaces[NATIVE_AS as usize].num_cells = 0;
+        Self::volatile(mem_config)
+    }
+}
+
 pub struct VmChipTester<SC: StarkGenericConfig> {
     pub memory: Option<MemoryTester<Val<SC>>>,
-    pub air_proof_inputs: Vec<(AirRef<SC>, AirProofInput<SC>)>,
+    pub air_ctxs: Vec<(AirRef<SC>, AirProvingContext<CpuBackend<SC>>)>,
 }
 
 impl<SC: StarkGenericConfig> Default for VmChipTester<SC> {
     fn default() -> Self {
         Self {
             memory: None,
-            air_proof_inputs: vec![],
+            air_ctxs: vec![],
         }
     }
 }
@@ -313,90 +447,149 @@ impl<SC: StarkGenericConfig> VmChipTester<SC>
 where
     Val<SC>: PrimeField32,
 {
-    pub fn load<C: Chip<SC>>(mut self, chip: C) -> Self {
-        if chip.current_trace_height() > 0 {
-            let air = chip.air();
-            let air_proof_input = chip.generate_air_proof_input();
-            tracing::debug!("Generated air proof input for {}", air.name());
-            self.air_proof_inputs.push((air, air_proof_input));
+    pub fn load<E, A, C>(
+        mut self,
+        harness: TestChipHarness<Val<SC>, E, A, C, MatrixRecordArena<Val<SC>>>,
+    ) -> Self
+    where
+        A: AnyRap<SC> + 'static,
+        C: Chip<MatrixRecordArena<Val<SC>>, CpuBackend<SC>>,
+    {
+        let arena = harness.arena;
+        let rows_used = arena.trace_offset.div_ceil(arena.width);
+        if rows_used > 0 {
+            let air = Arc::new(harness.air) as AirRef<SC>;
+            let ctx = harness.chip.generate_proving_ctx(arena);
+            tracing::debug!("Generated air proving context for {}", air.name());
+            self.air_ctxs.push((air, ctx));
         }
 
         self
     }
 
+    pub fn load_periphery<A, C>(self, (air, chip): (A, C)) -> Self
+    where
+        A: AnyRap<SC> + 'static,
+        C: Chip<(), CpuBackend<SC>>,
+    {
+        let air = Arc::new(air) as AirRef<SC>;
+        self.load_periphery_ref((air, chip))
+    }
+
+    pub fn load_periphery_ref<C>(mut self, (air, chip): (AirRef<SC>, C)) -> Self
+    where
+        C: Chip<(), CpuBackend<SC>>,
+    {
+        let ctx = chip.generate_proving_ctx(());
+        tracing::debug!("Generated air proving context for {}", air.name());
+        self.air_ctxs.push((air, ctx));
+
+        self
+    }
+
     pub fn finalize(mut self) -> Self {
         if let Some(memory_tester) = self.memory.take() {
-            let memory_controller = memory_tester.controller.clone();
-            let range_checker = memory_controller.borrow().range_checker.clone();
-            self = self.load(memory_tester); // dummy memory interactions
+            let mut memory_controller = memory_tester.controller;
+            let is_persistent = memory_controller.continuation_enabled();
+            let mut memory = memory_tester.memory;
+            let touched_memory = memory.finalize::<Val<SC>>(is_persistent);
+            // Balance memory boundaries
+            let range_checker = memory_controller.range_checker.clone();
+            for mem_chip in memory_tester.chip_for_block.into_values() {
+                self = self.load_periphery((mem_chip.air, mem_chip));
+            }
+            let mem_inventory = MemoryAirInventory::new(
+                memory_controller.memory_bridge(),
+                memory_controller.memory_config(),
+                range_checker.bus(),
+                is_persistent.then_some((
+                    PermutationCheckBus::new(MEMORY_MERKLE_BUS),
+                    PermutationCheckBus::new(POSEIDON2_DIRECT_BUS),
+                )),
+            );
+            let ctxs = memory_controller
+                .generate_proving_ctx(memory.access_adapter_records, touched_memory);
+            for (air, ctx) in zip_eq(mem_inventory.into_airs(), ctxs)
+                .filter(|(_, ctx)| ctx.main_trace_height() > 0)
             {
-                let airs = memory_controller.borrow().airs();
-                let air_proof_inputs = Rc::try_unwrap(memory_controller)
-                    .unwrap_or_else(|_| panic!("Memory controller was not dropped"))
-                    .into_inner()
-                    .generate_air_proof_inputs();
-                self.air_proof_inputs.extend(
-                    zip(airs, air_proof_inputs).filter(|(_, input)| input.main_trace_height() > 0),
-                );
+                self.air_ctxs.push((air, ctx));
             }
-            self = self.load(range_checker); // this must be last because other trace generation
-                                             // mutates its state
+            if let Some(hasher_chip) = memory_controller.hasher_chip {
+                let air: AirRef<SC> = match hasher_chip.as_ref() {
+                    Poseidon2PeripheryChip::Register0(chip) => chip.air.clone(),
+                    Poseidon2PeripheryChip::Register1(chip) => chip.air.clone(),
+                };
+                self = self.load_periphery_ref((air, hasher_chip));
+            }
+            // this must be last because other trace generation mutates its state
+            self = self.load_periphery((range_checker.air, range_checker));
         }
         self
     }
 
-    pub fn load_air_proof_input(
+    pub fn load_air_proving_ctx(
         mut self,
-        air_proof_input: (AirRef<SC>, AirProofInput<SC>),
+        air_proving_ctx: (AirRef<SC>, AirProvingContext<CpuBackend<SC>>),
     ) -> Self {
-        self.air_proof_inputs.push(air_proof_input);
+        self.air_ctxs.push(air_proving_ctx);
         self
     }
 
-    pub fn load_with_custom_trace<C: Chip<SC>>(
-        mut self,
-        chip: C,
-        trace: RowMajorMatrix<Val<SC>>,
-    ) -> Self {
-        let air = chip.air();
-        let mut air_proof_input = chip.generate_air_proof_input();
-        air_proof_input.raw.common_main = Some(trace);
-        self.air_proof_inputs.push((air, air_proof_input));
-        self
-    }
+    // pub fn load_with_custom_trace<C: Chip<SC>>(
+    //     mut self,
+    //     chip: C,
+    //     trace: RowMajorMatrix<Val<SC>>,
+    // ) -> Self {
+    //     let air = chip.air();
+    //     let mut air_proof_input = chip.generate_air_proof_input();
+    //     air_proof_input.raw.common_main = Some(trace);
+    //     self.air_proof_inputs.push((air, air_proof_input));
+    //     self
+    // }
 
-    pub fn load_and_prank_trace<C: Chip<SC>, P>(mut self, chip: C, modify_trace: P) -> Self
+    pub fn load_and_prank_trace<E, A, C, P>(
+        mut self,
+        harness: TestChipHarness<Val<SC>, E, A, C, MatrixRecordArena<Val<SC>>>,
+        modify_trace: P,
+    ) -> Self
     where
-        P: Fn(&mut DenseMatrix<Val<SC>>),
+        A: AnyRap<SC> + 'static,
+        C: Chip<MatrixRecordArena<Val<SC>>, CpuBackend<SC>>,
+        P: Fn(&mut RowMajorMatrix<Val<SC>>),
     {
-        let air = chip.air();
-        let mut air_proof_input = chip.generate_air_proof_input();
-        let trace = air_proof_input.raw.common_main.as_mut().unwrap();
-        modify_trace(trace);
-        self.air_proof_inputs.push((air, air_proof_input));
+        let arena = harness.arena;
+        let mut ctx = harness.chip.generate_proving_ctx(arena);
+        let trace: Arc<RowMajorMatrix<Val<SC>>> = Option::take(&mut ctx.common_main).unwrap();
+        let mut trace = Arc::into_inner(trace).unwrap();
+        modify_trace(&mut trace);
+        ctx.common_main = Some(Arc::new(trace));
+        self.air_ctxs.push((Arc::new(harness.air), ctx));
         self
     }
 
     /// Given a function to produce an engine from the max trace height,
     /// runs a simple test on that engine
-    pub fn test<E: StarkEngine<SC>, P: Fn() -> E>(
-        &self, // do no take ownership so it's easier to prank
+    pub fn test<E, P: Fn() -> E>(
+        self, // consumes the tester so the proving contexts are moved out rather than cloned
         engine_provider: P,
-    ) -> Result<VerificationData<SC>, VerificationError> {
+    ) -> Result<VerificationData<SC>, VerificationError>
+    where
+        E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    {
         assert!(self.memory.is_none(), "Memory must be finalized");
-        let (airs, air_proof_inputs) = self.air_proof_inputs.iter().cloned().unzip();
-        engine_provider().run_test_impl(airs, air_proof_inputs)
+        let (airs, ctxs): (Vec<_>, Vec<_>) = self.air_ctxs.into_iter().unzip();
+        engine_provider().run_test_impl(airs, ctxs)
     }
 }
 
 impl VmChipTester<BabyBearPoseidon2Config> {
     pub fn simple_test(
-        &self,
+        self,
     ) -> Result<VerificationData<BabyBearPoseidon2Config>, VerificationError> {
         self.test(|| BabyBearPoseidon2Engine::new(FriParameters::new_for_testing(1)))
     }
 
-    pub fn simple_test_with_expected_error(&self, expected_error: VerificationError) {
+    pub fn simple_test_with_expected_error(self, expected_error: VerificationError) {
         let msg = format!(
             "Expected verification to fail with {:?}, but it didn't",
             &expected_error
@@ -407,11 +600,11 @@ impl VmChipTester<BabyBearPoseidon2Config> {
 }
 
 impl VmChipTester<BabyBearBlake3Config> {
-    pub fn simple_test(&self) -> Result<VerificationData<BabyBearBlake3Config>, VerificationError> {
+    pub fn simple_test(self) -> Result<VerificationData<BabyBearBlake3Config>, VerificationError> {
         self.test(|| BabyBearBlake3Engine::new(FriParameters::new_for_testing(1)))
     }
 
-    pub fn simple_test_with_expected_error(&self, expected_error: VerificationError) {
+    pub fn simple_test_with_expected_error(self, expected_error: VerificationError) {
         let msg = format!(
             "Expected verification to fail with {:?}, but it didn't",
             &expected_error
diff --git a/crates/vm/src/arch/testing/program/mod.rs b/crates/vm/src/arch/testing/program/mod.rs
index 04c4feee60..224743cab5 100644
--- a/crates/vm/src/arch/testing/program/mod.rs
+++ b/crates/vm/src/arch/testing/program/mod.rs
@@ -1,13 +1,12 @@
 use std::{borrow::BorrowMut, mem::size_of, sync::Arc};
 
-use air::ProgramDummyAir;
 use openvm_instructions::instruction::Instruction;
 use openvm_stark_backend::{
     config::{StarkGenericConfig, Val},
     p3_field::{Field, FieldAlgebra, PrimeField32},
     p3_matrix::dense::RowMajorMatrix,
-    prover::types::AirProofInput,
-    AirRef, Chip, ChipUsageGetter,
+    prover::{cpu::CpuBackend, types::AirProvingContext},
+    Chip, ChipUsageGetter,
 };
 
 use crate::{
@@ -15,7 +14,7 @@ use crate::{
     system::program::{ProgramBus, ProgramExecutionCols},
 };
 
-mod air;
+pub mod air;
 
 #[derive(Debug)]
 pub struct ProgramTester<F: Field> {
@@ -52,22 +51,18 @@ impl<F: Field> ProgramTester<F> {
     }
 }
 
-impl<SC: StarkGenericConfig> Chip<SC> for ProgramTester<Val<SC>> {
-    fn air(&self) -> AirRef<SC> {
-        Arc::new(ProgramDummyAir::new(self.bus))
-    }
-
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
+impl<SC: StarkGenericConfig, RA> Chip<RA, CpuBackend<SC>> for ProgramTester<Val<SC>> {
+    fn generate_proving_ctx(&self, _: RA) -> AirProvingContext<CpuBackend<SC>> {
         let height = self.records.len().next_power_of_two();
         let width = self.trace_width();
         let mut values = Val::<SC>::zero_vec(height * width);
         // This zip only goes through records. The padding rows between records.len()..height
         // are filled with zeros - in particular count = 0 so nothing is added to bus.
-        for (row, record) in values.chunks_mut(width).zip(self.records) {
-            *(row[..width - 1]).borrow_mut() = record;
+        for (row, record) in values.chunks_mut(width).zip(&self.records) {
+            *(row[..width - 1]).borrow_mut() = *record;
             row[width - 1] = Val::<SC>::ONE;
         }
-        AirProofInput::simple_no_pis(RowMajorMatrix::new(values, width))
+        AirProvingContext::simple_no_pis(Arc::new(RowMajorMatrix::new(values, width)))
     }
 }
 
diff --git a/crates/vm/src/arch/testing/test_adapter.rs b/crates/vm/src/arch/testing/test_adapter.rs
deleted file mode 100644
index bca9eed724..0000000000
--- a/crates/vm/src/arch/testing/test_adapter.rs
+++ /dev/null
@@ -1,175 +0,0 @@
-use std::{
-    borrow::{Borrow, BorrowMut},
-    collections::VecDeque,
-    fmt::Debug,
-};
-
-use openvm_circuit_primitives_derive::AlignedBorrow;
-use openvm_instructions::instruction::Instruction;
-use openvm_stark_backend::{
-    interaction::InteractionBuilder,
-    p3_air::BaseAir,
-    p3_field::{Field, FieldAlgebra, PrimeField32},
-};
-use serde::{Deserialize, Serialize};
-
-use crate::{
-    arch::{
-        AdapterAirContext, AdapterRuntimeContext, DynAdapterInterface, DynArray, ExecutionBridge,
-        ExecutionState, MinimalInstruction, Result, VmAdapterAir, VmAdapterChip,
-    },
-    system::memory::{MemoryController, OfflineMemory},
-};
-
-// Replaces A: VmAdapterChip while testing VmCoreChip functionality, as it has no
-// constraints and thus cannot cause a failure.
-pub struct TestAdapterChip<F> {
-    /// List of the return values of `preprocess` this chip should provide on each sequential call.
-    pub prank_reads: VecDeque<Vec<F>>,
-    /// List of `pc_inc` to use in `postprocess` on each sequential call.
-    /// Defaults to `4` if not provided.
-    pub prank_pc_inc: VecDeque<Option<u32>>,
-
-    pub air: TestAdapterAir,
-}
-
-impl<F> TestAdapterChip<F> {
-    pub fn new(
-        prank_reads: Vec<Vec<F>>,
-        prank_pc_inc: Vec<Option<u32>>,
-        execution_bridge: ExecutionBridge,
-    ) -> Self {
-        Self {
-            prank_reads: prank_reads.into(),
-            prank_pc_inc: prank_pc_inc.into(),
-            air: TestAdapterAir { execution_bridge },
-        }
-    }
-}
-
-#[derive(Clone, Serialize, Deserialize)]
-pub struct TestAdapterRecord<T> {
-    pub from_pc: u32,
-    pub operands: [T; 7],
-}
-
-impl<F: PrimeField32> VmAdapterChip<F> for TestAdapterChip<F> {
-    type ReadRecord = ();
-    type WriteRecord = TestAdapterRecord<F>;
-    type Air = TestAdapterAir;
-    type Interface = DynAdapterInterface<F>;
-
-    fn preprocess(
-        &mut self,
-        _memory: &mut MemoryController<F>,
-        _instruction: &Instruction<F>,
-    ) -> Result<(DynArray<F>, Self::ReadRecord)> {
-        Ok((
-            self.prank_reads
-                .pop_front()
-                .expect("Not enough prank reads provided")
-                .into(),
-            (),
-        ))
-    }
-
-    fn postprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-        _output: AdapterRuntimeContext<F, Self::Interface>,
-        _read_record: &Self::ReadRecord,
-    ) -> Result<(ExecutionState<u32>, Self::WriteRecord)> {
-        let pc_inc = self
-            .prank_pc_inc
-            .pop_front()
-            .map(|x| x.unwrap_or(4))
-            .unwrap_or(4);
-        Ok((
-            ExecutionState {
-                pc: from_state.pc + pc_inc,
-                timestamp: memory.timestamp(),
-            },
-            TestAdapterRecord {
-                operands: [
-                    instruction.a,
-                    instruction.b,
-                    instruction.c,
-                    instruction.d,
-                    instruction.e,
-                    instruction.f,
-                    instruction.g,
-                ],
-                from_pc: from_state.pc,
-            },
-        ))
-    }
-
-    fn generate_trace_row(
-        &self,
-        row_slice: &mut [F],
-        _read_record: Self::ReadRecord,
-        write_record: Self::WriteRecord,
-        _memory: &OfflineMemory<F>,
-    ) {
-        let cols: &mut TestAdapterCols<F> = row_slice.borrow_mut();
-        cols.from_pc = F::from_canonical_u32(write_record.from_pc);
-        cols.operands = write_record.operands;
-        // row_slice[0] = F::from_canonical_u32(write_record.from_pc);
-        // row_slice[1..].copy_from_slice(&write_record.operands);
-    }
-
-    fn air(&self) -> &Self::Air {
-        &self.air
-    }
-}
-
-#[derive(Clone, Copy, Debug)]
-pub struct TestAdapterAir {
-    pub execution_bridge: ExecutionBridge,
-}
-
-#[repr(C)]
-#[derive(AlignedBorrow)]
-pub struct TestAdapterCols<T> {
-    pub from_pc: T,
-    pub operands: [T; 7],
-}
-
-impl<F: Field> BaseAir<F> for TestAdapterAir {
-    fn width(&self) -> usize {
-        TestAdapterCols::<F>::width()
-    }
-}
-
-impl<AB: InteractionBuilder> VmAdapterAir<AB> for TestAdapterAir {
-    type Interface = DynAdapterInterface<AB::Expr>;
-
-    fn eval(
-        &self,
-        builder: &mut AB,
-        local: &[AB::Var],
-        ctx: AdapterAirContext<AB::Expr, Self::Interface>,
-    ) {
-        let processed_instruction: MinimalInstruction<AB::Expr> = ctx.instruction.into();
-        let cols: &TestAdapterCols<AB::Var> = local.borrow();
-        self.execution_bridge
-            .execute_and_increment_or_set_pc(
-                processed_instruction.opcode,
-                cols.operands.to_vec(),
-                ExecutionState {
-                    pc: cols.from_pc.into(),
-                    timestamp: AB::Expr::ONE,
-                },
-                AB::Expr::ZERO,
-                (4, ctx.to_pc),
-            )
-            .eval(builder, processed_instruction.is_valid);
-    }
-
-    fn get_from_pc(&self, local: &[AB::Var]) -> AB::Var {
-        let cols: &TestAdapterCols<AB::Var> = local.borrow();
-        cols.from_pc
-    }
-}
diff --git a/crates/vm/src/arch/vm.rs b/crates/vm/src/arch/vm.rs
index c9d5cb2ffc..a164e37a1e 100644
--- a/crates/vm/src/arch/vm.rs
+++ b/crates/vm/src/arch/vm.rs
@@ -1,60 +1,98 @@
+//! [VmExecutor] is the struct that can execute an _arbitrary_ program, provided in the form of a
+//! [VmExe], for a fixed set of OpenVM instructions corresponding to a [VmExecutionConfig].
+//! Internally once it is given a program, it will preprocess the program to rewrite it into a more
+//! optimized format for runtime execution. This **instance** of the executor will be a separate
+//! struct specialized to running a _fixed_ program on different program inputs.
+//!
+//! [VirtualMachine] will similarly be the struct that has done all the setup so it can
+//! execute+prove an arbitrary program for a fixed config; internally it still holds a [VmExecutor].
 use std::{
+    any::TypeId,
     borrow::Borrow,
     collections::{HashMap, VecDeque},
     marker::PhantomData,
-    mem,
     sync::Arc,
 };
 
+use getset::{Getters, MutGetters, Setters, WithSetters};
+use itertools::{zip_eq, Itertools};
 use openvm_circuit::system::program::trace::compute_exe_commit;
-use openvm_instructions::exe::VmExe;
+use openvm_instructions::{
+    exe::{SparseMemoryImage, VmExe},
+    program::Program,
+};
 use openvm_stark_backend::{
-    config::{Com, Domain, StarkGenericConfig, Val},
+    config::{Com, StarkGenericConfig, Val},
     engine::StarkEngine,
-    keygen::types::{LinearConstraint, MultiStarkProvingKey, MultiStarkVerifyingKey},
-    p3_commit::PolynomialSpace,
-    p3_field::{FieldAlgebra, PrimeField32},
+    keygen::types::{MultiStarkProvingKey, MultiStarkVerifyingKey},
+    p3_field::{FieldAlgebra, FieldExtensionAlgebra, PrimeField32, TwoAdicField},
+    p3_util::{log2_ceil_usize, log2_strict_usize},
     proof::Proof,
-    prover::types::{CommittedTraceData, ProofInput},
-    utils::metrics_span,
+    prover::{
+        hal::{DeviceDataTransporter, MatrixDimensions, TraceCommitter},
+        types::{CommittedTraceData, DeviceMultiStarkProvingKey, ProvingContext},
+    },
     verifier::VerificationError,
-    Chip,
 };
+use p3_baby_bear::BabyBear;
 use serde::{Deserialize, Serialize};
 use thiserror::Error;
-use tracing::info_span;
+use tracing::{info_span, instrument};
 
 use super::{
-    ExecutionError, VmComplexTraceHeights, VmConfig, CONNECTOR_AIR_ID, MERKLE_AIR_ID,
-    PROGRAM_AIR_ID, PROGRAM_CACHED_TRACE_INDEX,
+    execution_mode::{ExecutionCtx, MeteredCostCtx, MeteredCtx, PreflightCtx, Segment},
+    hasher::poseidon2::vm_poseidon2_hasher,
+    interpreter::InterpretedInstance,
+    interpreter_preflight::PreflightInterpretedInstance,
+    AirInventoryError, ChipInventoryError, ExecutionError, ExecutionState, Executor,
+    ExecutorInventory, ExecutorInventoryError, MemoryConfig, MeteredExecutor, PreflightExecutor,
+    StaticProgramError, SystemConfig, VmBuilder, VmChipComplex, VmCircuitConfig, VmExecState,
+    VmExecutionConfig, VmState, CONNECTOR_AIR_ID, MERKLE_AIR_ID, PROGRAM_AIR_ID,
+    PROGRAM_CACHED_TRACE_INDEX, PUBLIC_VALUES_AIR_ID,
 };
-#[cfg(feature = "bench-metrics")]
-use crate::metrics::VmMetrics;
 use crate::{
-    arch::{hasher::poseidon2::vm_poseidon2_hasher, segment::ExecutionSegment},
+    arch::DEFAULT_RNG_SEED,
+    execute_spanned,
     system::{
         connector::{VmConnectorPvs, DEFAULT_SUSPEND_EXIT_CODE},
         memory::{
-            merkle::MemoryMerklePvs,
-            paged_vec::AddressMap,
-            tree::public_values::{UserPublicValuesProof, UserPublicValuesProofError},
-            MemoryImage, CHUNK,
+            adapter::records,
+            merkle::{
+                public_values::{UserPublicValuesProof, UserPublicValuesProofError},
+                MemoryMerklePvs,
+            },
+            online::{GuestMemory, TracingMemory},
+            AddressMap, CHUNK,
         },
-        program::trace::VmCommittedExe,
+        program::trace::{generate_cached_trace, VmCommittedExe},
+        SystemChipComplex, SystemRecords, SystemWithFixedTraceHeights,
     },
 };
 
 #[derive(Error, Debug)]
 pub enum GenerationError {
-    #[error("generated trace heights violate constraints")]
-    TraceHeightsLimitExceeded,
-    #[error(transparent)]
-    Execution(#[from] ExecutionError),
+    #[error("unexpected number of arenas: {actual} (expected num_airs={expected})")]
+    UnexpectedNumArenas { actual: usize, expected: usize },
+    #[error("trace height for air_idx={air_idx} must be fixed to {expected}, actual={actual}")]
+    ForceTraceHeightIncorrect {
+        air_idx: usize,
+        actual: usize,
+        expected: usize,
+    },
+    #[error("trace height of air {air_idx} has height {height} greater than maximum {max_height}")]
+    TraceHeightsLimitExceeded {
+        air_idx: usize,
+        height: usize,
+        max_height: usize,
+    },
+    #[error("trace heights violate linear constraint {constraint_idx} ({value} >= {threshold})")]
+    LinearTraceHeightConstraintExceeded {
+        constraint_idx: usize,
+        value: u64,
+        threshold: u32,
+    },
 }
 
-/// VM memory state for continuations.
-pub type VmMemoryState<F> = MemoryImage<F>;
-
 /// A trait for key-value store for `Streams`.
 pub trait KvStore: Send + Sync {
     fn get(&self, key: &[u8]) -> Option<&[u8]>;
@@ -105,11 +143,23 @@ impl<F> From<Vec<Vec<F>>> for Streams<F> {
     }
 }
 
-pub struct VmExecutor<F, VC> {
+/// Typedef for [PreflightInterpretedInstance] that is generic in `VC: VmExecutionConfig<F>`
+type PreflightInterpretedInstance2<F, VC> =
+    PreflightInterpretedInstance<F, <VC as VmExecutionConfig<F>>::Executor>;
+
+/// [VmExecutor] is the struct that can execute an _arbitrary_ program, provided in the form of a
+/// [VmExe], for a fixed set of OpenVM instructions corresponding to a [VmExecutionConfig].
+/// Internally once it is given a program, it will preprocess the program to rewrite it into a more
+/// optimized format for runtime execution. This **instance** of the executor will be a separate
+/// struct specialized to running a _fixed_ program on different program inputs.
+#[derive(Clone)]
+pub struct VmExecutor<F, VC>
+where
+    VC: VmExecutionConfig<F>,
+{
     pub config: VC,
-    pub overridden_heights: Option<VmComplexTraceHeights>,
-    pub trace_height_constraints: Vec<LinearConstraint>,
-    _marker: PhantomData<F>,
+    inventory: Arc<ExecutorInventory<VC::Executor>>,
+    phantom: PhantomData<F>,
 }
 
 #[repr(i32)]
@@ -119,395 +169,95 @@ pub enum ExitCode {
     Suspended = -1, // Continuations
 }
 
-pub struct VmExecutorResult<SC: StarkGenericConfig> {
-    pub per_segment: Vec<ProofInput<SC>>,
-    /// When VM is running on persistent mode, public values are stored in a special memory space.
-    pub final_memory: Option<VmMemoryState<Val<SC>>>,
-}
-
-pub struct VmExecutorNextSegmentState<F: PrimeField32> {
-    pub memory: MemoryImage<F>,
-    pub input: Streams<F>,
-    pub pc: u32,
-    #[cfg(feature = "bench-metrics")]
-    pub metrics: VmMetrics,
-}
-
-impl<F: PrimeField32> VmExecutorNextSegmentState<F> {
-    pub fn new(memory: MemoryImage<F>, input: impl Into<Streams<F>>, pc: u32) -> Self {
-        Self {
-            memory,
-            input: input.into(),
-            pc,
-            #[cfg(feature = "bench-metrics")]
-            metrics: VmMetrics::default(),
-        }
-    }
-}
-
-pub struct VmExecutorOneSegmentResult<F: PrimeField32, VC: VmConfig<F>> {
-    pub segment: ExecutionSegment<F, VC>,
-    pub next_state: Option<VmExecutorNextSegmentState<F>>,
+pub struct PreflightExecutionOutput<F, RA> {
+    pub system_records: SystemRecords<F>,
+    pub record_arenas: Vec<RA>,
+    pub to_state: VmState<F, GuestMemory>,
 }
 
 impl<F, VC> VmExecutor<F, VC>
 where
-    F: PrimeField32,
-    VC: VmConfig<F>,
+    VC: VmExecutionConfig<F>,
 {
     /// Create a new VM executor with a given config.
     ///
     /// The VM will start with a single segment, which is created from the initial state.
-    pub fn new(config: VC) -> Self {
-        Self::new_with_overridden_trace_heights(config, None)
-    }
-
-    pub fn set_override_trace_heights(&mut self, overridden_heights: VmComplexTraceHeights) {
-        self.overridden_heights = Some(overridden_heights);
-    }
-
-    pub fn new_with_overridden_trace_heights(
-        config: VC,
-        overridden_heights: Option<VmComplexTraceHeights>,
-    ) -> Self {
-        Self {
+    pub fn new(config: VC) -> Result<Self, ExecutorInventoryError> {
+        let inventory = config.create_executors()?;
+        Ok(Self {
             config,
-            overridden_heights,
-            trace_height_constraints: vec![],
-            _marker: Default::default(),
-        }
-    }
-
-    pub fn continuation_enabled(&self) -> bool {
-        self.config.system().continuation_enabled
-    }
-
-    /// Executes the program in segments.
-    /// After each segment is executed, call the provided closure on the execution result.
-    /// Returns the results from each closure, one per segment.
-    ///
-    /// The closure takes `f(segment_idx, segment) -> R`.
-    pub fn execute_and_then<R, E>(
-        &self,
-        exe: impl Into<VmExe<F>>,
-        input: impl Into<Streams<F>>,
-        mut f: impl FnMut(usize, ExecutionSegment<F, VC>) -> Result<R, E>,
-        map_err: impl Fn(ExecutionError) -> E,
-    ) -> Result<Vec<R>, E> {
-        let mem_config = self.config.system().memory_config;
-        let exe = exe.into();
-        let mut segment_results = vec![];
-        let memory = AddressMap::from_iter(
-            mem_config.as_offset,
-            1 << mem_config.as_height,
-            1 << mem_config.pointer_max_bits,
-            exe.init_memory.clone(),
-        );
-        let pc = exe.pc_start;
-        let mut state = VmExecutorNextSegmentState::new(memory, input, pc);
-
-        #[cfg(feature = "bench-metrics")]
-        {
-            state.metrics.fn_bounds = exe.fn_bounds.clone();
-        }
-
-        let mut segment_idx = 0;
-
-        loop {
-            let _span = info_span!("execute_segment", segment = segment_idx).entered();
-            let one_segment_result = self
-                .execute_until_segment(exe.clone(), state)
-                .map_err(&map_err)?;
-            segment_results.push(f(segment_idx, one_segment_result.segment)?);
-            if one_segment_result.next_state.is_none() {
-                break;
-            }
-            state = one_segment_result.next_state.unwrap();
-            segment_idx += 1;
-        }
-        tracing::debug!("Number of continuation segments: {}", segment_results.len());
-        #[cfg(feature = "bench-metrics")]
-        metrics::counter!("num_segments").absolute(segment_results.len() as u64);
-
-        Ok(segment_results)
-    }
-
-    pub fn execute_segments(
-        &self,
-        exe: impl Into<VmExe<F>>,
-        input: impl Into<Streams<F>>,
-    ) -> Result<Vec<ExecutionSegment<F, VC>>, ExecutionError> {
-        self.execute_and_then(exe, input, |_, seg| Ok(seg), |err| err)
-    }
-
-    /// Executes a program until a segmentation happens.
-    /// Returns the last segment and the vm state for next segment.
-    /// This is so that the tracegen and proving of this segment can be immediately started (on a
-    /// separate machine).
-    pub fn execute_until_segment(
-        &self,
-        exe: impl Into<VmExe<F>>,
-        from_state: VmExecutorNextSegmentState<F>,
-    ) -> Result<VmExecutorOneSegmentResult<F, VC>, ExecutionError> {
-        let exe = exe.into();
-        let mut segment = ExecutionSegment::new(
-            &self.config,
-            exe.program.clone(),
-            from_state.input,
-            Some(from_state.memory),
-            self.trace_height_constraints.clone(),
-            exe.fn_bounds.clone(),
-        );
-        #[cfg(feature = "bench-metrics")]
-        {
-            segment.metrics = from_state.metrics;
-        }
-        if let Some(overridden_heights) = self.overridden_heights.as_ref() {
-            segment.set_override_trace_heights(overridden_heights.clone());
-        }
-        let state = metrics_span("execute_time_ms", || segment.execute_from_pc(from_state.pc))?;
-
-        if state.is_terminated {
-            return Ok(VmExecutorOneSegmentResult {
-                segment,
-                next_state: None,
-            });
-        }
-
-        assert!(
-            self.continuation_enabled(),
-            "multiple segments require to enable continuations"
-        );
-        assert_eq!(
-            state.pc,
-            segment.chip_complex.connector_chip().boundary_states[1]
-                .unwrap()
-                .pc
-        );
-        let final_memory = mem::take(&mut segment.final_memory)
-            .expect("final memory should be set in continuations segment");
-        let streams = segment.chip_complex.take_streams();
-        #[cfg(feature = "bench-metrics")]
-        let metrics = segment.metrics.partial_take();
-        Ok(VmExecutorOneSegmentResult {
-            segment,
-            next_state: Some(VmExecutorNextSegmentState {
-                memory: final_memory,
-                input: streams,
-                pc: state.pc,
-                #[cfg(feature = "bench-metrics")]
-                metrics,
-            }),
+            inventory: Arc::new(inventory),
+            phantom: PhantomData,
         })
     }
+}
 
-    pub fn execute(
-        &self,
-        exe: impl Into<VmExe<F>>,
-        input: impl Into<Streams<F>>,
-    ) -> Result<Option<VmMemoryState<F>>, ExecutionError> {
-        let mut last = None;
-        self.execute_and_then(
-            exe,
-            input,
-            |_, seg| {
-                last = Some(seg);
-                Ok(())
-            },
-            |err| err,
-        )?;
-        let last = last.expect("at least one segment must be executed");
-        let final_memory = last.final_memory;
-        let end_state =
-            last.chip_complex.connector_chip().boundary_states[1].expect("end state must be set");
-        if end_state.is_terminate != 1 {
-            return Err(ExecutionError::DidNotTerminate);
-        }
-        if end_state.exit_code != ExitCode::Success as u32 {
-            return Err(ExecutionError::FailedWithExitCode(end_state.exit_code));
-        }
-        Ok(final_memory)
-    }
-
-    pub fn execute_and_generate<SC: StarkGenericConfig>(
-        &self,
-        exe: impl Into<VmExe<F>>,
-        input: impl Into<Streams<F>>,
-    ) -> Result<VmExecutorResult<SC>, GenerationError>
-    where
-        Domain<SC>: PolynomialSpace<Val = F>,
-        VC::Executor: Chip<SC>,
-        VC::Periphery: Chip<SC>,
-    {
-        self.execute_and_generate_impl(exe.into(), None, input)
-    }
-
-    pub fn execute_and_generate_with_cached_program<SC: StarkGenericConfig>(
+impl<F, VC> VmExecutor<F, VC>
+where
+    VC: VmExecutionConfig<F> + AsRef<SystemConfig>,
+{
+    pub fn build_metered_ctx(
         &self,
-        committed_exe: Arc<VmCommittedExe<SC>>,
-        input: impl Into<Streams<F>>,
-    ) -> Result<VmExecutorResult<SC>, GenerationError>
-    where
-        Domain<SC>: PolynomialSpace<Val = F>,
-        VC::Executor: Chip<SC>,
-        VC::Periphery: Chip<SC>,
-    {
-        self.execute_and_generate_impl(
-            committed_exe.exe.clone(),
-            Some(committed_exe.committed_program.clone()),
-            input,
+        constant_trace_heights: &[Option<usize>],
+        air_names: &[String],
+        widths: &[usize],
+        interactions: &[usize],
+    ) -> MeteredCtx {
+        MeteredCtx::new(
+            constant_trace_heights.to_vec(),
+            air_names.to_vec(),
+            widths.to_vec(),
+            interactions.to_vec(),
+            self.config.as_ref(),
         )
     }
 
-    fn execute_and_generate_impl<SC: StarkGenericConfig>(
-        &self,
-        exe: VmExe<F>,
-        committed_program: Option<CommittedTraceData<SC>>,
-        input: impl Into<Streams<F>>,
-    ) -> Result<VmExecutorResult<SC>, GenerationError>
-    where
-        Domain<SC>: PolynomialSpace<Val = F>,
-        VC::Executor: Chip<SC>,
-        VC::Periphery: Chip<SC>,
-    {
-        let mut final_memory = None;
-        let per_segment = self.execute_and_then(
-            exe,
-            input,
-            |seg_idx, mut seg| {
-                // Note: this will only be Some on the last segment; otherwise it is
-                // already moved into next segment state
-                final_memory = mem::take(&mut seg.final_memory);
-                tracing::info_span!("trace_gen", segment = seg_idx)
-                    .in_scope(|| seg.generate_proof_input(committed_program.clone()))
-            },
-            GenerationError::Execution,
-        )?;
-
-        Ok(VmExecutorResult {
-            per_segment,
-            final_memory,
-        })
-    }
-
-    pub fn set_trace_height_constraints(&mut self, constraints: Vec<LinearConstraint>) {
-        self.trace_height_constraints = constraints;
+    pub fn build_metered_cost_ctx(&self, widths: &[usize]) -> MeteredCostCtx {
+        MeteredCostCtx::new(widths.to_vec(), self.config.as_ref())
     }
 }
 
-/// A single segment VM.
-pub struct SingleSegmentVmExecutor<F, VC> {
-    pub config: VC,
-    pub overridden_heights: Option<VmComplexTraceHeights>,
-    pub trace_height_constraints: Vec<LinearConstraint>,
-    _marker: PhantomData<F>,
-}
-
-/// Execution result of a single segment VM execution.
-pub struct SingleSegmentVmExecutionResult<F> {
-    /// All user public values
-    pub public_values: Vec<Option<F>>,
-    /// Heights of each AIR, ordered by AIR ID.
-    pub air_heights: Vec<usize>,
-    /// Heights of (SystemBase, Inventory), in an internal ordering.
-    pub vm_heights: VmComplexTraceHeights,
-}
-
-impl<F, VC> SingleSegmentVmExecutor<F, VC>
+impl<F, VC> VmExecutor<F, VC>
 where
     F: PrimeField32,
-    VC: VmConfig<F>,
+    VC: VmExecutionConfig<F>,
+    VC::Executor: Executor<F>,
 {
-    pub fn new(config: VC) -> Self {
-        Self::new_with_overridden_trace_heights(config, None)
-    }
-
-    pub fn new_with_overridden_trace_heights(
-        config: VC,
-        overridden_heights: Option<VmComplexTraceHeights>,
-    ) -> Self {
-        assert!(
-            !config.system().continuation_enabled,
-            "Single segment VM doesn't support continuation mode"
-        );
-        Self {
-            config,
-            overridden_heights,
-            trace_height_constraints: vec![],
-            _marker: Default::default(),
-        }
-    }
-
-    pub fn set_override_trace_heights(&mut self, overridden_heights: VmComplexTraceHeights) {
-        self.overridden_heights = Some(overridden_heights);
-    }
-
-    pub fn set_trace_height_constraints(&mut self, constraints: Vec<LinearConstraint>) {
-        self.trace_height_constraints = constraints;
-    }
-
-    /// Executes a program, compute the trace heights, and returns the public values.
-    pub fn execute_and_compute_heights(
+    /// Creates an instance of the interpreter specialized for pure execution, without metering, of
+    /// the given `exe`.
+    ///
+    /// For metered execution, use the [`metered_instance`](Self::metered_instance) constructor.
+    pub fn instance(
         &self,
-        exe: impl Into<VmExe<F>>,
-        input: impl Into<Streams<F>>,
-    ) -> Result<SingleSegmentVmExecutionResult<F>, ExecutionError> {
-        let segment = {
-            let mut segment = self.execute_impl(exe.into(), input.into())?;
-            segment.chip_complex.finalize_memory();
-            segment
-        };
-        let air_heights = segment.chip_complex.current_trace_heights();
-        let vm_heights = segment.chip_complex.get_internal_trace_heights();
-        let public_values = if let Some(pv_chip) = segment.chip_complex.public_values_chip() {
-            pv_chip.core.get_custom_public_values()
-        } else {
-            vec![]
-        };
-        Ok(SingleSegmentVmExecutionResult {
-            public_values,
-            air_heights,
-            vm_heights,
-        })
+        exe: &VmExe<F>,
+    ) -> Result<InterpretedInstance<F, ExecutionCtx>, StaticProgramError> {
+        InterpretedInstance::new(&self.inventory, exe)
     }
+}
 
-    /// Executes a program and returns its proof input.
-    pub fn execute_and_generate<SC: StarkGenericConfig>(
+impl<F, VC> VmExecutor<F, VC>
+where
+    F: PrimeField32,
+    VC: VmExecutionConfig<F>,
+    VC::Executor: MeteredExecutor<F>,
+{
+    /// Creates an instance of the interpreter specialized for metered execution of the given `exe`.
+    pub fn metered_instance(
         &self,
-        committed_exe: Arc<VmCommittedExe<SC>>,
-        input: impl Into<Streams<F>>,
-    ) -> Result<ProofInput<SC>, GenerationError>
-    where
-        Domain<SC>: PolynomialSpace<Val = F>,
-        VC::Executor: Chip<SC>,
-        VC::Periphery: Chip<SC>,
-    {
-        let segment = self.execute_impl(committed_exe.exe.clone(), input)?;
-        let proof_input = tracing::info_span!("trace_gen").in_scope(|| {
-            segment.generate_proof_input(Some(committed_exe.committed_program.clone()))
-        })?;
-        Ok(proof_input)
+        exe: &VmExe<F>,
+        executor_idx_to_air_idx: &[usize],
+    ) -> Result<InterpretedInstance<F, MeteredCtx>, StaticProgramError> {
+        InterpretedInstance::new_metered(&self.inventory, exe, executor_idx_to_air_idx)
     }
 
-    fn execute_impl(
+    /// Creates an instance of the interpreter specialized for cost metering execution of the given
+    /// `exe`.
+    pub fn metered_cost_instance(
         &self,
-        exe: VmExe<F>,
-        input: impl Into<Streams<F>>,
-    ) -> Result<ExecutionSegment<F, VC>, ExecutionError> {
-        let pc_start = exe.pc_start;
-        let mut segment = ExecutionSegment::new(
-            &self.config,
-            exe.program.clone(),
-            input.into(),
-            None,
-            self.trace_height_constraints.clone(),
-            exe.fn_bounds.clone(),
-        );
-        if let Some(overridden_heights) = self.overridden_heights.as_ref() {
-            segment.set_override_trace_heights(overridden_heights.clone());
-        }
-        metrics_span("execute_time_ms", || segment.execute_from_pc(pc_start))?;
-        Ok(segment)
+        exe: &VmExe<F>,
+        executor_idx_to_air_idx: &[usize],
+    ) -> Result<InterpretedInstance<F, MeteredCostCtx>, StaticProgramError> {
+        InterpretedInstance::new_metered(&self.inventory, exe, executor_idx_to_air_idx)
     }
 }
 
@@ -519,6 +269,12 @@ pub enum VmVerificationError {
     #[error("program commit mismatch (index of mismatch proof: {index}")]
     ProgramCommitMismatch { index: usize },
 
+    #[error("exe commit mismatch (expected: {expected:?}, actual: {actual:?})")]
+    ExeCommitMismatch {
+        expected: [u32; CHUNK],
+        actual: [u32; CHUNK],
+    },
+
     #[error("initial pc mismatch (initial: {initial}, prev_final: {prev_final})")]
     InitialPcMismatch { initial: u32, prev_final: u32 },
 
@@ -534,6 +290,9 @@ pub enum VmVerificationError {
     #[error("AIR has unexpected public values (expected: {expected}, actual: {actual})")]
     UnexpectedPvs { expected: usize, actual: usize },
 
+    #[error("Invalid number of AIRs: expected at least 3, got {0}")]
+    NotEnoughAirs(usize),
+
     #[error("missing system AIR with ID {air_id}")]
     SystemAirMissing { air_id: usize },
 
@@ -544,122 +303,384 @@ pub enum VmVerificationError {
     UserPublicValuesError(#[from] UserPublicValuesProofError),
 }
 
-pub struct VirtualMachine<SC: StarkGenericConfig, E, VC> {
+#[derive(Error, Debug)]
+pub enum VirtualMachineError {
+    #[error("executor inventory error: {0}")]
+    ExecutorInventory(#[from] ExecutorInventoryError),
+    #[error("air inventory error: {0}")]
+    AirInventory(#[from] AirInventoryError),
+    #[error("chip inventory error: {0}")]
+    ChipInventory(#[from] ChipInventoryError),
+    #[error("static program error: {0}")]
+    StaticProgram(#[from] StaticProgramError),
+    #[error("execution error: {0}")]
+    Execution(#[from] ExecutionError),
+    #[error("trace generation error: {0}")]
+    Generation(#[from] GenerationError),
+    #[error("program committed trade data not loaded")]
+    ProgramIsNotCommitted,
+    #[error("verification error: {0}")]
+    Verification(#[from] VmVerificationError),
+}
+
+/// The [VirtualMachine] struct contains the API to generate proofs for _arbitrary_ programs for a
+/// fixed set of OpenVM instructions and a fixed VM circuit corresponding to those instructions. The
+/// API is specific to a particular [StarkEngine], which specifies a fixed [StarkGenericConfig] and
+/// [ProverBackend] via associated types. The [VmBuilder] also fixes the choice of
+/// `RecordArena` associated to the prover backend via an associated type.
+///
+/// In other words, this struct _is_ the zkVM.
+#[derive(Getters, MutGetters, Setters, WithSetters)]
+pub struct VirtualMachine<E, VB>
+where
+    E: StarkEngine,
+    VB: VmBuilder<E>,
+{
     /// Proving engine
     pub engine: E,
     /// Runtime executor
-    pub executor: VmExecutor<Val<SC>, VC>,
-    _marker: PhantomData<SC>,
+    #[getset(get = "pub")]
+    executor: VmExecutor<Val<E::SC>, VB::VmConfig>,
+    #[getset(get = "pub", get_mut = "pub")]
+    pk: DeviceMultiStarkProvingKey<E::PB>,
+    chip_complex: VmChipComplex<E::SC, VB::RecordArena, E::PB, VB::SystemChipInventory>,
+    #[cfg(feature = "stark-debug")]
+    pub h_pk: Option<MultiStarkProvingKey<E::SC>>,
 }
 
-impl<F, SC, E, VC> VirtualMachine<SC, E, VC>
+impl<E, VB> VirtualMachine<E, VB>
 where
-    F: PrimeField32,
-    SC: StarkGenericConfig,
-    E: StarkEngine<SC>,
-    Domain<SC>: PolynomialSpace<Val = F>,
-    VC: VmConfig<F>,
-    VC::Executor: Chip<SC>,
-    VC::Periphery: Chip<SC>,
+    E: StarkEngine,
+    VB: VmBuilder<E>,
 {
-    pub fn new(engine: E, config: VC) -> Self {
-        let executor = VmExecutor::new(config);
-        Self {
+    pub fn new(
+        engine: E,
+        builder: VB,
+        config: VB::VmConfig,
+        d_pk: DeviceMultiStarkProvingKey<E::PB>,
+    ) -> Result<Self, VirtualMachineError> {
+        let circuit = config.create_airs()?;
+        let chip_complex = builder.create_chip_complex(&config, circuit)?;
+        let executor = VmExecutor::<Val<E::SC>, _>::new(config)?;
+        Ok(Self {
             engine,
             executor,
-            _marker: PhantomData,
-        }
+            pk: d_pk,
+            chip_complex,
+            #[cfg(feature = "stark-debug")]
+            h_pk: None,
+        })
     }
 
-    pub fn new_with_overridden_trace_heights(
+    pub fn new_with_keygen(
         engine: E,
-        config: VC,
-        overridden_heights: Option<VmComplexTraceHeights>,
-    ) -> Self {
-        let executor = VmExecutor::new_with_overridden_trace_heights(config, overridden_heights);
-        Self {
-            engine,
-            executor,
-            _marker: PhantomData,
-        }
+        builder: VB,
+        config: VB::VmConfig,
+    ) -> Result<(Self, MultiStarkProvingKey<E::SC>), VirtualMachineError> {
+        let circuit = config.create_airs()?;
+        let pk = circuit.keygen(&engine);
+        let d_pk = engine.device().transport_pk_to_device(&pk);
+        let vm = Self::new(engine, builder, config, d_pk)?;
+        Ok((vm, pk))
     }
 
-    pub fn config(&self) -> &VC {
+    pub fn config(&self) -> &VB::VmConfig {
         &self.executor.config
     }
 
-    pub fn keygen(&self) -> MultiStarkProvingKey<SC> {
-        let mut keygen_builder = self.engine.keygen_builder();
-        let chip_complex = self.config().create_chip_complex().unwrap();
-        for air in chip_complex.airs() {
-            keygen_builder.add_air(air);
-        }
-        keygen_builder.generate_pk()
-    }
-
-    pub fn set_trace_height_constraints(
-        &mut self,
-        trace_height_constraints: Vec<LinearConstraint>,
-    ) {
-        self.executor
-            .set_trace_height_constraints(trace_height_constraints);
-    }
-
-    pub fn commit_exe(&self, exe: impl Into<VmExe<F>>) -> Arc<VmCommittedExe<SC>> {
-        let exe = exe.into();
-        Arc::new(VmCommittedExe::commit(exe, self.engine.config().pcs()))
+    /// Pure interpreter: returns the instance built by the executor's `instance(exe)`.
+    pub fn interpreter(
+        &self,
+        exe: &VmExe<Val<E::SC>>,
+    ) -> Result<InterpretedInstance<Val<E::SC>, ExecutionCtx>, StaticProgramError>
+    where
+        Val<E::SC>: PrimeField32,
+        <VB::VmConfig as VmExecutionConfig<Val<E::SC>>>::Executor: Executor<Val<E::SC>>,
+    {
+        self.executor().instance(exe)
     }
 
-    pub fn execute(
+    pub fn metered_interpreter(
         &self,
-        exe: impl Into<VmExe<F>>,
-        input: impl Into<Streams<F>>,
-    ) -> Result<Option<VmMemoryState<F>>, ExecutionError> {
-        self.executor.execute(exe, input)
+        exe: &VmExe<Val<E::SC>>,
+    ) -> Result<InterpretedInstance<Val<E::SC>, MeteredCtx>, StaticProgramError>
+    where
+        Val<E::SC>: PrimeField32,
+        <VB::VmConfig as VmExecutionConfig<Val<E::SC>>>::Executor: MeteredExecutor<Val<E::SC>>,
+    {
+        let executor_idx_to_air_idx = self.executor_idx_to_air_idx();
+        self.executor()
+            .metered_instance(exe, &executor_idx_to_air_idx)
     }
 
-    pub fn execute_and_generate(
+    pub fn preflight_interpreter(
         &self,
-        exe: impl Into<VmExe<F>>,
-        input: impl Into<Streams<F>>,
-    ) -> Result<VmExecutorResult<SC>, GenerationError> {
-        self.executor.execute_and_generate(exe, input)
+        exe: &VmExe<Val<E::SC>>,
+    ) -> Result<PreflightInterpretedInstance2<Val<E::SC>, VB::VmConfig>, StaticProgramError> {
+        PreflightInterpretedInstance::new(
+            &exe.program,
+            self.executor.inventory.clone(),
+            self.executor_idx_to_air_idx(),
+        )
     }
 
-    pub fn execute_and_generate_with_cached_program(
+    /// Preflight execution for a single segment. Executes for exactly `num_insns` instructions
+    /// using an interpreter. Preflight execution must be provided with `trace_heights`
+    /// instrumentation data that was collected from a previous run of metered execution so that the
+    /// preflight execution knows how much memory to allocate for record arenas.
+    ///
+    /// This function should rarely be called on its own. Users are advised to call
+    /// [`prove`](Self::prove) directly.
+    #[instrument(name = "execute_preflight", skip_all)]
+    pub fn execute_preflight(
         &self,
-        committed_exe: Arc<VmCommittedExe<SC>>,
-        input: impl Into<Streams<F>>,
-    ) -> Result<VmExecutorResult<SC>, GenerationError>
+        interpreter: &mut PreflightInterpretedInstance2<Val<E::SC>, VB::VmConfig>,
+        state: VmState<Val<E::SC>, GuestMemory>,
+        num_insns: Option<u64>,
+        trace_heights: &[u32],
+    ) -> Result<PreflightExecutionOutput<Val<E::SC>, VB::RecordArena>, ExecutionError>
     where
-        Domain<SC>: PolynomialSpace<Val = F>,
+        Val<E::SC>: PrimeField32,
+        <VB::VmConfig as VmExecutionConfig<Val<E::SC>>>::Executor:
+            PreflightExecutor<Val<E::SC>, VB::RecordArena>,
     {
-        self.executor
-            .execute_and_generate_with_cached_program(committed_exe, input)
+        debug_assert!(interpreter
+            .executor_idx_to_air_idx
+            .iter()
+            .all(|&air_idx| air_idx < trace_heights.len()));
+
+        let instret_end = num_insns.map(|ni| state.instret.saturating_add(ni));
+        // TODO[jpw]: figure out how to compute RA specific main_widths
+        let main_widths = self
+            .pk
+            .per_air
+            .iter()
+            .map(|pk| pk.vk.params.width.main_width())
+            .collect_vec();
+        let capacities = zip_eq(trace_heights, main_widths)
+            .map(|(&h, w)| (h as usize, w))
+            .collect::<Vec<_>>();
+        let ctx = PreflightCtx::new_with_capacity(&capacities, instret_end);
+
+        let system_config: &SystemConfig = self.config().as_ref();
+        let adapter_offset = system_config.access_adapter_air_id_offset();
+        // ATTENTION: this must agree with `num_memory_airs`
+        let num_adapters = log2_strict_usize(system_config.memory_config.max_access_adapter_n);
+        assert_eq!(adapter_offset + num_adapters, system_config.num_airs());
+        let access_adapter_arena_size_bound = records::arena_size_bound(
+            &trace_heights[adapter_offset..adapter_offset + num_adapters],
+        );
+        let memory = TracingMemory::from_image(
+            state.memory,
+            system_config.initial_block_size(),
+            access_adapter_arena_size_bound,
+        );
+        let from_state = ExecutionState::new(state.pc, memory.timestamp());
+        let vm_state = VmState {
+            instret: state.instret,
+            pc: state.pc,
+            memory,
+            streams: state.streams,
+            rng: state.rng,
+            custom_pvs: state.custom_pvs,
+            #[cfg(feature = "metrics")]
+            metrics: state.metrics,
+        };
+        let mut exec_state = VmExecState::new(vm_state, ctx);
+        interpreter.reset_execution_frequencies();
+        execute_spanned!("execute_preflight", interpreter, &mut exec_state)?;
+        let filtered_exec_frequencies = interpreter.filtered_execution_frequencies();
+        let touched_memory = exec_state
+            .vm_state
+            .memory
+            .finalize::<Val<E::SC>>(system_config.continuation_enabled);
+        #[cfg(feature = "perf-metrics")]
+        crate::metrics::end_segment_metrics(&mut exec_state);
+
+        let memory = exec_state.vm_state.memory;
+        let to_state = ExecutionState::new(exec_state.vm_state.pc, memory.timestamp());
+        let public_values = exec_state
+            .vm_state
+            .custom_pvs
+            .iter()
+            .map(|&x| x.unwrap_or(Val::<E::SC>::ZERO))
+            .collect();
+        let exit_code = exec_state.exit_code?;
+        let system_records = SystemRecords {
+            from_state,
+            to_state,
+            exit_code,
+            filtered_exec_frequencies,
+            access_adapter_records: memory.access_adapter_records,
+            touched_memory,
+            public_values,
+        };
+        let record_arenas = exec_state.ctx.arenas;
+        let to_state = VmState {
+            instret: exec_state.vm_state.instret,
+            pc: exec_state.vm_state.pc,
+            memory: memory.data,
+            streams: exec_state.vm_state.streams,
+            rng: exec_state.vm_state.rng,
+            custom_pvs: exec_state.vm_state.custom_pvs,
+            #[cfg(feature = "metrics")]
+            metrics: exec_state.vm_state.metrics,
+        };
+        Ok(PreflightExecutionOutput {
+            system_records,
+            record_arenas,
+            to_state,
+        })
     }
 
-    pub fn prove_single(
+    /// Calls [`VmState::initial`] but sets more information for
+    /// performance metrics when feature "perf-metrics" is enabled.
+    #[instrument(name = "vm.create_initial_state", level = "debug", skip_all)]
+    pub fn create_initial_state(
         &self,
-        pk: &MultiStarkProvingKey<SC>,
-        proof_input: ProofInput<SC>,
-    ) -> Proof<SC> {
-        self.engine.prove(pk, proof_input)
+        exe: &VmExe<Val<E::SC>>,
+        inputs: impl Into<Streams<Val<E::SC>>>,
+    ) -> VmState<Val<E::SC>, GuestMemory> {
+        #[allow(unused_mut)]
+        let mut state = VmState::initial(
+            self.config().as_ref(),
+            &exe.init_memory,
+            exe.pc_start,
+            inputs,
+        );
+        // Add backtrace information for either:
+        // - debugging
+        // - performance metrics
+        #[cfg(all(feature = "metrics", any(feature = "perf-metrics", debug_assertions)))]
+        {
+            state.metrics.fn_bounds = exe.fn_bounds.clone();
+            state.metrics.debug_infos = exe.program.debug_infos();
+        }
+        #[cfg(feature = "perf-metrics")]
+        {
+            state.metrics.set_pk_info(&self.pk);
+            state.metrics.num_sys_airs = self.config().as_ref().num_airs();
+            state.metrics.access_adapter_offset =
+                self.config().as_ref().access_adapter_air_id_offset();
+        }
+        state
     }
 
+    /// This function mutates `self` but should only depend on internal state in the sense that:
+    /// - program must already be loaded as cached trace via [`load_program`](Self::load_program).
+    /// - initial memory image was already sent to device via
+    ///   [`transport_init_memory_to_device`](Self::transport_init_memory_to_device).
+    /// - all other state should be given by `system_records` and `record_arenas`
+    #[instrument(name = "trace_gen", skip_all)]
+    pub fn generate_proving_ctx(
+        &mut self,
+        system_records: SystemRecords<Val<E::SC>>,
+        record_arenas: Vec<VB::RecordArena>,
+    ) -> Result<ProvingContext<E::PB>, GenerationError> {
+        #[cfg(feature = "metrics")]
+        let mut current_trace_heights =
+            self.get_trace_heights_from_arenas(&system_records, &record_arenas);
+        // main tracegen call (consumes `system_records` and `record_arenas`):
+        let ctx = self
+            .chip_complex
+            .generate_proving_ctx(system_records, record_arenas)?;
+
+        // ==== Defensive checks that the trace heights satisfy the linear constraints: ====
+        let idx_trace_heights = ctx
+            .per_air
+            .iter()
+            .map(|(air_idx, ctx)| (*air_idx, ctx.main_trace_height()))
+            .collect_vec();
+        // 1. check max trace height isn't exceeded
+        let max_trace_height = if TypeId::of::<Val<E::SC>>() == TypeId::of::<BabyBear>() {
+            let min_log_blowup = log2_ceil_usize(self.config().as_ref().max_constraint_degree - 1);
+            1 << (BabyBear::TWO_ADICITY - min_log_blowup)
+        } else {
+            tracing::warn!( // NOTE(review): fires on every call, yet the text says "constructing"
+                "constructing VirtualMachine for unrecognized field; using max_trace_height=2^30"
+            );
+            1 << 30
+        };
+        if let Some(&(air_idx, height)) = idx_trace_heights
+            .iter()
+            .find(|(_, height)| *height > max_trace_height)
+        {
+            return Err(GenerationError::TraceHeightsLimitExceeded {
+                air_idx,
+                height,
+                max_height: max_trace_height,
+            });
+        }
+        // 2. check linear constraints on trace heights are satisfied
+        let trace_height_constraints = &self.pk.trace_height_constraints;
+        if trace_height_constraints.is_empty() {
+            tracing::warn!("generating proving context without trace height constraints");
+        }
+        for (i, constraint) in trace_height_constraints.iter().enumerate() {
+            let value = idx_trace_heights
+                .iter()
+                .map(|&(air_idx, h)| constraint.coefficients[air_idx] as u64 * h as u64)
+                .sum::<u64>();
+
+            if value >= constraint.threshold as u64 { // satisfied only when value < threshold
+                tracing::info!(
+                    "trace heights {:?} violate linear constraint {} ({} >= {})",
+                    idx_trace_heights,
+                    i,
+                    value,
+                    constraint.threshold
+                );
+                return Err(GenerationError::LinearTraceHeightConstraintExceeded {
+                    constraint_idx: i,
+                    value,
+                    threshold: constraint.threshold,
+                });
+            }
+        }
+        #[cfg(feature = "metrics")]
+        self.finalize_metrics(&mut current_trace_heights);
+        #[cfg(feature = "stark-debug")]
+        self.debug_proving_ctx(&ctx);
+
+        Ok(ctx)
+    }
+
+    /// Generates proof for zkVM execution for exactly `num_insns` instructions for a given program
+    /// and a given starting state.
+    ///
+    /// **Note**: The cached program trace must be loaded via [`load_program`](Self::load_program)
+    /// before calling this function.
+    ///
+    /// Returns:
+    /// - proof for the execution segment
+    /// - final memory state only if execution ends in successful termination (exit code 0). This
+    ///   final memory state may be used to extract user public values afterwards.
     pub fn prove(
-        &self,
-        pk: &MultiStarkProvingKey<SC>,
-        results: VmExecutorResult<SC>,
-    ) -> Vec<Proof<SC>> {
-        results
-            .per_segment
-            .into_iter()
-            .enumerate()
-            .map(|(seg_idx, proof_input)| {
-                tracing::info_span!("prove_segment", segment = seg_idx)
-                    .in_scope(|| self.engine.prove(pk, proof_input))
-            })
-            .collect()
+        &mut self,
+        interpreter: &mut PreflightInterpretedInstance2<Val<E::SC>, VB::VmConfig>,
+        state: VmState<Val<E::SC>, GuestMemory>,
+        num_insns: Option<u64>,
+        trace_heights: &[u32],
+    ) -> Result<(Proof<E::SC>, Option<GuestMemory>), VirtualMachineError>
+    where
+        Val<E::SC>: PrimeField32,
+        <VB::VmConfig as VmExecutionConfig<Val<E::SC>>>::Executor:
+            PreflightExecutor<Val<E::SC>, VB::RecordArena>,
+    {
+        self.transport_init_memory_to_device(&state.memory);
+
+        let PreflightExecutionOutput {
+            system_records,
+            record_arenas,
+            to_state,
+        } = self.execute_preflight(interpreter, state, num_insns, trace_heights)?;
+        // drop final memory unless this is a terminal segment and the exit code is success
+        let final_memory =
+            (system_records.exit_code == Some(ExitCode::Success as u32)).then_some(to_state.memory);
+        let ctx = self.generate_proving_ctx(system_records, record_arenas)?;
+        let proof = self.engine.prove(&self.pk, ctx);
+
+        Ok((proof, final_memory))
     }
 
     /// Verify segment proofs, checking continuation boundary conditions between segments if VM
@@ -668,20 +689,359 @@ where
     /// or [`verify_single`] directly instead.
     pub fn verify(
         &self,
-        vk: &MultiStarkVerifyingKey<SC>,
-        proofs: Vec<Proof<SC>>,
+        vk: &MultiStarkVerifyingKey<E::SC>,
+        proofs: &[Proof<E::SC>],
     ) -> Result<(), VmVerificationError>
     where
-        Val<SC>: PrimeField32,
-        Com<SC>: AsRef<[Val<SC>; CHUNK]> + From<[Val<SC>; CHUNK]>,
+        Com<E::SC>: AsRef<[Val<E::SC>; CHUNK]> + From<[Val<E::SC>; CHUNK]>,
+        Val<E::SC>: PrimeField32,
     {
-        if self.config().system().continuation_enabled {
-            verify_segments(&self.engine, vk, &proofs).map(|_| ())
+        if self.config().as_ref().continuation_enabled {
+            verify_segments(&self.engine, vk, proofs).map(|_| ())
         } else {
             assert_eq!(proofs.len(), 1);
-            verify_single(&self.engine, vk, &proofs.into_iter().next().unwrap())
-                .map_err(VmVerificationError::StarkError)
+            verify_single(&self.engine, vk, &proofs[0]).map_err(VmVerificationError::StarkError)
+        }
+    }
+
+    /// Transforms the program into a cached trace and commits it _on device_ using the proof system
+    /// polynomial commitment scheme.
+    ///
+    /// Returns the cached program trace bundled with the commitment and data returned by `commit`.
+    /// Note that [`load_program`](Self::load_program) must be called separately to load the cached
+    /// program trace into the VM itself.
+    pub fn commit_program_on_device(
+        &self,
+        program: &Program<Val<E::SC>>,
+    ) -> CommittedTraceData<E::PB> {
+        let trace = generate_cached_trace(program);
+        let d_trace = self
+            .engine
+            .device()
+            .transport_matrix_to_device(&Arc::new(trace));
+        let (commitment, data) = self.engine.device().commit(std::slice::from_ref(&d_trace));
+        CommittedTraceData {
+            commitment,
+            trace: d_trace,
+            data,
+        }
+    }
+
+    /// Convenience method to transport a host committed Exe to device. This can be used if you have
+    /// a pre-committed program and want to transport to device instead of re-committing. One should
+    /// benchmark the latency of this function versus
+    /// [`commit_program_on_device`](Self::commit_program_on_device), which directly re-commits on
+    /// device, to determine which method is more suitable.
+    pub fn transport_committed_exe_to_device(
+        &self,
+        committed_exe: &VmCommittedExe<E::SC>,
+    ) -> CommittedTraceData<E::PB> {
+        let commitment = committed_exe.get_program_commit(); // host-side commitment, passed as-is
+        let trace = &committed_exe.trace;
+        let prover_data = &committed_exe.prover_data;
+        self.engine
+            .device()
+            .transport_committed_trace_to_device(commitment, trace, prover_data)
+    }
+
+    /// Loads cached program trace into the VM (ownership is handed to `chip_complex.system`).
+    pub fn load_program(&mut self, cached_program_trace: CommittedTraceData<E::PB>) {
+        self.chip_complex.system.load_program(cached_program_trace);
+    }
+
+    pub fn transport_init_memory_to_device(&mut self, memory: &GuestMemory) {
+        self.chip_complex
+            .system
+            .transport_init_memory_to_device(memory); // pure delegation to `chip_complex.system`
+    }
+
+    pub fn executor_idx_to_air_idx(&self) -> Vec<usize> {
+        let ret = self.chip_complex.inventory.executor_idx_to_air_idx();
+        tracing::debug!("executor_idx_to_air_idx: {:?}", ret);
+        assert_eq!(self.executor().inventory.executors().len(), ret.len()); // one entry per executor
+        ret
+    }
+
+    /// Convenience method to construct a [MeteredCtx] using data from the stored proving key.
+    pub fn build_metered_ctx(&self) -> MeteredCtx {
+        let (constant_trace_heights, air_names, widths, interactions): (
+            Vec<_>,
+            Vec<_>,
+            Vec<_>,
+            Vec<_>,
+        ) = self
+            .pk
+            .per_air
+            .iter()
+            .map(|pk| {
+                let constant_trace_height =
+                    pk.preprocessed_data.as_ref().map(|pd| pd.trace.height());
+                let air_names = pk.air_name.clone(); // a single name, despite the plural binding
+                let width = pk
+                    .vk
+                    .params
+                    .width
+                    .total_width(<<E::SC as StarkGenericConfig>::Challenge>::D);
+                let num_interactions = pk.vk.symbolic_constraints.interactions.len();
+                (constant_trace_height, air_names, width, num_interactions)
+            })
+            .multiunzip(); // splits the per-AIR 4-tuples into four parallel Vecs
+
+        self.executor().build_metered_ctx(
+            &constant_trace_heights,
+            &air_names,
+            &widths,
+            &interactions,
+        )
+    }
+
+    /// Convenience method to construct a [MeteredCostCtx] using data from the stored proving key.
+    pub fn build_metered_cost_ctx(&self) -> MeteredCostCtx {
+        let widths: Vec<_> = self
+            .pk
+            .per_air
+            .iter()
+            .map(|pk| {
+                pk.vk
+                    .params
+                    .width
+                    .total_width(<<E::SC as StarkGenericConfig>::Challenge>::D)
+            })
+            .collect(); // one total width per AIR, in `per_air` order
+
+        self.executor().build_metered_cost_ctx(&widths)
+    }
+
+    pub fn num_airs(&self) -> usize {
+        let num_airs = self.pk.per_air.len(); // count comes from the proving key
+        debug_assert_eq!(num_airs, self.chip_complex.inventory.airs().num_airs());
+        num_airs
+    }
+
+    pub fn air_names(&self) -> impl Iterator<Item = &'_ str> {
+        self.pk.per_air.iter().map(|pk| pk.air_name.as_str()) // borrowed, in `per_air` order
+    }
+
+    /// Calls the free fn [`debug_proving_ctx`]; the host pk is keygen'd once and cached in `h_pk`.
+    #[cfg(feature = "stark-debug")]
+    pub fn debug_proving_ctx(&mut self, ctx: &ProvingContext<E::PB>) {
+        if self.h_pk.is_none() {
+            let air_inv = self.config().create_airs().unwrap();
+            self.h_pk = Some(air_inv.keygen(&self.engine));
+        }
+        let pk = self.h_pk.as_ref().unwrap();
+        debug_proving_ctx(self, pk, ctx);
+    }
+}
+
+#[derive(Serialize, Deserialize)]
+#[serde(bound(
+    serialize = "Com<SC>: Serialize",
+    deserialize = "Com<SC>: Deserialize<'de>"
+))]
+pub struct ContinuationVmProof<SC: StarkGenericConfig> {
+    pub per_segment: Vec<Proof<SC>>, // one proof per segment, in execution order
+    pub user_public_values: UserPublicValuesProof<{ CHUNK }, Val<SC>>, // built from final memory
+}
+
+/// Prover for a specific exe in a specific continuation VM using a specific Stark config.
+pub trait ContinuationVmProver<SC: StarkGenericConfig> {
+    fn prove(
+        &mut self,
+        input: impl Into<Streams<Val<SC>>>,
+    ) -> Result<ContinuationVmProof<SC>, VirtualMachineError>; // segment proofs + user PVs proof
+}
+
+/// Prover for a specific exe in a specific single-segment VM using a specific Stark config.
+///
+/// Does not run metered execution and directly runs preflight execution. The `prove` function must
+/// be provided with the expected maximum `trace_heights` to use to allocate record arena
+/// capacities.
+pub trait SingleSegmentVmProver<SC: StarkGenericConfig> {
+    fn prove(
+        &mut self,
+        input: impl Into<Streams<Val<SC>>>,
+        trace_heights: &[u32], // per-AIR heights, indexed by AIR ID
+    ) -> Result<Proof<SC>, VirtualMachineError>;
+}
+
+/// Virtual machine prover instance for a fixed VM config and a fixed program. For use in proving a
+/// program directly on bare metal.
+///
+/// This struct contains the [VmState] itself to avoid re-allocating guest memory. The memory is
+/// reset with zeros before execution.
+#[derive(Getters, MutGetters)]
+pub struct VmInstance<E, VB>
+where
+    E: StarkEngine,
+    VB: VmBuilder<E>,
+{
+    pub vm: VirtualMachine<E, VB>,
+    pub interpreter: PreflightInterpretedInstance2<Val<E::SC>, VB::VmConfig>,
+    #[getset(get = "pub")]
+    program_commitment: Com<E::SC>, // cloned from the cached program trace passed to `new`
+    #[getset(get = "pub")]
+    exe: Arc<VmExe<Val<E::SC>>>,
+    #[getset(get = "pub", get_mut = "pub")]
+    state: Option<VmState<Val<E::SC>, GuestMemory>>, // taken (`None`) while proving, then restored
+}
+
+impl<E, VB> VmInstance<E, VB>
+where
+    E: StarkEngine,
+    VB: VmBuilder<E>,
+{
+    pub fn new(
+        mut vm: VirtualMachine<E, VB>,
+        exe: Arc<VmExe<Val<E::SC>>>,
+        cached_program_trace: CommittedTraceData<E::PB>,
+    ) -> Result<Self, StaticProgramError> {
+        let program_commitment = cached_program_trace.commitment.clone();
+        vm.load_program(cached_program_trace); // moves the trace; commitment was cloned above
+        let interpreter = vm.preflight_interpreter(&exe)?;
+        let state = vm.create_initial_state(&exe, vec![]); // starts with empty input streams
+        Ok(Self {
+            vm,
+            interpreter,
+            program_commitment,
+            exe,
+            state: Some(state),
+        })
+    }
+
+    #[instrument(name = "vm.reset_state", level = "debug", skip_all)]
+    pub fn reset_state(&mut self, inputs: impl Into<Streams<Val<E::SC>>>) {
+        self.state
+            .as_mut()
+            .unwrap() // panics if `state` is currently `None`
+            .reset(&self.exe.init_memory, self.exe.pc_start, inputs);
+    }
+}
+
+impl<E, VB> ContinuationVmProver<E::SC> for VmInstance<E, VB>
+where
+    E: StarkEngine,
+    Val<E::SC>: PrimeField32,
+    VB: VmBuilder<E>,
+    <VB::VmConfig as VmExecutionConfig<Val<E::SC>>>::Executor: Executor<Val<E::SC>>
+        + MeteredExecutor<Val<E::SC>>
+        + PreflightExecutor<Val<E::SC>, VB::RecordArena>,
+{
+    /// First performs metered execution (E2) to determine segments. Then sequentially proves each
+    /// segment. The proof for each segment uses the specified [ProverBackend], but the proof for
+    /// the next segment does not start before the current proof finishes.
+    fn prove(
+        &mut self,
+        input: impl Into<Streams<Val<E::SC>>>,
+    ) -> Result<ContinuationVmProof<E::SC>, VirtualMachineError> {
+        self.prove_continuations(input, |_, _| {}) // no-op `modify_ctx` hook
+    }
+}
+
+impl<E, VB> VmInstance<E, VB>
+where
+    E: StarkEngine,
+    Val<E::SC>: PrimeField32,
+    VB: VmBuilder<E>,
+    <VB::VmConfig as VmExecutionConfig<Val<E::SC>>>::Executor: Executor<Val<E::SC>>
+        + MeteredExecutor<Val<E::SC>>
+        + PreflightExecutor<Val<E::SC>, VB::RecordArena>,
+{
+    /// For internal use to resize trace matrices before proving.
+    ///
+    /// The closure `modify_ctx(seg_idx, &mut ctx)` is called sequentially for each segment.
+    pub fn prove_continuations(
+        &mut self,
+        input: impl Into<Streams<Val<E::SC>>>,
+        mut modify_ctx: impl FnMut(usize, &mut ProvingContext<E::PB>),
+    ) -> Result<ContinuationVmProof<E::SC>, VirtualMachineError> {
+        let input = input.into();
+        self.reset_state(input.clone()); // cloned: `input` is consumed by metered execution below
+        let vm = &mut self.vm;
+        let metered_ctx = vm.build_metered_ctx();
+        let metered_interpreter = vm.metered_interpreter(&self.exe)?;
+        let (segments, _) = metered_interpreter.execute_metered(input, metered_ctx)?;
+        let mut proofs = Vec::with_capacity(segments.len());
+        let mut state = self.state.take(); // NOTE(review): stays `None` if a `?` below returns early
+        for (seg_idx, segment) in segments.into_iter().enumerate() {
+            let _segment_span = info_span!("prove_segment", segment = seg_idx).entered();
+            // We need a separate span so the metric label includes "segment" from _segment_span
+            let _prove_span = info_span!("total_proof").entered();
+            let Segment {
+                instret_start,
+                num_insns,
+                trace_heights,
+            } = segment;
+            assert_eq!(state.as_ref().unwrap().instret, instret_start);
+            let from_state = Option::take(&mut state).unwrap();
+            vm.transport_init_memory_to_device(&from_state.memory);
+            let PreflightExecutionOutput {
+                system_records,
+                record_arenas,
+                to_state,
+            } = vm.execute_preflight(
+                &mut self.interpreter,
+                from_state,
+                Some(num_insns),
+                &trace_heights,
+            )?;
+            state = Some(to_state); // end state of this segment is the start state of the next
+
+            let mut ctx = vm.generate_proving_ctx(system_records, record_arenas)?;
+            modify_ctx(seg_idx, &mut ctx);
+            let proof = vm.engine.prove(vm.pk(), ctx);
+            proofs.push(proof);
+        }
+        let to_state = state.unwrap();
+        let final_memory = &to_state.memory.memory;
+        let user_public_values = UserPublicValuesProof::compute(
+            vm.config().as_ref().memory_config.memory_dimensions(),
+            vm.config().as_ref().num_public_values,
+            &vm_poseidon2_hasher(),
+            final_memory,
+        );
+        self.state = Some(to_state); // restore so the next `reset_state` can reuse the allocation
+        Ok(ContinuationVmProof {
+            per_segment: proofs,
+            user_public_values,
+        })
+    }
+}
+
+impl<E, VB> SingleSegmentVmProver<E::SC> for VmInstance<E, VB>
+where
+    E: StarkEngine,
+    Val<E::SC>: PrimeField32,
+    VB: VmBuilder<E>,
+    <VB::VmConfig as VmExecutionConfig<Val<E::SC>>>::Executor:
+        PreflightExecutor<Val<E::SC>, VB::RecordArena>,
+{
+    #[instrument(name = "total_proof", skip_all)]
+    fn prove(
+        &mut self,
+        input: impl Into<Streams<Val<E::SC>>>,
+        trace_heights: &[u32],
+    ) -> Result<Proof<E::SC>, VirtualMachineError> {
+        self.reset_state(input);
+        let vm = &mut self.vm;
+        let exe = &self.exe;
+        assert!(!vm.config().as_ref().continuation_enabled);
+        let mut trace_heights = trace_heights.to_vec(); // local copy; the caller's slice is untouched
+        trace_heights[PUBLIC_VALUES_AIR_ID] = vm.config().as_ref().num_public_values as u32;
+        let state = self.state.take().expect("State should always be present");
+        let num_custom_pvs = state.custom_pvs.len();
+        let (proof, final_memory) = vm.prove(&mut self.interpreter, state, None, &trace_heights)?;
+        let final_memory = final_memory.ok_or(ExecutionError::DidNotTerminate)?;
+        // Put back state to avoid re-allocation. NOTE(review): skipped if either `?` above fires
+        self.state = Some(VmState::new(
+            0,
+            exe.pc_start,
+            final_memory,
+            vec![],
+            DEFAULT_RNG_SEED,
+            num_custom_pvs,
+        ));
+        Ok(proof)
     }
 }
 
@@ -690,14 +1050,13 @@ where
 /// ## Note
 /// This function does not check any public values or extract the starting pc or commitment
 /// to the [VmCommittedExe].
-pub fn verify_single<SC, E>(
+pub fn verify_single<E>(
     engine: &E,
-    vk: &MultiStarkVerifyingKey<SC>,
-    proof: &Proof<SC>,
+    vk: &MultiStarkVerifyingKey<E::SC>,
+    proof: &Proof<E::SC>,
 ) -> Result<(), VerificationError>
 where
-    SC: StarkGenericConfig,
-    E: StarkEngine<SC>,
+    E: StarkEngine,
 {
     engine.verify(vk, proof)
 }
@@ -732,16 +1091,15 @@ pub struct VerifiedExecutionPayload<F> {
 /// This verification requires an additional Merkle proof with respect to the Merkle root of
 /// the final memory state.
 // @dev: This function doesn't need to be generic in `VC`.
-pub fn verify_segments<SC, E>(
+pub fn verify_segments<E>(
     engine: &E,
-    vk: &MultiStarkVerifyingKey<SC>,
-    proofs: &[Proof<SC>],
-) -> Result<VerifiedExecutionPayload<Val<SC>>, VmVerificationError>
+    vk: &MultiStarkVerifyingKey<E::SC>,
+    proofs: &[Proof<E::SC>],
+) -> Result<VerifiedExecutionPayload<Val<E::SC>>, VmVerificationError>
 where
-    SC: StarkGenericConfig,
-    E: StarkEngine<SC>,
-    Val<SC>: PrimeField32,
-    Com<SC>: AsRef<[Val<SC>; CHUNK]>,
+    E: StarkEngine,
+    Val<E::SC>: PrimeField32,
+    Com<E::SC>: AsRef<[Val<E::SC>; CHUNK]>,
 {
     if proofs.is_empty() {
         return Err(VmVerificationError::ProofNotFound);
@@ -865,16 +1223,6 @@ where
     })
 }
 
-#[derive(Serialize, Deserialize)]
-#[serde(bound(
-    serialize = "Com<SC>: Serialize",
-    deserialize = "Com<SC>: Deserialize<'de>"
-))]
-pub struct ContinuationVmProof<SC: StarkGenericConfig> {
-    pub per_segment: Vec<Proof<SC>>,
-    pub user_public_values: UserPublicValuesProof<{ CHUNK }, Val<SC>>,
-}
-
 impl<SC: StarkGenericConfig> Clone for ContinuationVmProof<SC>
 where
     Com<SC>: Clone,
@@ -886,3 +1234,154 @@ where
         }
     }
 }
+
+pub(super) fn create_memory_image(
+    memory_config: &MemoryConfig,
+    init_memory: &SparseMemoryImage,
+) -> GuestMemory {
+    let mut inner = AddressMap::new(memory_config.addr_spaces.clone());
+    inner.set_from_sparse(init_memory);
+    GuestMemory::new(inner)
+}
+
+impl<E, VC> VirtualMachine<E, VC>
+where
+    E: StarkEngine,
+    VC: VmBuilder<E>,
+    VC::SystemChipInventory: SystemWithFixedTraceHeights,
+{
+    /// Sets fixed trace heights for the system AIRs' trace matrices (panics if too few are given).
+    pub fn override_system_trace_heights(&mut self, heights: &[u32]) {
+        let num_sys_airs = self.config().as_ref().num_airs();
+        assert!(heights.len() >= num_sys_airs);
+        self.chip_complex
+            .system
+            .override_trace_heights(&heights[..num_sys_airs]); // any extra entries are ignored
+    }
+}
+
+/// Runs the STARK backend debugger to check the constraints against the trace matrices
+/// logically, instead of cryptographically. This will panic if any constraint is violated, and
+/// setting `RUST_BACKTRACE=1` lets you read the stack backtrace of where the constraint
+/// failed in the code (this requires the code to be compiled with debug=true). Using lower
+/// optimization levels like -O0 will prevent the compiler from inlining and give better
+/// debugging information.
+// @dev The debugger needs the host proving key.
+//      This function is used both by VirtualMachine::debug_proving_ctx and by
+//      stark_utils::air_test_impl.
+#[cfg(any(debug_assertions, feature = "test-utils", feature = "stark-debug"))]
+#[tracing::instrument(level = "debug", skip_all)]
+pub fn debug_proving_ctx<E, VB>(
+    vm: &VirtualMachine<E, VB>,
+    pk: &MultiStarkProvingKey<E::SC>,
+    ctx: &ProvingContext<E::PB>,
+) where
+    E: StarkEngine,
+    VB: VmBuilder<E>,
+{
+    use itertools::multiunzip;
+    use openvm_stark_backend::prover::types::AirProofRawInput;
+
+    let device = vm.engine.device();
+    let air_inv = vm.config().create_airs().unwrap();
+    let global_airs = air_inv.into_airs().collect_vec();
+    let (airs, pks, proof_inputs): (Vec<_>, Vec<_>, Vec<_>) =
+        multiunzip(ctx.per_air.iter().map(|(air_id, air_ctx)| {
+            // Transfer from device **back** to host so the debugger can read the data.
+            let cached_mains = air_ctx
+                .cached_mains
+                .iter()
+                .map(|pre| device.transport_matrix_from_device_to_host(&pre.trace))
+                .collect_vec();
+            let common_main = air_ctx
+                .common_main
+                .as_ref()
+                .map(|m| device.transport_matrix_from_device_to_host(m));
+            let public_values = air_ctx.public_values.clone();
+            let raw = AirProofRawInput {
+                cached_mains,
+                common_main,
+                public_values,
+            };
+            (
+                global_airs[*air_id].clone(),
+                pk.per_air[*air_id].clone(),
+                raw,
+            )
+        }));
+    vm.engine.debug(&airs, &pks, &proof_inputs);
+}
+
+#[cfg(feature = "metrics")]
+mod vm_metrics {
+    use std::iter::zip;
+
+    use metrics::counter;
+
+    use super::*;
+    use crate::arch::Arena;
+
+    impl<E, VB> VirtualMachine<E, VB>
+    where
+        E: StarkEngine,
+        VB: VmBuilder<E>,
+    {
+        /// Assumes that `record_arenas` has length equal to the number of AIRs (asserted below).
+        ///
+        /// Best effort calculation of the used trace heights per chip without padding to powers of
+        /// two. This is best effort because some periphery chips may not have record arenas to
+        /// instrument. This function includes the constant trace heights, and the used height of
+        /// the program trace. It does not include the memory access adapter trace heights,
+        /// which are included in `SystemChipComplex::finalize_trace_heights`.
+        pub(crate) fn get_trace_heights_from_arenas(
+            &self,
+            system_records: &SystemRecords<Val<E::SC>>,
+            record_arenas: &[VB::RecordArena],
+        ) -> Vec<usize> {
+            let num_airs = self.num_airs();
+            assert_eq!(num_airs, record_arenas.len());
+            let mut heights: Vec<usize> = record_arenas
+                .iter()
+                .map(|arena| arena.current_trace_height())
+                .collect();
+            // If there are any constant trace heights, set them
+            for (pk, height) in zip(&self.pk.per_air, &mut heights) {
+                if let Some(constant_height) =
+                    pk.preprocessed_data.as_ref().map(|pd| pd.trace.height())
+                {
+                    *height = constant_height;
+                }
+            }
+            // Program chip used height
+            heights[PROGRAM_AIR_ID] = system_records.filtered_exec_frequencies.len();
+
+            heights
+        }
+
+        /// Update used trace heights after tracegen is done (primarily updating memory-related
+        /// metrics) and then emit the final metrics.
+        pub(crate) fn finalize_metrics(&self, heights: &mut [usize]) {
+            self.chip_complex.system.finalize_trace_heights(heights);
+            let mut main_cells_used = 0usize;
+            let mut total_cells_used = 0usize;
+            for (pk, height) in zip(&self.pk.per_air, heights.iter()) {
+                let width = &pk.vk.params.width;
+                main_cells_used += width.main_width() * *height;
+                total_cells_used +=
+                    width.total_width(<E::SC as StarkGenericConfig>::Challenge::D) * *height;
+            }
+            tracing::debug!(?heights);
+            tracing::info!(main_cells_used, total_cells_used);
+            counter!("main_cells_used").absolute(main_cells_used as u64);
+            counter!("total_cells_used").absolute(total_cells_used as u64);
+
+            #[cfg(feature = "perf-metrics")]
+            {
+                for (name, value) in zip(self.air_names(), heights) {
+                    let labels = [("air_name", name.to_string())];
+                    counter!("rows_used", &labels).absolute(*value as u64);
+                }
+            }
+        }
+    }
+}
diff --git a/crates/vm/src/lib.rs b/crates/vm/src/lib.rs
index 2e3ba461c5..271ea04b82 100644
--- a/crates/vm/src/lib.rs
+++ b/crates/vm/src/lib.rs
@@ -8,7 +8,7 @@ pub use openvm_stark_sdk;
 /// Traits and constructs for the OpenVM architecture.
 pub mod arch;
 /// Instrumentation metrics for performance analysis and debugging
-#[cfg(feature = "bench-metrics")]
+#[cfg(feature = "metrics")]
 pub mod metrics;
 /// System chips that are always required by the architecture.
 /// (The [PhantomChip](system::phantom::PhantomChip) is not technically required for a functioning
diff --git a/crates/vm/src/metrics/cycle_tracker/mod.rs b/crates/vm/src/metrics/cycle_tracker/mod.rs
index b1ef065451..3d989bc44b 100644
--- a/crates/vm/src/metrics/cycle_tracker/mod.rs
+++ b/crates/vm/src/metrics/cycle_tracker/mod.rs
@@ -46,7 +46,7 @@ impl CycleTracker {
     }
 }
 
-#[cfg(feature = "bench-metrics")]
+#[cfg(feature = "metrics")]
 mod emit {
     use metrics::counter;
 
diff --git a/crates/vm/src/metrics/mod.rs b/crates/vm/src/metrics/mod.rs
index 916e8251ac..7cb812a196 100644
--- a/crates/vm/src/metrics/mod.rs
+++ b/crates/vm/src/metrics/mod.rs
@@ -1,70 +1,168 @@
 use std::{collections::BTreeMap, mem};
 
+use backtrace::Backtrace;
 use cycle_tracker::CycleTracker;
+use itertools::Itertools;
 use metrics::counter;
 use openvm_instructions::{
     exe::{FnBound, FnBounds},
-    VmOpcode,
+    program::ProgramDebugInfo,
 };
-use openvm_stark_backend::p3_field::PrimeField32;
+use openvm_stark_backend::prover::{hal::ProverBackend, types::DeviceMultiStarkProvingKey};
 
-use crate::arch::{ExecutionSegment, InstructionExecutor, VmConfig};
+use crate::{
+    arch::{
+        execution_mode::PreflightCtx, interpreter_preflight::PcEntry, Arena, PreflightExecutor,
+        VmExecState,
+    },
+    system::memory::online::TracingMemory,
+};
 
 pub mod cycle_tracker;
 
 #[derive(Clone, Debug, Default)]
 pub struct VmMetrics {
-    pub cycle_count: usize,
-    pub chip_heights: Vec<(String, usize)>,
+    // Static info
+    pub air_names: Vec<String>,
+    pub debug_infos: ProgramDebugInfo,
+    #[cfg(feature = "perf-metrics")]
+    pub(crate) num_sys_airs: usize,
+    #[cfg(feature = "perf-metrics")]
+    pub(crate) access_adapter_offset: usize,
+    pub(crate) main_widths: Vec<usize>,
+    pub(crate) total_widths: Vec<usize>,
+
+    // Dynamic stats
     /// Maps (dsl_ir, opcode) to number of times opcode was executed
     pub counts: BTreeMap<(Option<String>, String), usize>,
     /// Maps (dsl_ir, opcode, air_name) to number of trace cells generated by opcode
     pub trace_cells: BTreeMap<(Option<String>, String, String), usize>,
-    /// Metric collection tools. Only collected when `config.profiling` is true.
+    /// Metric collection tools. Only collected when "perf-metrics" feature is enabled.
     pub cycle_tracker: CycleTracker,
+
+    pub(crate) current_trace_cells: Vec<usize>,
+
+    /// Backtrace for guest debug panic display
+    pub prev_backtrace: Option<Backtrace>,
     #[allow(dead_code)]
     pub(crate) fn_bounds: FnBounds,
     /// Cycle span by function if function start/end addresses are available
     #[allow(dead_code)]
     pub(crate) current_fn: FnBound,
-    pub(crate) current_trace_cells: Vec<usize>,
 }
 
-impl<F, VC> ExecutionSegment<F, VC>
+/// We assume this will be called after execute_instruction, so less error-handling is needed.
+#[allow(unused_variables)]
+#[inline(always)]
+pub fn update_instruction_metrics<F, RA, Executor>(
+    state: &mut VmExecState<F, TracingMemory, PreflightCtx<RA>>,
+    executor: &Executor,
+    prev_pc: u32, // the pc of the instruction executed, state.pc is next pc
+    pc_entry: &PcEntry<F>,
+) where
+    F: Clone + Send + Sync,
+    RA: Arena,
+    Executor: PreflightExecutor<F, RA>,
+{
+    #[cfg(any(debug_assertions, feature = "perf-metrics"))]
+    {
+        let pc = state.pc; // NOTE(review): next pc, not `prev_pc` — confirm intended
+        state.metrics.update_backtrace(pc);
+    }
+
+    #[cfg(feature = "perf-metrics")]
+    {
+        use std::iter::zip;
+
+        let pc = state.pc;
+        let opcode = pc_entry.insn.opcode;
+        let opcode_name = executor.get_opcode_name(opcode.as_usize());
+
+        let debug_info = state.metrics.debug_infos.get(prev_pc);
+        let dsl_instr = debug_info.as_ref().map(|info| info.dsl_instruction.clone());
+
+        let now_trace_heights: Vec<usize> = state
+            .ctx
+            .arenas
+            .iter()
+            .map(|arena| arena.current_trace_height())
+            .collect();
+        let now_trace_cells = zip(&state.metrics.main_widths, &now_trace_heights)
+            .map(|(main_width, h)| main_width * h)
+            .collect_vec();
+        state
+            .metrics
+            .update_trace_cells(now_trace_cells, opcode_name, dsl_instr);
+
+        state.metrics.update_current_fn(pc);
+    }
+}
+
+// Memory access adapter height calculation is slow, so only do it if this is the end of
+// execution.
+// We also clear the current trace cell counts so there aren't negative diffs at the start of the
+// next segment.
+#[cfg(feature = "perf-metrics")]
+pub fn end_segment_metrics<F, RA>(state: &mut VmExecState<F, TracingMemory, PreflightCtx<RA>>)
 where
-    F: PrimeField32,
-    VC: VmConfig<F>,
+    F: Clone + Send + Sync,
+    RA: Arena,
 {
-    /// Update metrics that increment per instruction
-    #[allow(unused_variables)]
-    pub fn update_instruction_metrics(
-        &mut self,
-        pc: u32,
-        opcode: VmOpcode,
-        dsl_instr: Option<String>,
+    use std::iter::zip;
+
+    use crate::system::memory::adapter::AccessAdapterInventory;
+
+    let access_adapter_offset = state.metrics.access_adapter_offset;
+    let num_sys_airs = state.metrics.num_sys_airs;
+    let mut now_heights = vec![0; num_sys_airs - access_adapter_offset];
+    AccessAdapterInventory::<F>::compute_heights_from_arena(
+        &state.memory.access_adapter_records,
+        &mut now_heights,
+    );
+    let now_trace_cells = zip(
+        &state.metrics.main_widths[access_adapter_offset..],
+        &now_heights,
+    )
+    .map(|(main_width, h)| main_width * h)
+    .collect_vec();
+    for (air_name, &now_value) in itertools::izip!(
+        &state.metrics.air_names[access_adapter_offset..],
+        &now_trace_cells,
     ) {
-        self.metrics.cycle_count += 1;
-
-        if self.system_config().profiling {
-            let executor = self.chip_complex.inventory.get_executor(opcode).unwrap();
-            let opcode_name = executor.get_opcode_name(opcode.as_usize());
-            self.metrics.update_trace_cells(
-                &self.air_names,
-                self.current_trace_cells(),
-                opcode_name,
-                dsl_instr,
-            );
-
-            #[cfg(feature = "function-span")]
-            self.metrics.update_current_fn(pc);
+        if now_value != 0 {
+            let labels = [
+                ("air_name", air_name.clone()),
+                ("opcode", String::default()),
+                ("dsl_ir", String::default()),
+                ("cycle_tracker_span", "memory_access_adapters".to_owned()),
+            ];
+            counter!("cells_used", &labels).increment(now_value as u64);
         }
     }
+    state.metrics.current_trace_cells.fill(0);
 }
 
 impl VmMetrics {
-    fn update_trace_cells(
+    /// Caches the per-AIR names, main widths and total widths read from `pk`, and resets
+    /// `current_trace_cells` to one zero entry per AIR.
+    pub fn set_pk_info<PB: ProverBackend>(&mut self, pk: &DeviceMultiStarkProvingKey<PB>) {
+        let ext_degree = PB::CHALLENGE_EXT_DEGREE as usize;
+        let per_air_info = pk.per_air.iter().map(|air_pk| {
+            let width = &air_pk.vk.params.width;
+            let main_width = width.main_width();
+            let total_width = width.total_width(ext_degree);
+            (air_pk.air_name.clone(), main_width, total_width)
+        });
+        let (names, main_widths, total_widths): (Vec<_>, Vec<_>, Vec<_>) =
+            per_air_info.multiunzip();
+        self.current_trace_cells = vec![0; names.len()];
+        self.air_names = names;
+        self.main_widths = main_widths;
+        self.total_widths = total_widths;
+    }
+
+    pub fn update_trace_cells(
         &mut self,
-        air_names: &[String],
         now_trace_cells: Vec<usize>,
         opcode_name: String,
         dsl_instr: Option<String>,
@@ -74,7 +172,7 @@ impl VmMetrics {
         *self.counts.entry(key.clone()).or_insert(0) += 1;
 
         for (air_name, now_value, prev_value) in
-            itertools::izip!(air_names, &now_trace_cells, &self.current_trace_cells)
+            itertools::izip!(&self.air_names, &now_trace_cells, &self.current_trace_cells)
         {
             if prev_value != now_value {
                 let key = (key.0.clone(), key.1.clone(), air_name.to_owned());
@@ -104,8 +202,17 @@ impl VmMetrics {
         *self = self.partial_take();
     }
 
-    #[cfg(feature = "function-span")]
-    fn update_current_fn(&mut self, pc: u32) {
+    #[cfg(any(debug_assertions, feature = "perf-metrics"))]
+    pub fn update_backtrace(&mut self, pc: u32) {
+        if let Some(info) = self.debug_infos.get(pc) {
+            if let Some(trace) = &info.trace {
+                self.prev_backtrace = Some(trace.clone());
+            }
+        }
+    }
+
+    #[cfg(feature = "perf-metrics")]
+    pub(super) fn update_current_fn(&mut self, pc: u32) {
         if self.fn_bounds.is_empty() {
             return;
         }
@@ -130,11 +237,6 @@ impl VmMetrics {
     }
 
     pub fn emit(&self) {
-        for (name, value) in self.chip_heights.iter() {
-            let labels = [("chip_name", name.clone())];
-            counter!("rows_used", &labels).absolute(*value as u64);
-        }
-
         for ((dsl_ir, opcode), value) in self.counts.iter() {
             let labels = [
                 ("dsl_ir", dsl_ir.clone().unwrap_or_else(String::new)),
diff --git a/crates/vm/src/system/connector/mod.rs b/crates/vm/src/system/connector/mod.rs
index 88a03c484b..6785a027a5 100644
--- a/crates/vm/src/system/connector/mod.rs
+++ b/crates/vm/src/system/connector/mod.rs
@@ -15,9 +15,9 @@ use openvm_stark_backend::{
     p3_air::{Air, AirBuilder, AirBuilderWithPublicValues, BaseAir, PairBuilder},
     p3_field::{Field, FieldAlgebra, PrimeField32},
     p3_matrix::{dense::RowMajorMatrix, Matrix},
-    prover::types::AirProofInput,
+    prover::{cpu::CpuBackend, types::AirProvingContext},
     rap::{BaseAirWithPublicValues, PartitionedBaseAir},
-    AirRef, Chip, ChipUsageGetter,
+    Chip, ChipUsageGetter,
 };
 use serde::{Deserialize, Serialize};
 
@@ -88,6 +88,26 @@ impl<F: Field> BaseAir<F> for VmConnectorAir {
 }
 
 impl VmConnectorAir {
+    pub fn new(
+        execution_bus: ExecutionBus,
+        program_bus: ProgramBus,
+        range_bus: VariableRangeCheckerBus,
+        timestamp_max_bits: usize,
+    ) -> Self {
+        assert!(
+            range_bus.range_max_bits * 2 >= timestamp_max_bits,
+            "Range checker not large enough: range_max_bits={}, timestamp_max_bits={}",
+            range_bus.range_max_bits,
+            timestamp_max_bits
+        );
+        Self {
+            execution_bus,
+            program_bus,
+            range_bus,
+            timestamp_max_bits,
+        }
+    }
+
     /// Returns (low_bits, high_bits) to range check.
     fn timestamp_limb_bits(&self) -> (usize, usize) {
         let range_max_bits = self.range_bus.range_max_bits;
@@ -194,34 +214,25 @@ impl<AB: InteractionBuilder + PairBuilder + AirBuilderWithPublicValues> Air<AB>
 }
 
 pub struct VmConnectorChip<F> {
-    pub air: VmConnectorAir,
     pub range_checker: SharedVariableRangeCheckerChip,
     pub boundary_states: [Option<ConnectorCols<u32>>; 2],
+    timestamp_max_bits: usize,
     _marker: PhantomData<F>,
 }
 
-impl<F: PrimeField32> VmConnectorChip<F> {
-    pub fn new(
-        execution_bus: ExecutionBus,
-        program_bus: ProgramBus,
-        range_checker: SharedVariableRangeCheckerChip,
-        timestamp_max_bits: usize,
-    ) -> Self {
+impl<F> VmConnectorChip<F> {
+    pub fn new(range_checker: SharedVariableRangeCheckerChip, timestamp_max_bits: usize) -> Self {
+        let range_bus = range_checker.bus();
         assert!(
-            range_checker.bus().range_max_bits * 2 >= timestamp_max_bits,
+            range_bus.range_max_bits * 2 >= timestamp_max_bits,
             "Range checker not large enough: range_max_bits={}, timestamp_max_bits={}",
-            range_checker.bus().range_max_bits,
+            range_bus.range_max_bits,
             timestamp_max_bits
         );
         Self {
-            air: VmConnectorAir {
-                execution_bus,
-                program_bus,
-                range_bus: range_checker.bus(),
-                timestamp_max_bits,
-            },
             range_checker,
             boundary_states: [None, None],
+            timestamp_max_bits,
             _marker: PhantomData,
         }
     }
@@ -245,25 +256,30 @@ impl<F: PrimeField32> VmConnectorChip<F> {
             timestamp_low_limb: 0, // will be computed during tracegen
         });
     }
+
+    /// Returns (low_bits, high_bits) to range check: the timestamp's low limb takes at most
+    /// `range_max_bits` bits and the high limb takes whatever bits remain (possibly zero).
+    fn timestamp_limb_bits(&self) -> (usize, usize) {
+        let range_max_bits = self.range_checker.bus().range_max_bits;
+        // min() covers both cases: the timestamp fits in a single limb, or it needs two.
+        let low_bits = self.timestamp_max_bits.min(range_max_bits);
+        (low_bits, self.timestamp_max_bits - low_bits)
+    }
 }
 
-impl<SC> Chip<SC> for VmConnectorChip<Val<SC>>
+impl<RA, SC> Chip<RA, CpuBackend<SC>> for VmConnectorChip<Val<SC>>
 where
     SC: StarkGenericConfig,
     Val<SC>: PrimeField32,
 {
-    fn air(&self) -> AirRef<SC> {
-        Arc::new(self.air)
-    }
-
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
+    fn generate_proving_ctx(&self, _: RA) -> AirProvingContext<CpuBackend<SC>> {
         let [initial_state, final_state] = self.boundary_states.map(|state| {
             let mut state = state.unwrap();
             // Decompose and range check timestamp
             let range_max_bits = self.range_checker.range_max_bits();
             let timestamp_low_limb = state.timestamp & ((1u32 << range_max_bits) - 1);
             state.timestamp_low_limb = timestamp_low_limb;
-            let (low_bits, high_bits) = self.air.timestamp_limb_bits();
+            let (low_bits, high_bits) = self.timestamp_limb_bits();
             self.range_checker.add_count(timestamp_low_limb, low_bits);
             self.range_checker
                 .add_count(state.timestamp >> range_max_bits, high_bits);
@@ -271,10 +287,10 @@ where
             state.map(Val::<SC>::from_canonical_u32)
         });
 
-        let trace = RowMajorMatrix::new(
+        let trace = Arc::new(RowMajorMatrix::new(
             [initial_state.flatten(), final_state.flatten()].concat(),
             self.trace_width(),
-        );
+        ));
 
         let mut public_values = Val::<SC>::zero_vec(VmConnectorPvs::<Val<SC>>::width());
         *public_values.as_mut_slice().borrow_mut() = VmConnectorPvs {
@@ -283,7 +299,7 @@ where
             exit_code: final_state.exit_code,
             is_terminate: final_state.is_terminate,
         };
-        AirProofInput::simple(trace, public_values)
+        AirProvingContext::simple(trace, public_values)
     }
 }
 
diff --git a/crates/vm/src/system/connector/tests.rs b/crates/vm/src/system/connector/tests.rs
index f3ded1812c..232367ed8b 100644
--- a/crates/vm/src/system/connector/tests.rs
+++ b/crates/vm/src/system/connector/tests.rs
@@ -7,8 +7,10 @@ use openvm_instructions::{
     instruction::Instruction, program::Program, LocalOpcode, SystemOpcode::TERMINATE,
 };
 use openvm_stark_backend::{
-    config::StarkGenericConfig, engine::StarkEngine, p3_field::FieldAlgebra,
-    prover::types::AirProofInput, utils::disable_debug_builder,
+    config::StarkGenericConfig,
+    engine::StarkEngine,
+    p3_field::FieldAlgebra,
+    prover::{cpu::CpuBackend, types::AirProvingContext},
 };
 use openvm_stark_sdk::{
     config::{
@@ -21,16 +23,25 @@ use openvm_stark_sdk::{
 
 use super::VmConnectorPvs;
 use crate::{
-    arch::{SingleSegmentVmExecutor, SystemConfig, VirtualMachine, CONNECTOR_AIR_ID},
-    system::program::trace::VmCommittedExe,
+    arch::{
+        PreflightExecutionOutput, Streams, SystemConfig, VirtualMachine, VmState, CONNECTOR_AIR_ID,
+    },
+    system::{
+        memory::{online::GuestMemory, AddressMap},
+        program::trace::VmCommittedExe,
+        SystemCpuBuilder,
+    },
 };
 
 type F = BabyBear;
+type SC = BabyBearPoseidon2Config;
+type PB = CpuBackend<SC>;
+
 #[test]
 fn test_vm_connector_happy_path() {
     let exit_code = 1789;
-    test_impl(true, exit_code, |air_proof_input| {
-        let pvs: &VmConnectorPvs<F> = air_proof_input.raw.public_values.as_slice().borrow();
+    test_impl(true, exit_code, |air_ctx| {
+        let pvs: &VmConnectorPvs<F> = air_ctx.public_values.as_slice().borrow();
         assert_eq!(pvs.is_terminate, F::ONE);
         assert_eq!(pvs.exit_code, F::from_canonical_u32(exit_code));
     });
@@ -39,12 +50,8 @@ fn test_vm_connector_happy_path() {
 #[test]
 fn test_vm_connector_wrong_exit_code() {
     let exit_code = 1789;
-    test_impl(false, exit_code, |air_proof_input| {
-        let pvs: &mut VmConnectorPvs<F> = air_proof_input
-            .raw
-            .public_values
-            .as_mut_slice()
-            .borrow_mut();
+    test_impl(false, exit_code, |air_ctx| {
+        let pvs: &mut VmConnectorPvs<F> = air_ctx.public_values.as_mut_slice().borrow_mut();
         pvs.exit_code = F::from_canonical_u32(exit_code + 1);
     });
 }
@@ -52,57 +59,67 @@ fn test_vm_connector_wrong_exit_code() {
 #[test]
 fn test_vm_connector_wrong_is_terminate() {
     let exit_code = 1789;
-    test_impl(false, exit_code, |air_proof_input| {
-        let pvs: &mut VmConnectorPvs<F> = air_proof_input
-            .raw
-            .public_values
-            .as_mut_slice()
-            .borrow_mut();
+    test_impl(false, exit_code, |air_ctx| {
+        let pvs: &mut VmConnectorPvs<F> = air_ctx.public_values.as_mut_slice().borrow_mut();
         pvs.is_terminate = F::ZERO;
     });
 }
 
-fn test_impl(
-    should_pass: bool,
-    exit_code: u32,
-    f: impl FnOnce(&mut AirProofInput<BabyBearPoseidon2Config>),
-) {
-    let vm_config = SystemConfig::default();
-    let engine = BabyBearPoseidon2Engine::new(FriParameters::new_for_testing(3));
-    let vm = VirtualMachine::new(engine, vm_config.clone());
-    let pk = vm.keygen();
+fn test_impl(should_pass: bool, exit_code: u32, f: impl FnOnce(&mut AirProvingContext<PB>)) {
+    let vm_config = SystemConfig::default().without_continuations();
+    let engine = BabyBearPoseidon2Engine::new(FriParameters::new_for_testing(1));
+    let (mut vm, pk) =
+        VirtualMachine::new_with_keygen(engine, SystemCpuBuilder, vm_config.clone()).unwrap();
+    let vk = pk.get_vk();
 
-    {
-        let instructions = vec![Instruction::from_isize(
-            TERMINATE.global_opcode(),
-            0,
-            0,
-            exit_code as isize,
-            0,
-            0,
-        )];
+    let instructions = vec![Instruction::<F>::from_isize(
+        TERMINATE.global_opcode(),
+        0,
+        0,
+        exit_code as isize,
+        0,
+        0,
+    )];
 
-        let program = Program::from_instructions(&instructions);
-        let committed_exe = Arc::new(VmCommittedExe::commit(
-            program.into(),
-            vm.engine.config.pcs(),
-        ));
-        let single_vm = SingleSegmentVmExecutor::new(vm_config);
-        let mut proof_input = single_vm
-            .execute_and_generate(committed_exe, vec![])
-            .unwrap();
-        let connector_air_input = proof_input
-            .per_air
-            .iter_mut()
-            .find(|(air_id, _)| *air_id == CONNECTOR_AIR_ID);
-        f(&mut connector_air_input.unwrap().1);
-        if should_pass {
-            vm.engine
-                .prove_then_verify(&pk, proof_input)
-                .expect("Verification failed");
-        } else {
-            disable_debug_builder();
-            assert!(vm.engine.prove_then_verify(&pk, proof_input).is_err());
-        }
+    let program = Program::from_instructions(&instructions);
+    let committed_exe = Arc::new(VmCommittedExe::<SC>::commit(
+        program.into(),
+        vm.engine.config().pcs(),
+    ));
+    let max_trace_heights = vec![0; vk.total_widths().len()];
+    let memory = GuestMemory::new(AddressMap::from_mem_config(&vm_config.memory_config));
+    vm.transport_init_memory_to_device(&memory);
+    vm.load_program(committed_exe.get_committed_trace());
+    let from_state = VmState::new(
+        0,
+        0,
+        memory,
+        Streams::default(),
+        0,
+        vm_config.num_public_values,
+    );
+    let mut interpreter = vm.preflight_interpreter(&committed_exe.exe).unwrap();
+    let PreflightExecutionOutput {
+        system_records,
+        record_arenas,
+        ..
+    } = vm
+        .execute_preflight(&mut interpreter, from_state, None, &max_trace_heights)
+        .unwrap();
+    let mut ctx = vm
+        .generate_proving_ctx(system_records, record_arenas)
+        .unwrap();
+    let connector_air_ctx = &mut ctx
+        .per_air
+        .iter_mut()
+        .find(|(air_id, _)| *air_id == CONNECTOR_AIR_ID)
+        .unwrap()
+        .1;
+    f(connector_air_ctx);
+    let proof = vm.engine.prove(vm.pk(), ctx);
+    if should_pass {
+        vm.engine.verify(&vk, &proof).expect("Verification failed");
+    } else {
+        assert!(vm.engine.verify(&vk, &proof).is_err());
     }
 }
diff --git a/crates/vm/src/system/memory/adapter/mod.rs b/crates/vm/src/system/memory/adapter/mod.rs
index 64e79a920b..46df5d968e 100644
--- a/crates/vm/src/system/memory/adapter/mod.rs
+++ b/crates/vm/src/system/memory/adapter/mod.rs
@@ -1,145 +1,273 @@
-use std::{borrow::BorrowMut, cmp::max, sync::Arc};
+use std::{
+    borrow::{Borrow, BorrowMut},
+    marker::PhantomData,
+    ptr::copy_nonoverlapping,
+    sync::Arc,
+};
 
 pub use air::*;
 pub use columns::*;
 use enum_dispatch::enum_dispatch;
+use getset::Setters;
 use openvm_circuit_primitives::{
     is_less_than::IsLtSubAir, utils::next_power_of_two_or_zero,
     var_range::SharedVariableRangeCheckerChip, TraceSubRowGenerator,
 };
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
 use openvm_stark_backend::{
-    config::{Domain, StarkGenericConfig, Val},
+    config::{Domain, StarkGenericConfig},
     p3_air::BaseAir,
     p3_commit::PolynomialSpace,
     p3_field::PrimeField32,
     p3_matrix::dense::RowMajorMatrix,
-    p3_maybe_rayon::prelude::*,
     p3_util::log2_strict_usize,
-    prover::types::AirProofInput,
-    AirRef, Chip, ChipUsageGetter,
+    prover::{cpu::CpuBackend, types::AirProvingContext},
 };
 
-use crate::system::memory::{offline_checker::MemoryBus, MemoryAddress};
+use crate::{
+    arch::{
+        AddressSpaceHostConfig, AddressSpaceHostLayout, CustomBorrow, DenseRecordArena,
+        MemoryCellType, MemoryConfig, SizedRecord,
+    },
+    system::memory::{
+        adapter::records::{
+            arena_size_bound, AccessLayout, AccessRecordHeader, AccessRecordMut,
+            MERGE_AND_NOT_SPLIT_FLAG,
+        },
+        offline_checker::MemoryBus,
+        MemoryAddress,
+    },
+};
 
 mod air;
 mod columns;
-#[cfg(test)]
-mod tests;
+pub mod records;
 
+#[derive(Setters)]
 pub struct AccessAdapterInventory<F> {
+    pub(super) memory_config: MemoryConfig,
     chips: Vec<GenericAccessAdapterChip<F>>,
-    air_names: Vec<String>,
+    #[getset(set = "pub")]
+    arena: DenseRecordArena,
+    #[cfg(feature = "metrics")]
+    pub(crate) trace_heights: Vec<usize>,
 }
 
-impl<F> AccessAdapterInventory<F> {
+impl<F: Clone + Send + Sync> AccessAdapterInventory<F> {
     pub fn new(
         range_checker: SharedVariableRangeCheckerChip,
         memory_bus: MemoryBus,
-        clk_max_bits: usize,
-        max_access_adapter_n: usize,
+        memory_config: MemoryConfig,
     ) -> Self {
         let rc = range_checker;
         let mb = memory_bus;
-        let cmb = clk_max_bits;
-        let maan = max_access_adapter_n;
+        let tmb = memory_config.timestamp_max_bits;
+        let maan = memory_config.max_access_adapter_n;
         assert!(matches!(maan, 2 | 4 | 8 | 16 | 32));
         let chips: Vec<_> = [
-            Self::create_access_adapter_chip::<2>(rc.clone(), mb, cmb, maan),
-            Self::create_access_adapter_chip::<4>(rc.clone(), mb, cmb, maan),
-            Self::create_access_adapter_chip::<8>(rc.clone(), mb, cmb, maan),
-            Self::create_access_adapter_chip::<16>(rc.clone(), mb, cmb, maan),
-            Self::create_access_adapter_chip::<32>(rc.clone(), mb, cmb, maan),
+            Self::create_access_adapter_chip::<2>(rc.clone(), mb, tmb, maan),
+            Self::create_access_adapter_chip::<4>(rc.clone(), mb, tmb, maan),
+            Self::create_access_adapter_chip::<8>(rc.clone(), mb, tmb, maan),
+            Self::create_access_adapter_chip::<16>(rc.clone(), mb, tmb, maan),
+            Self::create_access_adapter_chip::<32>(rc.clone(), mb, tmb, maan),
         ]
         .into_iter()
         .flatten()
         .collect();
-        let air_names = (0..chips.len()).map(|i| air_name(1 << (i + 1))).collect();
-        Self { chips, air_names }
+        Self {
+            memory_config,
+            chips,
+            arena: DenseRecordArena::with_byte_capacity(0),
+            #[cfg(feature = "metrics")]
+            trace_heights: Vec::new(),
+        }
     }
+
     pub fn num_access_adapters(&self) -> usize {
         self.chips.len()
     }
-    pub fn set_override_trace_heights(&mut self, overridden_heights: Vec<usize>) {
-        assert_eq!(overridden_heights.len(), self.chips.len());
-        for (chip, oh) in self.chips.iter_mut().zip(overridden_heights) {
-            chip.set_override_trace_heights(oh);
-        }
-    }
-    pub fn add_record(&mut self, record: AccessAdapterRecord<F>) {
-        let n = record.data.len();
-        let idx = log2_strict_usize(n) - 1;
-        let chip = &mut self.chips[idx];
-        debug_assert!(chip.n() == n);
-        chip.add_record(record);
-    }
 
-    pub fn extend_records(&mut self, records: Vec<AccessAdapterRecord<F>>) {
-        for record in records {
-            self.add_record(record);
+    pub(super) fn set_override_trace_heights(&mut self, overridden_heights: Vec<usize>) {
+        self.set_arena_from_trace_heights(
+            &overridden_heights
+                .iter()
+                .map(|&h| h as u32)
+                .collect::<Vec<_>>(),
+        );
+        for (chip, oh) in self.chips.iter_mut().zip(overridden_heights) {
+            chip.set_override_trace_height(oh);
         }
     }
 
-    #[cfg(test)]
-    pub fn records_for_n(&self, n: usize) -> &[AccessAdapterRecord<F>] {
-        let idx = log2_strict_usize(n) - 1;
-        let chip = &self.chips[idx];
-        chip.records()
-    }
-
-    #[cfg(test)]
-    pub fn total_records(&self) -> usize {
-        self.chips.iter().map(|chip| chip.records().len()).sum()
+    pub(super) fn set_arena_from_trace_heights(&mut self, trace_heights: &[u32]) {
+        assert_eq!(trace_heights.len(), self.chips.len()); // one height per adapter chip
+        let size_bound = arena_size_bound(trace_heights); // worst-case byte size for these heights
+        tracing::debug!(
+            "Allocating {} bytes for memory adapters arena from heights {:?}",
+            size_bound,
+            trace_heights
+        ); // NOTE(review): `arena_size_bound` already logs this exact message
+        self.arena.set_byte_capacity(size_bound); // set the arena's byte capacity to that bound
     }
 
-    pub fn get_heights(&self) -> Vec<usize> {
-        self.chips
-            .iter()
-            .map(|chip| chip.current_trace_height())
-            .collect()
-    }
-    #[allow(dead_code)]
     pub fn get_widths(&self) -> Vec<usize> {
-        self.chips.iter().map(|chip| chip.trace_width()).collect()
-    }
-    pub fn get_cells(&self) -> Vec<usize> {
         self.chips
             .iter()
-            .map(|chip| chip.current_trace_cells())
+            .map(|chip: &GenericAccessAdapterChip<F>| chip.trace_width())
             .collect()
     }
-    pub fn airs<SC: StarkGenericConfig>(&self) -> Vec<AirRef<SC>>
-    where
-        F: PrimeField32,
-        Domain<SC>: PolynomialSpace<Val = F>,
-    {
-        self.chips.iter().map(|chip| chip.air()).collect()
+
+    /// Adds each arena record's row counts into `heights` (one slot per access adapter chip).
+    pub(crate) fn compute_heights_from_arena(arena: &DenseRecordArena, heights: &mut [usize]) {
+        let bytes = arena.allocated();
+        tracing::debug!(
+            "Computing heights from memory adapters arena: used {} bytes",
+            bytes.len()
+        );
+        let mut ptr = 0; // byte offset of the record currently being read
+        while ptr < bytes.len() {
+            let header: &AccessRecordHeader = bytes[ptr..].borrow();
+            let layout: AccessLayout = unsafe { bytes[ptr..].extract_layout() };
+            ptr += <AccessRecordMut<'_> as SizedRecord<AccessLayout>>::size(&layout); // next record
+
+            let log_max_block_size = log2_strict_usize(header.block_size as usize);
+            for (i, h) in heights
+                .iter_mut()
+                .enumerate()
+                .take(log_max_block_size) // i < log2(block_size)
+                .skip(log2_strict_usize(header.lowest_block_size as usize)) // i >= log2(lowest)
+            {
+                *h += 1 << (log_max_block_size - i - 1); // block_size / 2^(i+1) rows in slot i
+            }
+        }
+        tracing::debug!("Computed heights from memory adapters arena: {:?}", heights);
     }
-    pub fn air_names(&self) -> Vec<String> {
-        self.air_names.clone()
+
+    fn apply_overridden_heights(&mut self, heights: &mut [usize]) { // finalizes heights in place
+        for (i, h) in heights.iter_mut().enumerate() {
+            if let Some(oh) = self.chips[i].overridden_trace_height() {
+                assert!(
+                    oh >= *h, // an override may only enlarge the trace, never truncate it
+                    "Overridden height {oh} is less than the required height {}",
+                    *h
+                );
+                *h = oh;
+            }
+            *h = next_power_of_two_or_zero(*h); // applied whether or not an override was set
+        }
     }
-    pub fn generate_air_proof_inputs<SC: StarkGenericConfig>(self) -> Vec<AirProofInput<SC>>
+
+    pub fn generate_proving_ctx<SC: StarkGenericConfig>(
+        &mut self,
+    ) -> Vec<AirProvingContext<CpuBackend<SC>>> // one context per adapter chip's trace
     where
         F: PrimeField32,
         Domain<SC>: PolynomialSpace<Val = F>,
     {
-        self.chips
+        let num_adapters = self.chips.len();
+
+        let mut heights = vec![0; num_adapters]; // must start at zero: the next call accumulates
+        Self::compute_heights_from_arena(&self.arena, &mut heights);
+        self.apply_overridden_heights(&mut heights); // overrides, then power-of-two-or-zero
+
+        let widths = self
+            .chips
+            .iter()
+            .map(|chip| chip.trace_width())
+            .collect::<Vec<_>>();
+        let mut traces = widths
+            .iter()
+            .zip(heights.iter())
+            .map(|(&width, &height)| RowMajorMatrix::new(vec![F::ZERO; width * height], width))
+            .collect::<Vec<_>>(); // zero-filled; rows never written below stay all-zero
+        #[cfg(feature = "metrics")]
+        {
+            self.trace_heights = heights;
+        }
+
+        let mut trace_ptrs = vec![0; num_adapters]; // next unwritten cell offset in each trace
+
+        let bytes = self.arena.allocated_mut();
+        let mut ptr = 0; // byte offset of the record currently being processed
+        while ptr < bytes.len() {
+            let layout: AccessLayout = unsafe { bytes[ptr..].extract_layout() };
+            let record: AccessRecordMut<'_> = bytes[ptr..].custom_borrow(layout.clone());
+            ptr += <AccessRecordMut<'_> as SizedRecord<AccessLayout>>::size(&layout);
+
+            let log_min_block_size = log2_strict_usize(record.header.lowest_block_size as usize);
+            let log_max_block_size = log2_strict_usize(record.header.block_size as usize);
+
+            if record.header.timestamp_and_mask & MERGE_AND_NOT_SPLIT_FLAG != 0 { // merge
+                for i in log_min_block_size..log_max_block_size {
+                    let data_len = layout.type_size << i; // bytes in one half (2^i cells)
+                    let ts_len = 1 << (i - log_min_block_size); // timestamps covering one half
+                    for j in 0..record.data.len() / (2 * data_len) { // j-th block of 2^(i+1) cells
+                        let row_slice =
+                            &mut traces[i].values[trace_ptrs[i]..trace_ptrs[i] + widths[i]];
+                        trace_ptrs[i] += widths[i];
+                        self.chips[i].fill_trace_row(
+                            &self.memory_config.addr_spaces,
+                            row_slice,
+                            false, // is_split
+                            MemoryAddress::new(
+                                record.header.address_space,
+                                record.header.pointer + (j << (i + 1)) as u32,
+                            ),
+                            &record.data[j * 2 * data_len..(j + 1) * 2 * data_len],
+                            *record.timestamps[2 * j * ts_len..(2 * j + 1) * ts_len]
+                                .iter()
+                                .max()
+                                .unwrap(), // left_timestamp: max over the left half
+                            *record.timestamps[(2 * j + 1) * ts_len..(2 * j + 2) * ts_len]
+                                .iter()
+                                .max()
+                                .unwrap(), // right_timestamp: max over the right half
+                        );
+                    }
+                }
+            } else { // split: flag bit is clear, so the header value is used as the timestamp
+                let timestamp = record.header.timestamp_and_mask;
+                for i in log_min_block_size..log_max_block_size {
+                    let data_len = layout.type_size << i; // bytes in one half (2^i cells)
+                    for j in 0..record.data.len() / (2 * data_len) { // j-th block of 2^(i+1) cells
+                        let row_slice =
+                            &mut traces[i].values[trace_ptrs[i]..trace_ptrs[i] + widths[i]];
+                        trace_ptrs[i] += widths[i];
+                        self.chips[i].fill_trace_row(
+                            &self.memory_config.addr_spaces,
+                            row_slice,
+                            true, // is_split
+                            MemoryAddress::new(
+                                record.header.address_space,
+                                record.header.pointer + (j << (i + 1)) as u32,
+                            ),
+                            &record.data[j * 2 * data_len..(j + 1) * 2 * data_len],
+                            timestamp, // left_timestamp
+                            timestamp, // right_timestamp
+                        );
+                    }
+                }
+            }
+        }
+        traces
             .into_iter()
-            .map(|chip| chip.generate_air_proof_input())
+            .map(|trace| AirProvingContext::simple_no_pis(Arc::new(trace)))
             .collect()
     }
 
     fn create_access_adapter_chip<const N: usize>(
         range_checker: SharedVariableRangeCheckerChip,
         memory_bus: MemoryBus,
-        clk_max_bits: usize,
+        timestamp_max_bits: usize,
         max_access_adapter_n: usize,
-    ) -> Option<GenericAccessAdapterChip<F>> {
+    ) -> Option<GenericAccessAdapterChip<F>>
+    where
+        F: Clone + Send + Sync,
+    {
         if N <= max_access_adapter_n {
             Some(GenericAccessAdapterChip::new::<N>(
                 range_checker,
                 memory_bus,
-                clk_max_bits,
+                timestamp_max_bits,
             ))
         } else {
             None
@@ -147,37 +275,27 @@ impl<F> AccessAdapterInventory<F> {
     }
 }
 
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub enum AccessAdapterRecordKind {
-    Split,
-    Merge {
+#[enum_dispatch]
+pub(crate) trait GenericAccessAdapterChipTrait<F> {
+    fn trace_width(&self) -> usize;
+    fn set_override_trace_height(&mut self, overridden_height: usize);
+    fn overridden_trace_height(&self) -> Option<usize>;
+
+    #[allow(clippy::too_many_arguments)]
+    fn fill_trace_row(
+        &self,
+        addr_spaces: &[AddressSpaceHostConfig],
+        row: &mut [F],
+        is_split: bool,
+        address: MemoryAddress<u32, u32>,
+        values: &[u8],
         left_timestamp: u32,
         right_timestamp: u32,
-    },
-}
-
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct AccessAdapterRecord<T> {
-    pub timestamp: u32,
-    pub address_space: T,
-    pub start_index: T,
-    pub data: Vec<T>,
-    pub kind: AccessAdapterRecordKind,
-}
-
-#[enum_dispatch]
-pub trait GenericAccessAdapterChipTrait<F> {
-    fn set_override_trace_heights(&mut self, overridden_height: usize);
-    fn add_record(&mut self, record: AccessAdapterRecord<F>);
-    fn n(&self) -> usize;
-    fn generate_trace(self) -> RowMajorMatrix<F>
-    where
+    ) where
         F: PrimeField32;
 }
 
-#[derive(Chip, ChipUsageGetter)]
 #[enum_dispatch(GenericAccessAdapterChipTrait<F>)]
-#[chip(where = "F: PrimeField32")]
 enum GenericAccessAdapterChip<F> {
     N2(AccessAdapterChip<F, 2>),
     N4(AccessAdapterChip<F, 4>),
@@ -186,15 +304,15 @@ enum GenericAccessAdapterChip<F> {
     N32(AccessAdapterChip<F, 32>),
 }
 
-impl<F> GenericAccessAdapterChip<F> {
+impl<F: Clone + Send + Sync> GenericAccessAdapterChip<F> {
     fn new<const N: usize>(
         range_checker: SharedVariableRangeCheckerChip,
         memory_bus: MemoryBus,
-        clk_max_bits: usize,
+        timestamp_max_bits: usize,
     ) -> Self {
         let rc = range_checker;
         let mb = memory_bus;
-        let cmb = clk_max_bits;
+        let cmb = timestamp_max_bits;
         match N {
             2 => GenericAccessAdapterChip::N2(AccessAdapterChip::new(rc, mb, cmb)),
             4 => GenericAccessAdapterChip::N4(AccessAdapterChip::new(rc, mb, cmb)),
@@ -204,127 +322,89 @@ impl<F> GenericAccessAdapterChip<F> {
             _ => panic!("Only supports N in (2, 4, 8, 16, 32)"),
         }
     }
-
-    #[cfg(test)]
-    fn records(&self) -> &[AccessAdapterRecord<F>] {
-        match &self {
-            GenericAccessAdapterChip::N2(chip) => &chip.records,
-            GenericAccessAdapterChip::N4(chip) => &chip.records,
-            GenericAccessAdapterChip::N8(chip) => &chip.records,
-            GenericAccessAdapterChip::N16(chip) => &chip.records,
-            GenericAccessAdapterChip::N32(chip) => &chip.records,
-        }
-    }
 }
-pub struct AccessAdapterChip<F, const N: usize> {
+
+pub(crate) struct AccessAdapterChip<F, const N: usize> {
     air: AccessAdapterAir<N>,
     range_checker: SharedVariableRangeCheckerChip,
-    pub records: Vec<AccessAdapterRecord<F>>,
     overridden_height: Option<usize>,
+    _marker: PhantomData<F>,
 }
-impl<F, const N: usize> AccessAdapterChip<F, N> {
+
+impl<F: Clone + Send + Sync, const N: usize> AccessAdapterChip<F, N> {
     pub fn new(
         range_checker: SharedVariableRangeCheckerChip,
         memory_bus: MemoryBus,
-        clk_max_bits: usize,
+        timestamp_max_bits: usize,
     ) -> Self {
-        let lt_air = IsLtSubAir::new(range_checker.bus(), clk_max_bits);
+        let lt_air = IsLtSubAir::new(range_checker.bus(), timestamp_max_bits);
         Self {
             air: AccessAdapterAir::<N> { memory_bus, lt_air },
             range_checker,
-            records: vec![],
             overridden_height: None,
+            _marker: PhantomData,
         }
     }
 }
 impl<F, const N: usize> GenericAccessAdapterChipTrait<F> for AccessAdapterChip<F, N> {
-    fn set_override_trace_heights(&mut self, overridden_height: usize) {
-        self.overridden_height = Some(overridden_height);
-    }
-    fn add_record(&mut self, record: AccessAdapterRecord<F>) {
-        self.records.push(record);
-    }
-    fn n(&self) -> usize {
-        N
-    }
-    fn generate_trace(self) -> RowMajorMatrix<F>
-    where
-        F: PrimeField32,
-    {
-        let width = BaseAir::<F>::width(&self.air);
-        let height = if let Some(oh) = self.overridden_height {
-            assert!(
-                oh >= self.records.len(),
-                "Overridden height is less than the required height"
-            );
-            oh
-        } else {
-            self.records.len()
-        };
-        let height = next_power_of_two_or_zero(height);
-        let mut values = F::zero_vec(height * width);
-
-        values
-            .par_chunks_mut(width)
-            .zip(self.records.into_par_iter())
-            .for_each(|(row, record)| {
-                let row: &mut AccessAdapterCols<F, N> = row.borrow_mut();
-
-                row.is_valid = F::ONE;
-                row.values = record.data.try_into().unwrap();
-                row.address = MemoryAddress::new(record.address_space, record.start_index);
-
-                let (left_timestamp, right_timestamp) = match record.kind {
-                    AccessAdapterRecordKind::Split => (record.timestamp, record.timestamp),
-                    AccessAdapterRecordKind::Merge {
-                        left_timestamp,
-                        right_timestamp,
-                    } => (left_timestamp, right_timestamp),
-                };
-                debug_assert_eq!(max(left_timestamp, right_timestamp), record.timestamp);
-
-                row.left_timestamp = F::from_canonical_u32(left_timestamp);
-                row.right_timestamp = F::from_canonical_u32(right_timestamp);
-                row.is_split = F::from_bool(record.kind == AccessAdapterRecordKind::Split);
-
-                self.air.lt_air.generate_subrow(
-                    (self.range_checker.as_ref(), left_timestamp, right_timestamp),
-                    (&mut row.lt_aux, &mut row.is_right_larger),
-                );
-            });
-        RowMajorMatrix::new(values, width)
-    }
-}
-
-impl<SC: StarkGenericConfig, const N: usize> Chip<SC> for AccessAdapterChip<Val<SC>, N>
-where
-    Val<SC>: PrimeField32,
-{
-    fn air(&self) -> AirRef<SC> {
-        Arc::new(self.air.clone())
-    }
-
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
-        let trace = self.generate_trace();
-        AirProofInput::simple_no_pis(trace)
+    fn trace_width(&self) -> usize {
+        BaseAir::<F>::width(&self.air)
     }
-}
 
-impl<F, const N: usize> ChipUsageGetter for AccessAdapterChip<F, N> {
-    fn air_name(&self) -> String {
-        air_name(N)
+    fn set_override_trace_height(&mut self, overridden_height: usize) {
+        self.overridden_height = Some(overridden_height);
     }
 
-    fn current_trace_height(&self) -> usize {
-        self.records.len()
+    fn overridden_trace_height(&self) -> Option<usize> {
+        self.overridden_height
     }
 
-    fn trace_width(&self) -> usize {
-        BaseAir::<F>::width(&self.air)
+    fn fill_trace_row(
+        &self,
+        addr_spaces: &[AddressSpaceHostConfig], // indexed by `address.address_space`
+        row: &mut [F], // reinterpreted below as `AccessAdapterCols<F, N>`
+        is_split: bool,
+        address: MemoryAddress<u32, u32>,
+        values: &[u8], // raw cell bytes; decoding depends on the address space's cell type
+        left_timestamp: u32,
+        right_timestamp: u32,
+    ) where
+        F: PrimeField32,
+    {
+        let row: &mut AccessAdapterCols<F, N> = row.borrow_mut();
+        row.is_valid = F::ONE;
+        row.is_split = F::from_bool(is_split);
+        row.address = MemoryAddress::new(
+            F::from_canonical_u32(address.address_space),
+            F::from_canonical_u32(address.pointer),
+        );
+        let addr_space_layout = addr_spaces[address.address_space as usize].layout;
+        // SAFETY: assumes `values` holds N cells of this address space's cell type (TODO confirm)
+        unsafe {
+            match addr_space_layout {
+                MemoryCellType::Native { .. } => {
+                    copy_nonoverlapping(
+                        values.as_ptr(),
+                        row.values.as_mut_ptr() as *mut u8,
+                        N * size_of::<F>(), // raw byte copy: `values` must be at least this long
+                    );
+                }
+                _ => { // every other cell type: convert cell by cell with `to_field`
+                    for (dst, src) in row
+                        .values
+                        .iter_mut()
+                        .zip(values.chunks_exact(addr_space_layout.size()))
+                    {
+                        *dst = addr_space_layout.to_field(src);
+                    }
+                }
+            }
+        }
+        row.left_timestamp = F::from_canonical_u32(left_timestamp);
+        row.right_timestamp = F::from_canonical_u32(right_timestamp);
+        self.air.lt_air.generate_subrow( // fills `lt_aux` and `is_right_larger` from the timestamps
+            (self.range_checker.as_ref(), left_timestamp, right_timestamp),
+            (&mut row.lt_aux, &mut row.is_right_larger),
+        );
     }
 }
-
-#[inline]
-fn air_name(n: usize) -> String {
-    format!("AccessAdapter<{}>", n)
-}
diff --git a/crates/vm/src/system/memory/adapter/records.rs b/crates/vm/src/system/memory/adapter/records.rs
new file mode 100644
index 0000000000..2a82bc51a4
--- /dev/null
+++ b/crates/vm/src/system/memory/adapter/records.rs
@@ -0,0 +1,144 @@
+use std::{
+    borrow::{Borrow, BorrowMut},
+    mem::{align_of, size_of},
+};
+
+use openvm_circuit_primitives::AlignedBytesBorrow;
+
+use crate::arch::{CustomBorrow, DenseRecordArena, RecordArena, SizedRecord};
+
+#[repr(C)]
+#[derive(Debug, Clone, Copy, AlignedBytesBorrow, PartialEq, Eq, PartialOrd, Ord)]
+pub struct AccessRecordHeader {
+    /// `MERGE_AND_NOT_SPLIT_FLAG` set: merge record. Clear: split record, value is its timestamp.
+    pub timestamp_and_mask: u32,
+    pub address_space: u32,
+    pub pointer: u32, // start of the accessed block within `address_space`
+    // PERF: these three are easily mergeable into a single u32
+    pub block_size: u32, // in cells; consumers take `log2_strict_usize` of it
+    pub lowest_block_size: u32, // granularity of the stored timestamps; also passed to log2_strict
+    pub type_size: u32, // bytes per cell
+}
+
+#[repr(C)]
+#[derive(Debug)]
+pub struct AccessRecordMut<'a> { // mutable view over one record's bytes, built by `custom_borrow`
+    pub header: &'a mut AccessRecordHeader, // fixed-size prefix of the record
+    // PERF(AG): optimize with some `Option` serialization stuff
+    pub timestamps: &'a mut [u32], // len is block_size / lowest_block_size; follows the header
+    pub data: &'a mut [u8],        // len is block_size * type_size; follows the timestamps
+}
+
+#[derive(Debug, Clone)]
+pub struct AccessLayout {
+    /// The size of the block in elements.
+    pub block_size: usize,
+    /// The size of the minimal block we may split into/merge from (usually 1 or 4)
+    pub lowest_block_size: usize,
+    /// The size of the type in bytes (1 for u8, 4 for F).
+    pub type_size: usize,
+}
+
+impl AccessLayout {
+    pub(crate) fn from_record_header(header: &AccessRecordHeader) -> Self { // widens u32 -> usize
+        Self {
+            block_size: header.block_size as usize,
+            lowest_block_size: header.lowest_block_size as usize,
+            type_size: header.type_size as usize,
+        }
+    } // NOTE(review): `<[u8] as CustomBorrow>::extract_layout` repeats this conversion inline
+}
+
+pub(crate) const MERGE_AND_NOT_SPLIT_FLAG: u32 = 1 << 31; // top bit of `timestamp_and_mask`
+
+pub(crate) fn size_by_layout(layout: &AccessLayout) -> usize { // bytes one record occupies
+    size_of::<AccessRecordHeader>() // fixed-size header
+    + (layout.block_size / layout.lowest_block_size) * size_of::<u32>() // one u32 timestamp per lowest-size sub-block
+    + (layout.block_size * layout.type_size).next_multiple_of(4) // data bytes, padded up to a multiple of 4
+}
+
+impl SizedRecord<AccessLayout> for AccessRecordMut<'_> {
+    fn size(layout: &AccessLayout) -> usize {
+        size_by_layout(layout) // header + timestamps + padded data
+    }
+
+    fn alignment(_: &AccessLayout) -> usize {
+        align_of::<AccessRecordHeader>() // layout-independent: the header's alignment
+    }
+}
+
+impl<'a> CustomBorrow<'a, AccessRecordMut<'a>, AccessLayout> for [u8] {
+    fn custom_borrow(&'a mut self, layout: AccessLayout) -> AccessRecordMut<'a> {
+        // header: AccessRecordHeader (using trivial borrowing)
+        let (header_buf, rest) = // NOTE(review): unchecked split; assumes `self` holds a full record
+            unsafe { self.split_at_mut_unchecked(size_of::<AccessRecordHeader>()) };
+        let header = header_buf.borrow_mut();
+
+        let mut offset = 0; // byte offset into `rest`, i.e. past the header
+
+        // timestamps: [u32] (block_size / lowest_block_size entries, 4 bytes each)
+        let timestamps = unsafe {
+            std::slice::from_raw_parts_mut(
+                rest.as_mut_ptr().add(offset) as *mut u32,
+                layout.block_size / layout.lowest_block_size,
+            )
+        };
+        offset += layout.block_size / layout.lowest_block_size * size_of::<u32>();
+
+        // data: [u8] (block_size * type_size bytes; `size_by_layout` padding is not exposed)
+        let data = unsafe {
+            std::slice::from_raw_parts_mut(
+                rest.as_mut_ptr().add(offset),
+                layout.block_size * layout.type_size,
+            )
+        };
+
+        AccessRecordMut {
+            header,
+            data,
+            timestamps,
+        }
+    }
+
+    unsafe fn extract_layout(&self) -> AccessLayout { // reads the layout from the leading header
+        let header: &AccessRecordHeader = self.borrow();
+        AccessLayout { // same three fields as `AccessLayout::from_record_header`
+            block_size: header.block_size as usize,
+            lowest_block_size: header.lowest_block_size as usize,
+            type_size: header.type_size as usize,
+        }
+    }
+}
+
+impl<'a> RecordArena<'a, AccessLayout, AccessRecordMut<'a>> for DenseRecordArena {
+    fn alloc(&'a mut self, layout: AccessLayout) -> AccessRecordMut<'a> {
+        let bytes = self.alloc_bytes(<AccessRecordMut<'a> as SizedRecord<AccessLayout>>::size(
+            &layout, // reserves exactly the record size computed from the layout
+        ));
+        <[u8] as CustomBorrow<AccessRecordMut<'a>, AccessLayout>>::custom_borrow(bytes, layout)
+    } // NOTE(review): the header is not written here; presumably the caller fills it in
+}
+
+/// `trace_heights[i]` is assumed to correspond to `Adapter< 2^(i+1) >`.
+pub fn arena_size_bound(trace_heights: &[u32]) -> usize {
+    // At the very worst, each row in `Adapter<N>`
+    // corresponds to a unique record whose `block_size` is `N` (`1 << (i + 1)` below),
+    // and its `lowest_block_size` is at least 1 and `type_size` is at most 4.
+    let size_bound = trace_heights
+        .iter()
+        .enumerate()
+        .map(|(i, &h)| {
+            size_by_layout(&AccessLayout {
+                block_size: 1 << (i + 1),
+                lowest_block_size: 1,
+                type_size: 4,
+            }) * h as usize
+        })
+        .sum::<usize>();
+    tracing::debug!(
+        "Allocating {} bytes for memory adapters arena from heights {:?}",
+        size_bound,
+        trace_heights
+    ); // NOTE(review): `AccessAdapterInventory::set_arena_from_trace_heights` logs this again
+    size_bound
+}
diff --git a/crates/vm/src/system/memory/adapter/tests.rs b/crates/vm/src/system/memory/adapter/tests.rs
deleted file mode 100644
index 8b13789179..0000000000
--- a/crates/vm/src/system/memory/adapter/tests.rs
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/crates/vm/src/system/memory/controller/dimensions.rs b/crates/vm/src/system/memory/controller/dimensions.rs
index 1082d3adf0..77345c2e82 100644
--- a/crates/vm/src/system/memory/controller/dimensions.rs
+++ b/crates/vm/src/system/memory/controller/dimensions.rs
@@ -2,23 +2,24 @@ use derive_new::new;
 use openvm_stark_backend::p3_util::log2_strict_usize;
 use serde::{Deserialize, Serialize};
 
-use crate::{arch::MemoryConfig, system::memory::CHUNK};
+use crate::{
+    arch::{MemoryConfig, ADDR_SPACE_OFFSET},
+    system::memory::CHUNK,
+};
 
-// indicates that there are 2^`as_height` address spaces numbered starting from `as_offset`,
+// indicates that there are 2^`addr_space_height` address spaces numbered starting from `ADDR_SPACE_OFFSET`,
 // and that each address space has 2^`address_height` addresses numbered starting from 0
 #[derive(Clone, Copy, Debug, Serialize, Deserialize, new)]
 pub struct MemoryDimensions {
     /// Address space height
-    pub as_height: usize,
+    pub addr_space_height: usize,
     /// Pointer height
     pub address_height: usize,
-    /// Address space offset
-    pub as_offset: u32,
 }
 
 impl MemoryDimensions {
     pub fn overall_height(&self) -> usize {
-        self.as_height + self.address_height
+        self.addr_space_height + self.address_height
     }
     /// Convert an address label (address space, block id) to its index in the memory merkle tree.
     ///
@@ -27,17 +28,29 @@ impl MemoryDimensions {
     /// This function is primarily for internal use for accessing the memory merkle tree.
     /// Users should use a higher-level API when possible.
     pub fn label_to_index(&self, (addr_space, block_id): (u32, u32)) -> u64 {
-        debug_assert!(block_id < (1 << self.address_height));
-        (((addr_space - self.as_offset) as u64) << self.address_height) + block_id as u64
+        debug_assert!(
+            block_id < (1 << self.address_height),
+            "block_id={block_id} exceeds address_height={}",
+            self.address_height
+        );
+        (((addr_space - ADDR_SPACE_OFFSET) as u64) << self.address_height) + block_id as u64
+    }
+
+    /// Convert an index in the memory merkle tree to an address label (address space, block id).
+    ///
+    /// Inverse of `label_to_index`: splits `index` at bit `address_height` and re-adds the offset.
+    pub fn index_to_label(&self, index: u64) -> (u32, u32) {
+        let block_id = (index & ((1 << self.address_height) - 1)) as u32; // low `address_height` bits
+        let addr_space = (index >> self.address_height) as u32 + ADDR_SPACE_OFFSET; // high bits
+        (addr_space, block_id)
     }
 }
 
 impl MemoryConfig {
     pub fn memory_dimensions(&self) -> MemoryDimensions {
         MemoryDimensions {
-            as_height: self.as_height,
+            addr_space_height: self.addr_space_height,
             address_height: self.pointer_max_bits - log2_strict_usize(CHUNK),
-            as_offset: self.as_offset,
         }
     }
 }
diff --git a/crates/vm/src/system/memory/controller/interface.rs b/crates/vm/src/system/memory/controller/interface.rs
index b51e960a32..ff0a0b64a9 100644
--- a/crates/vm/src/system/memory/controller/interface.rs
+++ b/crates/vm/src/system/memory/controller/interface.rs
@@ -1,10 +1,23 @@
 use openvm_stark_backend::{interaction::PermutationCheckBus, p3_field::PrimeField32};
 
 use crate::system::memory::{
-    merkle::MemoryMerkleChip, persistent::PersistentBoundaryChip, volatile::VolatileBoundaryChip,
+    merkle::{MemoryMerkleAir, MemoryMerkleChip},
+    persistent::{PersistentBoundaryAir, PersistentBoundaryChip},
+    volatile::{VolatileBoundaryAir, VolatileBoundaryChip},
     MemoryImage, CHUNK,
 };
 
+#[derive(Clone)]
+pub enum MemoryInterfaceAirs { // AIR-only counterpart of the `MemoryInterface` variants
+    Volatile {
+        boundary: VolatileBoundaryAir,
+    },
+    Persistent {
+        boundary: PersistentBoundaryAir<CHUNK>,
+        merkle: MemoryMerkleAir<CHUNK>, // only the persistent variant has a merkle AIR
+    },
+}
+
 #[allow(clippy::large_enum_variant)]
 pub enum MemoryInterface<F> {
     Volatile {
@@ -13,25 +26,11 @@ pub enum MemoryInterface<F> {
     Persistent {
         boundary_chip: PersistentBoundaryChip<F, CHUNK>,
         merkle_chip: MemoryMerkleChip<CHUNK, F>,
-        initial_memory: MemoryImage<F>,
+        initial_memory: MemoryImage,
     },
 }
 
 impl<F: PrimeField32> MemoryInterface<F> {
-    pub fn touch_range(&mut self, addr_space: u32, pointer: u32, len: u32) {
-        match self {
-            MemoryInterface::Volatile { .. } => {}
-            MemoryInterface::Persistent {
-                boundary_chip,
-                merkle_chip,
-                ..
-            } => {
-                boundary_chip.touch_range(addr_space, pointer, len);
-                merkle_chip.touch_range(addr_space, pointer, len);
-            }
-        }
-    }
-
     pub fn compression_bus(&self) -> Option<PermutationCheckBus> {
         match self {
             MemoryInterface::Volatile { .. } => None,
diff --git a/crates/vm/src/system/memory/controller/mod.rs b/crates/vm/src/system/memory/controller/mod.rs
index 680a03ab8e..aabe4df08d 100644
--- a/crates/vm/src/system/memory/controller/mod.rs
+++ b/crates/vm/src/system/memory/controller/mod.rs
@@ -1,51 +1,40 @@
-use std::{
-    array,
-    collections::BTreeMap,
-    iter,
-    marker::PhantomData,
-    mem,
-    sync::{Arc, Mutex},
-};
+//! [MemoryController] can be considered as the Memory Chip Complex for the CPU Backend.
+use std::{collections::BTreeMap, fmt::Debug, marker::PhantomData, sync::Arc};
 
 use getset::{Getters, MutGetters};
 use openvm_circuit_primitives::{
     assert_less_than::{AssertLtSubAir, LessThanAuxCols},
-    is_zero::IsZeroSubAir,
-    utils::next_power_of_two_or_zero,
-    var_range::{SharedVariableRangeCheckerChip, VariableRangeCheckerBus},
+    var_range::{
+        SharedVariableRangeCheckerChip, VariableRangeCheckerBus, VariableRangeCheckerChip,
+    },
     TraceSubRowGenerator,
 };
 use openvm_stark_backend::{
     config::{Domain, StarkGenericConfig},
     interaction::PermutationCheckBus,
     p3_commit::PolynomialSpace,
-    p3_field::PrimeField32,
+    p3_field::{Field, PrimeField32},
     p3_maybe_rayon::prelude::{IntoParallelIterator, ParallelIterator},
     p3_util::{log2_ceil_usize, log2_strict_usize},
-    prover::types::AirProofInput,
-    AirRef, Chip, ChipUsageGetter,
+    prover::{cpu::CpuBackend, types::AirProvingContext},
+    Chip,
 };
 use serde::{Deserialize, Serialize};
 
 use self::interface::MemoryInterface;
-use super::{
-    paged_vec::{AddressMap, PAGE_SIZE},
-    volatile::VolatileBoundaryChip,
-};
+use super::{volatile::VolatileBoundaryChip, AddressMap};
 use crate::{
-    arch::{hasher::HasherChip, MemoryConfig},
-    system::memory::{
-        adapter::AccessAdapterInventory,
-        dimensions::MemoryDimensions,
-        merkle::{MemoryMerkleChip, SerialReceiver},
-        offline::{MemoryRecord, OfflineMemory, INITIAL_TIMESTAMP},
-        offline_checker::{
-            MemoryBaseAuxCols, MemoryBridge, MemoryBus, MemoryReadAuxCols,
-            MemoryReadOrImmediateAuxCols, MemoryWriteAuxCols, AUX_LEN,
+    arch::{DenseRecordArena, MemoryConfig, ADDR_SPACE_OFFSET},
+    system::{
+        memory::{
+            adapter::AccessAdapterInventory,
+            dimensions::MemoryDimensions,
+            merkle::MemoryMerkleChip,
+            offline_checker::{MemoryBaseAuxCols, MemoryBridge, MemoryBus, AUX_LEN},
+            persistent::PersistentBoundaryChip,
         },
-        online::{Memory, MemoryLogEntry},
-        persistent::PersistentBoundaryChip,
-        tree::MemoryNode,
+        poseidon2::Poseidon2PeripheryChip,
+        TouchedMemory,
     },
 };
 
@@ -53,16 +42,13 @@ pub mod dimensions;
 pub mod interface;
 
 pub const CHUNK: usize = 8;
+
 /// The offset of the Merkle AIR in AIRs of MemoryController.
 pub const MERKLE_AIR_OFFSET: usize = 1;
 /// The offset of the boundary AIR in AIRs of MemoryController.
 pub const BOUNDARY_AIR_OFFSET: usize = 0;
 
-#[repr(C)]
-#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
-pub struct RecordId(pub usize);
-
-pub type MemoryImage<F> = AddressMap<F, PAGE_SIZE>;
+pub type MemoryImage = AddressMap;
 
 #[repr(C)]
 #[derive(Clone, Copy, Debug, PartialEq, Eq)]
@@ -71,14 +57,11 @@ pub struct TimestampedValues<T, const N: usize> {
     pub values: [T; N],
 }
 
-/// An equipartition of memory, with timestamps and values.
+/// A sorted equipartition of memory, with timestamps and values.
 ///
-/// The key is a pair `(address_space, label)`, where `label` is the index of the block in the
+/// The "key" is a pair `(address_space, label)`, where `label` is the index of the block in the
 /// partition. I.e., the starting address of the block is `(address_space, label * N)`.
-///
-/// If a key is not present in the map, then the block is uninitialized (and therefore zero).
-pub type TimestampedEquipartition<F, const N: usize> =
-    BTreeMap<(u32, u32), TimestampedValues<F, N>>;
+pub type TimestampedEquipartition<F, const N: usize> = Vec<((u32, u32), TimestampedValues<F, N>)>;
 
 /// An equipartition of memory values.
 ///
@@ -89,69 +72,14 @@ pub type TimestampedEquipartition<F, const N: usize> =
 pub type Equipartition<F, const N: usize> = BTreeMap<(u32, u32), [F; N]>;
 
 #[derive(Getters, MutGetters)]
-pub struct MemoryController<F> {
+pub struct MemoryController<F: Field> {
     pub memory_bus: MemoryBus,
     pub interface_chip: MemoryInterface<F>,
-    #[getset(get = "pub")]
-    pub(crate) mem_config: MemoryConfig,
     pub range_checker: SharedVariableRangeCheckerChip,
     // Store separately to avoid smart pointer reference each time
     range_checker_bus: VariableRangeCheckerBus,
-    // addr_space -> Memory data structure
-    memory: Memory<F>,
-    /// A reference to the `OfflineMemory`. Will be populated after `finalize()`.
-    offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-    pub access_adapters: AccessAdapterInventory<F>,
-    // Filled during finalization.
-    final_state: Option<FinalState<F>>,
-}
-
-#[allow(clippy::large_enum_variant)]
-#[derive(Debug)]
-enum FinalState<F> {
-    Volatile(VolatileFinalState<F>),
-    #[allow(dead_code)]
-    Persistent(PersistentFinalState<F>),
-}
-#[derive(Debug, Default)]
-struct VolatileFinalState<F> {
-    _marker: PhantomData<F>,
-}
-#[allow(dead_code)]
-#[derive(Debug)]
-struct PersistentFinalState<F> {
-    final_memory: Equipartition<F, CHUNK>,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
-pub enum MemoryTraceHeights {
-    Volatile(VolatileMemoryTraceHeights),
-    Persistent(PersistentMemoryTraceHeights),
-}
-
-impl MemoryTraceHeights {
-    fn flatten(&self) -> Vec<usize> {
-        match self {
-            MemoryTraceHeights::Volatile(oh) => oh.flatten(),
-            MemoryTraceHeights::Persistent(oh) => oh.flatten(),
-        }
-    }
-
-    /// Round all trace heights to the next power of two. This will round trace heights of 0 to 1.
-    pub fn round_to_next_power_of_two(&mut self) {
-        match self {
-            MemoryTraceHeights::Volatile(oh) => oh.round_to_next_power_of_two(),
-            MemoryTraceHeights::Persistent(oh) => oh.round_to_next_power_of_two(),
-        }
-    }
-
-    /// Round all trace heights to the next power of two, except 0 stays 0.
-    pub fn round_to_next_power_of_two_or_zero(&mut self) {
-        match self {
-            MemoryTraceHeights::Volatile(oh) => oh.round_to_next_power_of_two_or_zero(),
-            MemoryTraceHeights::Persistent(oh) => oh.round_to_next_power_of_two_or_zero(),
-        }
-    }
+    pub(crate) access_adapter_inventory: AccessAdapterInventory<F>,
+    pub(crate) hasher_chip: Option<Arc<Poseidon2PeripheryChip<F>>>,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
@@ -161,24 +89,14 @@ pub struct VolatileMemoryTraceHeights {
 }
 
 impl VolatileMemoryTraceHeights {
-    pub fn flatten(&self) -> Vec<usize> {
-        iter::once(self.boundary)
-            .chain(self.access_adapters.iter().copied())
-            .collect()
-    }
-
-    fn round_to_next_power_of_two(&mut self) {
-        self.boundary = self.boundary.next_power_of_two();
-        self.access_adapters
-            .iter_mut()
-            .for_each(|v| *v = v.next_power_of_two());
-    }
-
-    fn round_to_next_power_of_two_or_zero(&mut self) {
-        self.boundary = next_power_of_two_or_zero(self.boundary);
-        self.access_adapters
-            .iter_mut()
-            .for_each(|v| *v = next_power_of_two_or_zero(*v));
+    /// `heights`: only memory trace heights, in AIR ID order (boundary, then access adapters).
+    pub fn from_slice(heights: &[u32]) -> Self {
+        let boundary = heights[0] as usize;
+        let access_adapters = heights[1..].iter().map(|&h| h as usize).collect();
+        Self {
+            boundary,
+            access_adapters,
+        }
     }
 }
 
@@ -189,32 +107,21 @@ pub struct PersistentMemoryTraceHeights {
     access_adapters: Vec<usize>,
 }
 impl PersistentMemoryTraceHeights {
-    pub fn flatten(&self) -> Vec<usize> {
-        vec![self.boundary, self.merkle]
-            .into_iter()
-            .chain(self.access_adapters.iter().copied())
-            .collect()
-    }
-
-    fn round_to_next_power_of_two(&mut self) {
-        self.boundary = self.boundary.next_power_of_two();
-        self.merkle = self.merkle.next_power_of_two();
-        self.access_adapters
-            .iter_mut()
-            .for_each(|v| *v = v.next_power_of_two());
-    }
-
-    fn round_to_next_power_of_two_or_zero(&mut self) {
-        self.boundary = next_power_of_two_or_zero(self.boundary);
-        self.merkle = next_power_of_two_or_zero(self.merkle);
-        self.access_adapters
-            .iter_mut()
-            .for_each(|v| *v = next_power_of_two_or_zero(*v));
+    /// `heights`: only memory trace heights, in AIR ID order (boundary, merkle, then adapters).
+    pub fn from_slice(heights: &[u32]) -> Self {
+        let boundary = heights[0] as usize;
+        let merkle = heights[1] as usize;
+        let access_adapters = heights[2..].iter().map(|&h| h as usize).collect();
+        Self {
+            boundary,
+            merkle,
+            access_adapters,
+        }
     }
 }
 
 impl<F: PrimeField32> MemoryController<F> {
-    pub fn continuation_enabled(&self) -> bool {
+    pub(crate) fn continuation_enabled(&self) -> bool {
         match &self.interface_chip {
             MemoryInterface::Volatile { .. } => false,
             MemoryInterface::Persistent { .. } => true,
@@ -226,15 +133,17 @@ impl<F: PrimeField32> MemoryController<F> {
         range_checker: SharedVariableRangeCheckerChip,
     ) -> Self {
         let range_checker_bus = range_checker.bus();
-        let initial_memory = AddressMap::from_mem_config(&mem_config);
         assert!(mem_config.pointer_max_bits <= F::bits() - 2);
-        assert!(mem_config.as_height < F::bits() - 2);
+        assert!(mem_config
+            .addr_spaces
+            .iter()
+            .all(|&space| space.num_cells <= (1 << mem_config.pointer_max_bits)));
+        assert!(mem_config.addr_space_height < F::bits() - 2);
         let addr_space_max_bits = log2_ceil_usize(
-            (mem_config.as_offset + 2u32.pow(mem_config.as_height as u32)) as usize,
+            (ADDR_SPACE_OFFSET + 2u32.pow(mem_config.addr_space_height as u32)) as usize,
         );
         Self {
             memory_bus,
-            mem_config,
             interface_chip: MemoryInterface::Volatile {
                 boundary_chip: VolatileBoundaryChip::new(
                     memory_bus,
@@ -243,23 +152,14 @@ impl<F: PrimeField32> MemoryController<F> {
                     range_checker.clone(),
                 ),
             },
-            memory: Memory::new(&mem_config),
-            offline_memory: Arc::new(Mutex::new(OfflineMemory::new(
-                initial_memory,
-                1,
-                memory_bus,
-                range_checker.clone(),
-                mem_config,
-            ))),
-            access_adapters: AccessAdapterInventory::new(
+            access_adapter_inventory: AccessAdapterInventory::new(
                 range_checker.clone(),
                 memory_bus,
-                mem_config.clk_max_bits,
-                mem_config.max_access_adapter_n,
+                mem_config,
             ),
             range_checker,
             range_checker_bus,
-            final_state: None,
+            hasher_chip: None,
         }
     }
 
@@ -272,12 +172,11 @@ impl<F: PrimeField32> MemoryController<F> {
         range_checker: SharedVariableRangeCheckerChip,
         merkle_bus: PermutationCheckBus,
         compression_bus: PermutationCheckBus,
+        hasher_chip: Arc<Poseidon2PeripheryChip<F>>,
     ) -> Self {
-        assert_eq!(mem_config.as_offset, 1);
         let memory_dims = MemoryDimensions {
-            as_height: mem_config.as_height,
+            addr_space_height: mem_config.addr_space_height,
             address_height: mem_config.pointer_max_bits - log2_strict_usize(CHUNK),
-            as_offset: 1,
         };
         let range_checker_bus = range_checker.bus();
         let interface_chip = MemoryInterface::Persistent {
@@ -292,73 +191,50 @@ impl<F: PrimeField32> MemoryController<F> {
         };
         Self {
             memory_bus,
-            mem_config,
             interface_chip,
-            memory: Memory::new(&mem_config), // it is expected that the memory will be set later
-            offline_memory: Arc::new(Mutex::new(OfflineMemory::new(
-                AddressMap::from_mem_config(&mem_config),
-                CHUNK,
-                memory_bus,
-                range_checker.clone(),
-                mem_config,
-            ))),
-            access_adapters: AccessAdapterInventory::new(
+            access_adapter_inventory: AccessAdapterInventory::new(
                 range_checker.clone(),
                 memory_bus,
-                mem_config.clk_max_bits,
-                mem_config.max_access_adapter_n,
+                mem_config,
             ),
             range_checker,
             range_checker_bus,
-            final_state: None,
+            hasher_chip: Some(hasher_chip),
         }
     }
 
-    pub fn memory_image(&self) -> &MemoryImage<F> {
-        &self.memory.data
+    pub fn memory_config(&self) -> &MemoryConfig {
+        &self.access_adapter_inventory.memory_config
     }
 
-    pub fn set_override_trace_heights(&mut self, overridden_heights: MemoryTraceHeights) {
+    pub(crate) fn set_override_trace_heights(&mut self, overridden_heights: &[u32]) {
         match &mut self.interface_chip {
-            MemoryInterface::Volatile { boundary_chip } => match overridden_heights {
-                MemoryTraceHeights::Volatile(oh) => {
-                    boundary_chip.set_overridden_height(oh.boundary);
-                    self.access_adapters
-                        .set_override_trace_heights(oh.access_adapters);
-                }
-                _ => panic!("Expect overridden_heights to be MemoryTraceHeights::Volatile"),
-            },
+            MemoryInterface::Volatile { boundary_chip } => {
+                let oh = VolatileMemoryTraceHeights::from_slice(overridden_heights);
+                boundary_chip.set_overridden_height(oh.boundary);
+                self.access_adapter_inventory
+                    .set_override_trace_heights(oh.access_adapters);
+            }
             MemoryInterface::Persistent {
                 boundary_chip,
                 merkle_chip,
                 ..
-            } => match overridden_heights {
-                MemoryTraceHeights::Persistent(oh) => {
-                    boundary_chip.set_overridden_height(oh.boundary);
-                    merkle_chip.set_overridden_height(oh.merkle);
-                    self.access_adapters
-                        .set_override_trace_heights(oh.access_adapters);
-                }
-                _ => panic!("Expect overridden_heights to be MemoryTraceHeights::Persistent"),
-            },
+            } => {
+                let oh = PersistentMemoryTraceHeights::from_slice(overridden_heights);
+                boundary_chip.set_overridden_height(oh.boundary);
+                merkle_chip.set_overridden_height(oh.merkle);
+                self.access_adapter_inventory
+                    .set_override_trace_heights(oh.access_adapters);
+            }
         }
     }
 
-    pub fn set_initial_memory(&mut self, memory: MemoryImage<F>) {
-        if self.timestamp() > INITIAL_TIMESTAMP + 1 {
-            panic!("Cannot set initial memory after first timestamp");
-        }
-        let mut offline_memory = self.offline_memory.lock().unwrap();
-        offline_memory.set_initial_memory(memory.clone(), self.mem_config);
-
-        self.memory = Memory::from_image(memory.clone(), self.mem_config.access_capacity);
-
+    /// This only sets the initial memory image for the persistent boundary and merkle tree chips.
+    /// Tracing memory should be set separately.
+    pub(crate) fn set_initial_memory(&mut self, memory: AddressMap) {
         match &mut self.interface_chip {
             MemoryInterface::Volatile { .. } => {
-                assert!(
-                    memory.is_empty(),
-                    "Cannot set initial memory for volatile memory"
-                );
+                // Skip initialization for volatile memory
             }
             MemoryInterface::Persistent { initial_memory, .. } => {
                 *initial_memory = memory;
@@ -369,207 +245,68 @@ impl<F: PrimeField32> MemoryController<F> {
     pub fn memory_bridge(&self) -> MemoryBridge {
         MemoryBridge::new(
             self.memory_bus,
-            self.mem_config.clk_max_bits,
+            self.memory_config().timestamp_max_bits,
             self.range_checker_bus,
         )
     }
 
-    pub fn read_cell(&mut self, address_space: F, pointer: F) -> (RecordId, F) {
-        let (record_id, [data]) = self.read(address_space, pointer);
-        (record_id, data)
-    }
-
-    pub fn read<const N: usize>(&mut self, address_space: F, pointer: F) -> (RecordId, [F; N]) {
-        let address_space_u32 = address_space.as_canonical_u32();
-        let ptr_u32 = pointer.as_canonical_u32();
-        assert!(
-            address_space == F::ZERO || ptr_u32 < (1 << self.mem_config.pointer_max_bits),
-            "memory out of bounds: {ptr_u32:?}",
-        );
-
-        let (record_id, values) = self.memory.read::<N>(address_space_u32, ptr_u32);
-
-        (record_id, values)
-    }
-
-    /// Reads a word directly from memory without updating internal state.
-    ///
-    /// Any value returned is unconstrained.
-    pub fn unsafe_read_cell(&self, addr_space: F, ptr: F) -> F {
-        self.unsafe_read::<1>(addr_space, ptr)[0]
-    }
-
-    /// Reads a word directly from memory without updating internal state.
-    ///
-    /// Any value returned is unconstrained.
-    pub fn unsafe_read<const N: usize>(&self, addr_space: F, ptr: F) -> [F; N] {
-        let addr_space = addr_space.as_canonical_u32();
-        let ptr = ptr.as_canonical_u32();
-        array::from_fn(|i| self.memory.get(addr_space, ptr + i as u32))
-    }
-
-    /// Writes `data` to the given cell.
-    ///
-    /// Returns the `RecordId` and previous data.
-    pub fn write_cell(&mut self, address_space: F, pointer: F, data: F) -> (RecordId, F) {
-        let (record_id, [data]) = self.write(address_space, pointer, [data]);
-        (record_id, data)
-    }
-
-    pub fn write<const N: usize>(
-        &mut self,
-        address_space: F,
-        pointer: F,
-        data: [F; N],
-    ) -> (RecordId, [F; N]) {
-        assert_ne!(address_space, F::ZERO);
-        let address_space_u32 = address_space.as_canonical_u32();
-        let ptr_u32 = pointer.as_canonical_u32();
-        assert!(
-            ptr_u32 < (1 << self.mem_config.pointer_max_bits),
-            "memory out of bounds: {ptr_u32:?}",
-        );
-
-        self.memory.write(address_space_u32, ptr_u32, data)
-    }
-
-    pub fn aux_cols_factory(&self) -> MemoryAuxColsFactory<F> {
+    pub fn helper(&self) -> SharedMemoryHelper<F> {
         let range_bus = self.range_checker.bus();
-        MemoryAuxColsFactory {
+        SharedMemoryHelper {
             range_checker: self.range_checker.clone(),
-            timestamp_lt_air: AssertLtSubAir::new(range_bus, self.mem_config.clk_max_bits),
+            timestamp_lt_air: AssertLtSubAir::new(
+                range_bus,
+                self.memory_config().timestamp_max_bits,
+            ),
             _marker: Default::default(),
         }
     }
 
-    pub fn increment_timestamp(&mut self) {
-        self.memory.increment_timestamp_by(1);
-    }
-
-    pub fn increment_timestamp_by(&mut self, change: u32) {
-        self.memory.increment_timestamp_by(change);
-    }
-
-    pub fn timestamp(&self) -> u32 {
-        self.memory.timestamp()
-    }
-
-    fn replay_access_log(&mut self) {
-        let log = mem::take(&mut self.memory.log);
-        if log.is_empty() {
-            // Online memory logs may be empty, but offline memory may be replayed from external
-            // sources. In these cases, we skip the calls to replay access logs because
-            // `set_log_capacity` would panic.
-            tracing::debug!("skipping replay_access_log");
-            return;
-        }
-
-        let mut offline_memory = self.offline_memory.lock().unwrap();
-        offline_memory.set_log_capacity(log.len());
-
-        for entry in log {
-            Self::replay_access(
-                entry,
-                &mut offline_memory,
-                &mut self.interface_chip,
-                &mut self.access_adapters,
-            );
-        }
-    }
-
-    /// Low-level API to replay a single memory access log entry and populate the [OfflineMemory],
-    /// [MemoryInterface], and `AccessAdapterInventory`.
-    pub fn replay_access(
-        entry: MemoryLogEntry<F>,
-        offline_memory: &mut OfflineMemory<F>,
-        interface_chip: &mut MemoryInterface<F>,
-        adapter_records: &mut AccessAdapterInventory<F>,
-    ) {
-        match entry {
-            MemoryLogEntry::Read {
-                address_space,
-                pointer,
-                len,
-            } => {
-                if address_space != 0 {
-                    interface_chip.touch_range(address_space, pointer, len as u32);
-                }
-                offline_memory.read(address_space, pointer, len, adapter_records);
-            }
-            MemoryLogEntry::Write {
-                address_space,
-                pointer,
-                data,
-            } => {
-                if address_space != 0 {
-                    interface_chip.touch_range(address_space, pointer, data.len() as u32);
-                }
-                offline_memory.write(address_space, pointer, data, adapter_records);
-            }
-            MemoryLogEntry::IncrementTimestampBy(amount) => {
-                offline_memory.increment_timestamp_by(amount);
-            }
-        };
-    }
-
-    /// Returns the final memory state if persistent.
-    pub fn finalize<H>(&mut self, hasher: Option<&mut H>)
+    // @dev: The memory chips are exempt from the usual conventions (e.g., one arena per chip),
+    // and no memory chip is required to implement the `Chip` trait. Some do so when convenient,
+    // but the only real requirement is that trace matrices can be generated for all of the
+    // memory AIRs _somehow_.
+    pub fn generate_proving_ctx<SC: StarkGenericConfig>(
+        &mut self,
+        access_adapter_records: DenseRecordArena,
+        touched_memory: TouchedMemory<F>,
+    ) -> Vec<AirProvingContext<CpuBackend<SC>>>
     where
-        H: HasherChip<CHUNK, F> + Sync + for<'a> SerialReceiver<&'a [F]>,
+        Domain<SC>: PolynomialSpace<Val = F>,
     {
-        if self.final_state.is_some() {
-            return;
-        }
-
-        self.replay_access_log();
-        let mut offline_memory = self.offline_memory.lock().unwrap();
-
-        match &mut self.interface_chip {
-            MemoryInterface::Volatile { boundary_chip } => {
-                let final_memory = offline_memory.finalize::<1>(&mut self.access_adapters);
+        match (&mut self.interface_chip, touched_memory) {
+            (
+                MemoryInterface::Volatile { boundary_chip },
+                TouchedMemory::Volatile(final_memory),
+            ) => {
                 boundary_chip.finalize(final_memory);
-                self.final_state = Some(FinalState::Volatile(VolatileFinalState::default()));
             }
-            MemoryInterface::Persistent {
-                merkle_chip,
-                boundary_chip,
-                initial_memory,
-            } => {
-                let hasher = hasher.unwrap();
-                let final_partition = offline_memory.finalize::<CHUNK>(&mut self.access_adapters);
-
-                boundary_chip.finalize(initial_memory, &final_partition, hasher);
-                let final_memory_values = final_partition
+            (
+                MemoryInterface::Persistent {
+                    boundary_chip,
+                    merkle_chip,
+                    initial_memory,
+                },
+                TouchedMemory::Persistent(final_memory),
+            ) => {
+                let hasher = self.hasher_chip.as_ref().unwrap();
+                boundary_chip.finalize(initial_memory, &final_memory, hasher.as_ref());
+                let final_memory_values = final_memory
                     .into_par_iter()
                     .map(|(key, value)| (key, value.values))
                     .collect();
-                let initial_node = MemoryNode::tree_from_memory(
-                    merkle_chip.air.memory_dimensions,
-                    initial_memory,
-                    hasher,
-                );
-                merkle_chip.finalize(&initial_node, &final_memory_values, hasher);
-                self.final_state = Some(FinalState::Persistent(PersistentFinalState {
-                    final_memory: final_memory_values.clone(),
-                }));
+                merkle_chip.finalize(initial_memory, &final_memory_values, hasher.as_ref());
             }
-        };
-    }
+            _ => panic!("TouchedMemory incorrect type"),
+        }
 
-    pub fn generate_air_proof_inputs<SC: StarkGenericConfig>(self) -> Vec<AirProofInput<SC>>
-    where
-        Domain<SC>: PolynomialSpace<Val = F>,
-    {
         let mut ret = Vec::new();
 
-        let Self {
-            interface_chip,
-            access_adapters,
-            ..
-        } = self;
-        match interface_chip {
+        let access_adapters = &mut self.access_adapter_inventory;
+        access_adapters.set_arena(access_adapter_records);
+        match &mut self.interface_chip {
             MemoryInterface::Volatile { boundary_chip } => {
-                ret.push(boundary_chip.generate_air_proof_input());
+                ret.push(boundary_chip.generate_proving_ctx(()));
             }
             MemoryInterface::Persistent {
                 merkle_chip,
@@ -577,191 +314,66 @@ impl<F: PrimeField32> MemoryController<F> {
                 ..
             } => {
                 debug_assert_eq!(ret.len(), BOUNDARY_AIR_OFFSET);
-                ret.push(boundary_chip.generate_air_proof_input());
+                ret.push(boundary_chip.generate_proving_ctx(()));
                 debug_assert_eq!(ret.len(), MERKLE_AIR_OFFSET);
-                ret.push(merkle_chip.generate_air_proof_input());
+                ret.push(merkle_chip.generate_proving_ctx());
             }
         }
-        ret.extend(access_adapters.generate_air_proof_inputs());
+        ret.extend(access_adapters.generate_proving_ctx());
         ret
     }
 
-    pub fn airs<SC: StarkGenericConfig>(&self) -> Vec<AirRef<SC>>
-    where
-        Domain<SC>: PolynomialSpace<Val = F>,
-    {
-        let mut airs = Vec::<AirRef<SC>>::new();
-
-        match &self.interface_chip {
-            MemoryInterface::Volatile { boundary_chip } => {
-                debug_assert_eq!(airs.len(), BOUNDARY_AIR_OFFSET);
-                airs.push(boundary_chip.air())
-            }
-            MemoryInterface::Persistent {
-                boundary_chip,
-                merkle_chip,
-                ..
-            } => {
-                debug_assert_eq!(airs.len(), BOUNDARY_AIR_OFFSET);
-                airs.push(boundary_chip.air());
-                debug_assert_eq!(airs.len(), MERKLE_AIR_OFFSET);
-                airs.push(merkle_chip.air());
-            }
-        }
-        airs.extend(self.access_adapters.airs());
-
-        airs
-    }
-
     /// Return the number of AIRs in the memory controller.
     pub fn num_airs(&self) -> usize {
         let mut num_airs = 1;
         if self.continuation_enabled() {
             num_airs += 1;
         }
-        num_airs += self.access_adapters.num_access_adapters();
+        num_airs += self.access_adapter_inventory.num_access_adapters();
         num_airs
     }
-
-    pub fn air_names(&self) -> Vec<String> {
-        let mut air_names = vec!["Boundary".to_string()];
-        if self.continuation_enabled() {
-            air_names.push("Merkle".to_string());
-        }
-        air_names.extend(self.access_adapters.air_names());
-        air_names
-    }
-
-    pub fn current_trace_heights(&self) -> Vec<usize> {
-        self.get_memory_trace_heights().flatten()
-    }
-
-    pub fn get_memory_trace_heights(&self) -> MemoryTraceHeights {
-        let access_adapters = self.access_adapters.get_heights();
-        match &self.interface_chip {
-            MemoryInterface::Volatile { boundary_chip } => {
-                MemoryTraceHeights::Volatile(VolatileMemoryTraceHeights {
-                    boundary: boundary_chip.current_trace_height(),
-                    access_adapters,
-                })
-            }
-            MemoryInterface::Persistent {
-                boundary_chip,
-                merkle_chip,
-                ..
-            } => MemoryTraceHeights::Persistent(PersistentMemoryTraceHeights {
-                boundary: boundary_chip.current_trace_height(),
-                merkle: merkle_chip.current_trace_height(),
-                access_adapters,
-            }),
-        }
-    }
-
-    pub fn get_dummy_memory_trace_heights(&self) -> MemoryTraceHeights {
-        let access_adapters = vec![1; self.access_adapters.num_access_adapters()];
-        match &self.interface_chip {
-            MemoryInterface::Volatile { .. } => {
-                MemoryTraceHeights::Volatile(VolatileMemoryTraceHeights {
-                    boundary: 1,
-                    access_adapters,
-                })
-            }
-            MemoryInterface::Persistent { .. } => {
-                MemoryTraceHeights::Persistent(PersistentMemoryTraceHeights {
-                    boundary: 1,
-                    merkle: 1,
-                    access_adapters,
-                })
-            }
-        }
-    }
-
-    pub fn current_trace_cells(&self) -> Vec<usize> {
-        let mut ret = Vec::new();
-        match &self.interface_chip {
-            MemoryInterface::Volatile { boundary_chip } => {
-                ret.push(boundary_chip.current_trace_cells())
-            }
-            MemoryInterface::Persistent {
-                boundary_chip,
-                merkle_chip,
-                ..
-            } => {
-                ret.push(boundary_chip.current_trace_cells());
-                ret.push(merkle_chip.current_trace_cells());
-            }
-        }
-        ret.extend(self.access_adapters.get_cells());
-        ret
-    }
-
-    /// Returns a reference to the offline memory.
-    ///
-    /// Until `finalize` is called, the `OfflineMemory` does not contain useful state, and should
-    /// therefore not be used by any chip during execution. However, to obtain a reference to the
-    /// offline memory that will be useful in trace generation, a chip can call `offline_memory()`
-    /// and store the returned reference for later use.
-    pub fn offline_memory(&self) -> Arc<Mutex<OfflineMemory<F>>> {
-        self.offline_memory.clone()
-    }
-    pub fn get_memory_logs(&self) -> &Vec<MemoryLogEntry<F>> {
-        &self.memory.log
-    }
-    pub fn set_memory_logs(&mut self, logs: Vec<MemoryLogEntry<F>>) {
-        self.memory.log = logs;
-    }
-    pub fn take_memory_logs(&mut self) -> Vec<MemoryLogEntry<F>> {
-        std::mem::take(&mut self.memory.log)
-    }
 }
 
-pub struct MemoryAuxColsFactory<T> {
+/// Owned version of [MemoryAuxColsFactory].
+#[derive(Clone)]
+pub struct SharedMemoryHelper<F> {
     pub(crate) range_checker: SharedVariableRangeCheckerChip,
     pub(crate) timestamp_lt_air: AssertLtSubAir,
-    pub(crate) _marker: PhantomData<T>,
+    pub(crate) _marker: PhantomData<F>,
 }
 
-// NOTE[jpw]: The `make_*_aux_cols` functions should be thread-safe so they can be used in
-// parallelized trace generation.
-impl<F: PrimeField32> MemoryAuxColsFactory<F> {
-    pub fn generate_read_aux(&self, read: &MemoryRecord<F>, buffer: &mut MemoryReadAuxCols<F>) {
-        assert!(
-            !read.address_space.is_zero(),
-            "cannot make `MemoryReadAuxCols` for address space 0"
-        );
-        self.generate_base_aux(read, &mut buffer.base);
+impl<F> SharedMemoryHelper<F> {
+    pub fn new(range_checker: SharedVariableRangeCheckerChip, timestamp_max_bits: usize) -> Self {
+        let timestamp_lt_air = AssertLtSubAir::new(range_checker.bus(), timestamp_max_bits);
+        Self {
+            range_checker,
+            timestamp_lt_air,
+            _marker: PhantomData,
+        }
     }
+}
 
-    pub fn generate_read_or_immediate_aux(
-        &self,
-        read: &MemoryRecord<F>,
-        buffer: &mut MemoryReadOrImmediateAuxCols<F>,
-    ) {
-        IsZeroSubAir.generate_subrow(
-            read.address_space,
-            (&mut buffer.is_zero_aux, &mut buffer.is_immediate),
-        );
-        self.generate_base_aux(read, &mut buffer.base);
-    }
+/// A helper for generating trace values in auxiliary memory columns related to the offline memory
+/// argument.
+pub struct MemoryAuxColsFactory<'a, F> {
+    pub(crate) range_checker: &'a VariableRangeCheckerChip,
+    pub(crate) timestamp_lt_air: AssertLtSubAir,
+    pub(crate) _marker: PhantomData<F>,
+}
 
-    pub fn generate_write_aux<const N: usize>(
-        &self,
-        write: &MemoryRecord<F>,
-        buffer: &mut MemoryWriteAuxCols<F, N>,
-    ) {
-        buffer
-            .prev_data
-            .copy_from_slice(write.prev_data_slice().unwrap());
-        self.generate_base_aux(write, &mut buffer.base);
+impl<F: PrimeField32> MemoryAuxColsFactory<'_, F> {
+    /// Fills `buffer`: the `prev_timestamp < timestamp` aux columns, then `buffer.prev_timestamp`.
+    pub fn fill(&self, prev_timestamp: u32, timestamp: u32, buffer: &mut MemoryBaseAuxCols<F>) {
+        self.generate_timestamp_lt(prev_timestamp, timestamp, &mut buffer.timestamp_lt_aux);
+        // Safety: even if prev_timestamp were obtained by transmute_ref from
+        // `buffer.prev_timestamp`, this should still work because it is a direct assignment
+        buffer.prev_timestamp = F::from_canonical_u32(prev_timestamp);
     }
 
-    pub fn generate_base_aux(&self, record: &MemoryRecord<F>, buffer: &mut MemoryBaseAuxCols<F>) {
-        buffer.prev_timestamp = F::from_canonical_u32(record.prev_timestamp);
-        self.generate_timestamp_lt(
-            record.prev_timestamp,
-            record.timestamp,
-            &mut buffer.timestamp_lt_aux,
-        );
+    /// Overwrites `buffer` with all-zero bytes.
+    /// This assumes that `F::ZERO` has underlying memory equivalent to `mem::zeroed()`.
+    pub fn fill_zero(&self, buffer: &mut MemoryBaseAuxCols<F>) {
+        *buffer = unsafe { std::mem::zeroed() };
     }
 
     fn generate_timestamp_lt(
@@ -770,102 +382,23 @@ impl<F: PrimeField32> MemoryAuxColsFactory<F> {
         timestamp: u32,
         buffer: &mut LessThanAuxCols<F, AUX_LEN>,
     ) {
-        debug_assert!(prev_timestamp < timestamp);
-        self.timestamp_lt_air.generate_subrow(
-            (self.range_checker.as_ref(), prev_timestamp, timestamp),
-            &mut buffer.lower_decomp,
+        debug_assert!(
+            prev_timestamp < timestamp,
+            "prev_timestamp {prev_timestamp} >= timestamp {timestamp}"
         );
-    }
-
-    /// In general, prefer `generate_read_aux` which writes in-place rather than this function.
-    pub fn make_read_aux_cols(&self, read: &MemoryRecord<F>) -> MemoryReadAuxCols<F> {
-        assert!(
-            !read.address_space.is_zero(),
-            "cannot make `MemoryReadAuxCols` for address space 0"
-        );
-        MemoryReadAuxCols::new(
-            read.prev_timestamp,
-            self.generate_timestamp_lt_cols(read.prev_timestamp, read.timestamp),
-        )
-    }
-
-    /// In general, prefer `generate_write_aux` which writes in-place rather than this function.
-    pub fn make_write_aux_cols<const N: usize>(
-        &self,
-        write: &MemoryRecord<F>,
-    ) -> MemoryWriteAuxCols<F, N> {
-        let prev_data = write.prev_data_slice().unwrap();
-        MemoryWriteAuxCols::new(
-            prev_data.try_into().unwrap(),
-            F::from_canonical_u32(write.prev_timestamp),
-            self.generate_timestamp_lt_cols(write.prev_timestamp, write.timestamp),
-        )
-    }
-
-    fn generate_timestamp_lt_cols(
-        &self,
-        prev_timestamp: u32,
-        timestamp: u32,
-    ) -> LessThanAuxCols<F, AUX_LEN> {
-        debug_assert!(prev_timestamp < timestamp);
-        let mut decomp = [F::ZERO; AUX_LEN];
         self.timestamp_lt_air.generate_subrow(
-            (self.range_checker.as_ref(), prev_timestamp, timestamp),
-            &mut decomp,
+            (self.range_checker, prev_timestamp, timestamp),
+            &mut buffer.lower_decomp,
         );
-        LessThanAuxCols::new(decomp)
     }
 }
 
-#[cfg(test)]
-mod tests {
-    use openvm_circuit_primitives::var_range::{
-        SharedVariableRangeCheckerChip, VariableRangeCheckerBus,
-    };
-    use openvm_stark_backend::{interaction::BusIndex, p3_field::FieldAlgebra};
-    use openvm_stark_sdk::p3_baby_bear::BabyBear;
-    use rand::{prelude::SliceRandom, thread_rng, Rng};
-
-    use super::MemoryController;
-    use crate::{
-        arch::{testing::MEMORY_BUS, MemoryConfig},
-        system::memory::offline_checker::MemoryBus,
-    };
-
-    const RANGE_CHECKER_BUS: BusIndex = 3;
-
-    #[test]
-    fn test_no_adapter_records_for_singleton_accesses() {
-        type F = BabyBear;
-
-        let memory_bus = MemoryBus::new(MEMORY_BUS);
-        let memory_config = MemoryConfig::default();
-        let range_bus = VariableRangeCheckerBus::new(RANGE_CHECKER_BUS, memory_config.decomp);
-        let range_checker = SharedVariableRangeCheckerChip::new(range_bus);
-
-        let mut memory_controller = MemoryController::with_volatile_memory(
-            memory_bus,
-            memory_config,
-            range_checker.clone(),
-        );
-
-        let mut rng = thread_rng();
-        for _ in 0..1000 {
-            let address_space = F::from_canonical_u32(*[1, 2].choose(&mut rng).unwrap());
-            let pointer =
-                F::from_canonical_u32(rng.gen_range(0..1 << memory_config.pointer_max_bits));
-
-            if rng.gen_bool(0.5) {
-                let data = F::from_canonical_u32(rng.gen_range(0..1 << 30));
-                memory_controller.write(address_space, pointer, [data]);
-            } else {
-                memory_controller.read::<1>(address_space, pointer);
-            }
+impl<F> SharedMemoryHelper<F> {
+    pub fn as_borrowed(&self) -> MemoryAuxColsFactory<'_, F> {
+        MemoryAuxColsFactory {
+            range_checker: self.range_checker.as_ref(),
+            timestamp_lt_air: self.timestamp_lt_air,
+            _marker: PhantomData,
         }
-        assert!(memory_controller
-            .access_adapters
-            .get_heights()
-            .iter()
-            .all(|&h| h == 0));
     }
 }
diff --git a/crates/vm/src/system/memory/merkle/mod.rs b/crates/vm/src/system/memory/merkle/mod.rs
index 74f8951bc4..6d974ddde0 100644
--- a/crates/vm/src/system/memory/merkle/mod.rs
+++ b/crates/vm/src/system/memory/merkle/mod.rs
@@ -1,27 +1,39 @@
-use openvm_stark_backend::{interaction::PermutationCheckBus, p3_field::PrimeField32};
-use rustc_hash::FxHashSet;
+use std::array;
+
+use openvm_stark_backend::{
+    interaction::PermutationCheckBus, p3_field::PrimeField32, p3_maybe_rayon::prelude::*,
+};
+
+use super::{controller::dimensions::MemoryDimensions, online::LinearMemory};
+use crate::{
+    arch::AddressSpaceHostLayout,
+    system::memory::{online::PAGE_SIZE, AddressMap},
+};
 
-use super::controller::dimensions::MemoryDimensions;
 mod air;
 mod columns;
+pub mod public_values;
 mod trace;
+mod tree;
 
 pub use air::*;
 pub use columns::*;
 pub(super) use trace::SerialReceiver;
+pub use tree::*;
 
 #[cfg(test)]
 mod tests;
 
 pub struct MemoryMerkleChip<const CHUNK: usize, F> {
     pub air: MemoryMerkleAir<CHUNK>,
-    touched_nodes: FxHashSet<(usize, u32, u32)>,
-    num_touched_nonleaves: usize,
     final_state: Option<FinalState<CHUNK, F>>,
     overridden_height: Option<usize>,
+    /// Used for metric collection purposes only
+    #[cfg(feature = "metrics")]
+    pub(crate) current_height: usize,
 }
 #[derive(Debug)]
-struct FinalState<const CHUNK: usize, F> {
+pub struct FinalState<const CHUNK: usize, F> {
     rows: Vec<MemoryMerkleCols<F, CHUNK>>,
     init_root: [F; CHUNK],
     final_root: [F; CHUNK],
@@ -35,46 +47,76 @@ impl<const CHUNK: usize, F: PrimeField32> MemoryMerkleChip<CHUNK, F> {
         merkle_bus: PermutationCheckBus,
         compression_bus: PermutationCheckBus,
     ) -> Self {
-        assert!(memory_dimensions.as_height > 0);
+        assert!(memory_dimensions.addr_space_height > 0);
         assert!(memory_dimensions.address_height > 0);
-        let mut touched_nodes = FxHashSet::default();
-        touched_nodes.insert((memory_dimensions.overall_height(), 0, 0));
         Self {
             air: MemoryMerkleAir {
                 memory_dimensions,
                 merkle_bus,
                 compression_bus,
             },
-            touched_nodes,
-            num_touched_nonleaves: 1,
             final_state: None,
             overridden_height: None,
+            #[cfg(feature = "metrics")]
+            current_height: 0,
         }
     }
     pub fn set_overridden_height(&mut self, override_height: usize) {
         self.overridden_height = Some(override_height);
     }
+}
 
-    fn touch_node(&mut self, height: usize, as_label: u32, address_label: u32) {
-        if self.touched_nodes.insert((height, as_label, address_label)) {
-            assert_ne!(height, self.air.memory_dimensions.overall_height());
-            if height != 0 {
-                self.num_touched_nonleaves += 1;
-            }
-            if height >= self.air.memory_dimensions.address_height {
-                self.touch_node(height + 1, as_label / 2, address_label);
-            } else {
-                self.touch_node(height + 1, as_label, address_label / 2);
-            }
-        }
-    }
+#[tracing::instrument(level = "info", skip_all)]
+fn memory_to_vec_partition<F: PrimeField32, const N: usize>(
+    memory: &AddressMap,
+    md: &MemoryDimensions,
+) -> Vec<(u64, [F; N])> {
+    (0..memory.mem.len())
+        .into_par_iter()
+        .map(move |as_idx| {
+            let space_mem = memory.mem[as_idx].as_slice();
+            let addr_space_layout = memory.config[as_idx].layout;
+            let cell_size = addr_space_layout.size();
+            debug_assert_eq!(PAGE_SIZE % (cell_size * N), 0);
 
-    pub fn touch_range(&mut self, address_space: u32, address: u32, len: u32) {
-        let as_label = address_space - self.air.memory_dimensions.as_offset;
-        let first_address_label = address / CHUNK as u32;
-        let last_address_label = (address + len - 1) / CHUNK as u32;
-        for address_label in first_address_label..=last_address_label {
-            self.touch_node(0, as_label, address_label);
-        }
-    }
+            let num_nonzero_pages = space_mem
+                .par_chunks(PAGE_SIZE)
+                .enumerate()
+                .flat_map(|(idx, page)| {
+                    if page.iter().any(|x| *x != 0) {
+                        Some(idx + 1)
+                    } else {
+                        None
+                    }
+                })
+                .max()
+                .unwrap_or(0);
+
+            let space_mem = &space_mem[..(num_nonzero_pages * PAGE_SIZE).min(space_mem.len())];
+            let mut num_elements = space_mem.len() / (cell_size * N);
+            // virtual memory may be larger than dimensions due to rounding up to page size
+            num_elements = num_elements.min(1 << md.address_height);
+
+            (0..num_elements)
+                .into_par_iter()
+                .map(move |idx| {
+                    (
+                        md.label_to_index((as_idx as u32, idx as u32)),
+                        array::from_fn(|i| unsafe {
+                            // SAFETY: idx < num_elements <= space_mem.len() / (cell_size * N) and
+                            // i < N, so the `cell_size` bytes at ptr are within bounds. We read one
+                            // cell at a time as bytes, so alignment is guaranteed.
+                            let ptr: *const u8 =
+                                space_mem.as_ptr().add(idx * cell_size * N + i * cell_size);
+                            addr_space_layout
+                                .to_field(&*core::ptr::slice_from_raw_parts(ptr, cell_size))
+                        }),
+                    )
+                })
+                .collect::<Vec<_>>()
+        })
+        .collect::<Vec<_>>()
+        .into_iter()
+        .flatten()
+        .collect::<Vec<_>>()
 }
diff --git a/crates/vm/src/system/memory/tree/public_values.rs b/crates/vm/src/system/memory/merkle/public_values.rs
similarity index 63%
rename from crates/vm/src/system/memory/tree/public_values.rs
rename to crates/vm/src/system/memory/merkle/public_values.rs
index 1c6866b959..2870a1a9a2 100644
--- a/crates/vm/src/system/memory/tree/public_values.rs
+++ b/crates/vm/src/system/memory/merkle/public_values.rs
@@ -1,17 +1,18 @@
-use std::{collections::BTreeMap, sync::Arc};
-
+use itertools::Itertools;
 use openvm_stark_backend::{p3_field::PrimeField32, p3_util::log2_strict_usize};
 use serde::{Deserialize, Serialize};
 use thiserror::Error;
+use tracing::instrument;
 
 use crate::{
-    arch::hasher::Hasher,
+    arch::{hasher::Hasher, MemoryCellType, ADDR_SPACE_OFFSET},
     system::memory::{
-        dimensions::MemoryDimensions, paged_vec::Address, tree::MemoryNode, MemoryImage,
+        dimensions::MemoryDimensions, merkle::tree::MerkleTree, online::LinearMemory, MemoryImage,
     },
 };
 
-pub const PUBLIC_VALUES_ADDRESS_SPACE_OFFSET: u32 = 2;
+pub const PUBLIC_VALUES_AS: u32 = 3;
+pub const PUBLIC_VALUES_ADDRESS_SPACE_OFFSET: u32 = PUBLIC_VALUES_AS - ADDR_SPACE_OFFSET;
 
 /// Merkle proof for user public values in the memory state.
 #[derive(Clone, Debug, Serialize, Deserialize)]
@@ -23,7 +24,7 @@ pub struct UserPublicValuesProof<const CHUNK: usize, F> {
     /// Proof of the path from the root of public values to the memory root in the format of
     /// sequence of sibling node hashes.
     pub proof: Vec<[F; CHUNK]>,
-    /// Raw public values. Its length should be a power of two * CHUNK.
+    /// Raw public values. Its length should be (a power of two) * CHUNK.
     pub public_values: Vec<F>,
     /// Merkle root of public values. The computation of this value follows the same logic of
     /// `MemoryNode`. The merkle tree doesn't pad because the length `public_values` implies the
@@ -47,11 +48,14 @@ impl<const CHUNK: usize, F: PrimeField32> UserPublicValuesProof<CHUNK, F> {
     /// Computes the proof of the public values from the final memory state.
     /// Assumption:
     /// - `num_public_values` is a power of two * CHUNK. It cannot be 0.
+    // TODO[jpw]: this currently reconstructs the merkle tree from final memory; we should avoid
+    // this. We should make this a function within SystemChipComplex
+    #[instrument(name = "compute_user_public_values_proof", skip_all)]
     pub fn compute(
         memory_dimensions: MemoryDimensions,
         num_public_values: usize,
         hasher: &(impl Hasher<CHUNK, F> + Sync),
-        final_memory: &MemoryImage<F>,
+        final_memory: &MemoryImage,
     ) -> Self {
         let proof = compute_merkle_proof_to_user_public_values_root(
             memory_dimensions,
@@ -59,8 +63,10 @@ impl<const CHUNK: usize, F: PrimeField32> UserPublicValuesProof<CHUNK, F> {
             hasher,
             final_memory,
         );
-        let public_values =
-            extract_public_values(&memory_dimensions, num_public_values, final_memory);
+        let public_values = extract_public_values(num_public_values, final_memory)
+            .iter()
+            .map(|&x| F::from_canonical_u8(x))
+            .collect_vec();
         let public_values_commit = hasher.merkle_root(&public_values);
         UserPublicValuesProof {
             proof,
@@ -81,7 +87,7 @@ impl<const CHUNK: usize, F: PrimeField32> UserPublicValuesProof<CHUNK, F> {
         // 2. Compare user public values commitment with Merkle root of user public values.
         let pv_commit = self.public_values_commit;
         // 0.
-        let pv_as = PUBLIC_VALUES_ADDRESS_SPACE_OFFSET + memory_dimensions.as_offset;
+        let pv_as = PUBLIC_VALUES_AS;
         let pv_start_idx = memory_dimensions.label_to_index((pv_as, 0));
         let pvs = &self.public_values;
         if pvs.len() % CHUNK != 0 || !(pvs.len() / CHUNK).is_power_of_two() {
@@ -121,14 +127,14 @@ fn compute_merkle_proof_to_user_public_values_root<const CHUNK: usize, F: PrimeF
     memory_dimensions: MemoryDimensions,
     num_public_values: usize,
     hasher: &(impl Hasher<CHUNK, F> + Sync),
-    final_memory: &MemoryImage<F>,
+    final_memory: &MemoryImage,
 ) -> Vec<[F; CHUNK]> {
     assert_eq!(
         num_public_values % CHUNK,
         0,
         "num_public_values must be a multiple of memory chunk {CHUNK}"
     );
-    let root = MemoryNode::tree_from_memory(memory_dimensions, final_memory, hasher);
+    let tree = MerkleTree::<F, CHUNK>::from_memory(final_memory, &memory_dimensions, hasher);
     let num_pv_chunks: usize = num_public_values / CHUNK;
     // This enforces the number of public values cannot be 0.
     assert!(
@@ -138,63 +144,45 @@ fn compute_merkle_proof_to_user_public_values_root<const CHUNK: usize, F: PrimeF
     let pv_height = log2_strict_usize(num_pv_chunks);
     let address_leading_zeros = memory_dimensions.address_height - pv_height;
 
-    let mut curr_node = Arc::new(root);
-    let mut proof = Vec::with_capacity(memory_dimensions.as_height + address_leading_zeros);
-    for i in 0..memory_dimensions.as_height {
-        let bit = 1 << (memory_dimensions.as_height - i - 1);
-        if let MemoryNode::NonLeaf { left, right, .. } = curr_node.as_ref().clone() {
-            if PUBLIC_VALUES_ADDRESS_SPACE_OFFSET & bit != 0 {
-                curr_node = right;
-                proof.push(left.hash());
-            } else {
-                curr_node = left;
-                proof.push(right.hash());
-            }
+    let mut cur_node_idx = 1; // root
+    let mut proof = Vec::with_capacity(memory_dimensions.addr_space_height + address_leading_zeros);
+    for i in 0..memory_dimensions.addr_space_height {
+        let bit = 1 << (memory_dimensions.addr_space_height - i - 1);
+        if (PUBLIC_VALUES_AS - ADDR_SPACE_OFFSET) & bit != 0 {
+            proof.push(tree.get_node(cur_node_idx * 2));
+            cur_node_idx = cur_node_idx * 2 + 1;
         } else {
-            unreachable!()
+            proof.push(tree.get_node(cur_node_idx * 2 + 1));
+            cur_node_idx *= 2;
         }
     }
     for _ in 0..address_leading_zeros {
-        if let MemoryNode::NonLeaf { left, right, .. } = curr_node.as_ref().clone() {
-            curr_node = left;
-            proof.push(right.hash());
-        } else {
-            unreachable!()
-        }
+        // always go left
+        proof.push(tree.get_node(cur_node_idx * 2 + 1));
+        cur_node_idx *= 2;
     }
     proof.reverse();
     proof
 }
 
-pub fn extract_public_values<F: PrimeField32>(
-    memory_dimensions: &MemoryDimensions,
-    num_public_values: usize,
-    final_memory: &MemoryImage<F>,
-) -> Vec<F> {
-    // All (addr, value) pairs in the public value address space.
-    let f_as_start = PUBLIC_VALUES_ADDRESS_SPACE_OFFSET + memory_dimensions.as_offset;
-    let f_as_end = PUBLIC_VALUES_ADDRESS_SPACE_OFFSET + memory_dimensions.as_offset + 1;
-
-    // This clones the entire memory. Ideally this should run in time proportional to
-    // the size of the PV address space, not entire memory.
-    let final_memory: BTreeMap<Address, F> = final_memory.items().collect();
-
-    let used_pvs: Vec<_> = final_memory
-        .range((f_as_start, 0)..(f_as_end, 0))
-        .map(|(&(_, pointer), &value)| (pointer as usize, value))
-        .collect();
-    if let Some(&last_pv) = used_pvs.last() {
-        assert!(
-            last_pv.0 < num_public_values || last_pv.1 == F::ZERO,
-            "Last public value is out of bounds"
+pub fn extract_public_values(num_public_values: usize, final_memory: &MemoryImage) -> Vec<u8> {
+    let mut public_values: Vec<u8> = {
+        assert_eq!(
+            final_memory.config[PUBLIC_VALUES_AS as usize].layout,
+            MemoryCellType::U8
         );
-    }
-    let mut public_values = F::zero_vec(num_public_values);
-    for (i, pv) in used_pvs {
-        if i < num_public_values {
-            public_values[i] = pv;
-        }
-    }
+        final_memory.mem[PUBLIC_VALUES_AS as usize]
+            .as_slice()
+            .to_vec()
+    };
+
+    assert!(
+        public_values.len() >= num_public_values,
+        "Public values address space has {} elements, but configuration has num_public_values={}",
+        public_values.len(),
+        num_public_values
+    );
+    public_values.truncate(num_public_values);
     public_values
 }
 
@@ -203,27 +191,32 @@ mod tests {
     use openvm_stark_backend::p3_field::FieldAlgebra;
     use openvm_stark_sdk::p3_baby_bear::BabyBear;
 
-    use super::{UserPublicValuesProof, PUBLIC_VALUES_ADDRESS_SPACE_OFFSET};
+    use super::UserPublicValuesProof;
     use crate::{
-        arch::{hasher::poseidon2::vm_poseidon2_hasher, SystemConfig},
-        system::memory::{paged_vec::AddressMap, tree::MemoryNode, CHUNK},
+        arch::{hasher::poseidon2::vm_poseidon2_hasher, MemoryConfig, SystemConfig},
+        system::memory::{
+            merkle::{public_values::PUBLIC_VALUES_AS, tree::MerkleTree},
+            online::GuestMemory,
+            AddressMap, CHUNK,
+        },
     };
 
     type F = BabyBear;
     #[test]
     fn test_public_value_happy_path() {
-        let mut vm_config = SystemConfig::default();
-        vm_config.memory_config.as_height = 4;
+        let mut vm_config = SystemConfig::default().without_continuations();
+        vm_config.memory_config.addr_space_height = 4;
         vm_config.memory_config.pointer_max_bits = 5;
         let memory_dimensions = vm_config.memory_config.memory_dimensions();
-        let pv_as = PUBLIC_VALUES_ADDRESS_SPACE_OFFSET + memory_dimensions.as_offset;
         let num_public_values = 16;
-        let memory = AddressMap::from_iter(
-            memory_dimensions.as_offset,
-            1 << memory_dimensions.as_height,
-            1 << memory_dimensions.address_height,
-            [((pv_as, 15), F::ONE)],
-        );
+        let mut addr_spaces_config = MemoryConfig::empty_address_space_configs(4);
+        addr_spaces_config[PUBLIC_VALUES_AS as usize].num_cells = num_public_values;
+        let mut memory = GuestMemory {
+            memory: AddressMap::new(addr_spaces_config),
+        };
+        unsafe {
+            memory.write::<u8, 4>(PUBLIC_VALUES_AS, 12, [0, 0, 0, 1]);
+        }
         let mut expected_pvs = F::zero_vec(num_public_values);
         expected_pvs[15] = F::ONE;
 
@@ -232,12 +225,13 @@ mod tests {
             memory_dimensions,
             num_public_values,
             &hasher,
-            &memory,
+            &memory.memory,
         );
         assert_eq!(pv_proof.public_values, expected_pvs);
-        let final_memory_root = MemoryNode::tree_from_memory(memory_dimensions, &memory, &hasher);
+        let final_memory_root =
+            MerkleTree::from_memory(&memory.memory, &memory_dimensions, &hasher).root();
         pv_proof
-            .verify(&hasher, memory_dimensions, final_memory_root.hash())
+            .verify(&hasher, memory_dimensions, final_memory_root)
             .unwrap();
     }
 }
diff --git a/crates/vm/src/system/memory/merkle/tests/mod.rs b/crates/vm/src/system/memory/merkle/tests/mod.rs
index 05c966dc23..09d996393e 100644
--- a/crates/vm/src/system/memory/merkle/tests/mod.rs
+++ b/crates/vm/src/system/memory/merkle/tests/mod.rs
@@ -1,7 +1,7 @@
 use std::{
     array,
     borrow::BorrowMut,
-    collections::{BTreeSet, HashSet},
+    collections::{BTreeMap, BTreeSet, HashSet},
     sync::Arc,
 };
 
@@ -9,8 +9,7 @@ use openvm_stark_backend::{
     interaction::{PermutationCheckBus, PermutationInteractionType},
     p3_field::FieldAlgebra,
     p3_matrix::dense::RowMajorMatrix,
-    prover::types::AirProofInput,
-    Chip, ChipUsageGetter,
+    prover::types::AirProvingContext,
 };
 use openvm_stark_sdk::{
     config::baby_bear_poseidon2::BabyBearPoseidon2Engine,
@@ -20,84 +19,83 @@ use openvm_stark_sdk::{
 use rand::RngCore;
 
 use crate::{
-    arch::testing::{MEMORY_MERKLE_BUS, POSEIDON2_DIRECT_BUS},
+    arch::{
+        testing::{MEMORY_MERKLE_BUS, POSEIDON2_DIRECT_BUS},
+        AddressSpaceHostConfig, MemoryCellType, MemoryConfig, ADDR_SPACE_OFFSET,
+    },
     system::memory::{
         merkle::{
-            columns::MemoryMerkleCols, tests::util::HashTestChip, MemoryDimensions,
-            MemoryMerkleChip,
+            memory_to_vec_partition, tests::util::HashTestChip, MemoryDimensions, MemoryMerkleChip,
+            MemoryMerkleCols, MerkleTree,
         },
-        paged_vec::{AddressMap, PAGE_SIZE},
-        tree::MemoryNode,
-        Equipartition, MemoryImage,
+        online::{GuestMemory, LinearMemory},
+        AddressMap, MemoryImage,
     },
 };
 
 mod util;
 
-const DEFAULT_CHUNK: usize = 8;
+const CHUNK: usize = 8;
 const COMPRESSION_BUS: PermutationCheckBus = PermutationCheckBus::new(POSEIDON2_DIRECT_BUS);
+type F = BabyBear;
 
-fn test<const CHUNK: usize>(
+fn test(
     memory_dimensions: MemoryDimensions,
-    initial_memory: &MemoryImage<BabyBear>,
+    initial_memory: &MemoryImage,
     touched_labels: BTreeSet<(u32, u32)>,
-    final_memory: &MemoryImage<BabyBear>,
+    final_memory: &MemoryImage,
 ) {
     let MemoryDimensions {
-        as_height,
+        addr_space_height,
         address_height,
-        as_offset,
     } = memory_dimensions;
+
     let merkle_bus = PermutationCheckBus::new(MEMORY_MERKLE_BUS);
 
-    // checking validity of test data
-    for ((address_space, pointer), value) in final_memory.items() {
-        let label = pointer / CHUNK as u32;
-        assert!(address_space - as_offset < (1 << as_height));
-        assert!(pointer < ((CHUNK << address_height).div_ceil(PAGE_SIZE) * PAGE_SIZE) as u32);
-        if initial_memory.get(&(address_space, pointer)) != Some(&value) {
-            assert!(touched_labels.contains(&(address_space, label)));
-        }
-    }
-    for key in initial_memory.items().map(|(key, _)| key) {
-        assert!(final_memory.get(&key).is_some());
-    }
-    for &(address_space, label) in touched_labels.iter() {
-        let mut contains_some_key = false;
-        for i in 0..CHUNK {
-            if final_memory
-                .get(&(address_space, label * CHUNK as u32 + i as u32))
-                .is_some()
-            {
-                contains_some_key = true;
-                break;
+    for address_space in 0..final_memory.config.len() {
+        for pointer in 0..final_memory.mem[address_space].size() / 4 {
+            if unsafe {
+                initial_memory.get_f::<F>(address_space as u32, pointer as u32)
+                    != final_memory.get_f(address_space as u32, pointer as u32)
+            } {
+                let label = (pointer / CHUNK) as u32;
+                assert!(address_space - (ADDR_SPACE_OFFSET as usize) < (1 << addr_space_height));
+                assert!(pointer < (CHUNK << address_height));
+                assert!(touched_labels.contains(&(address_space as u32, label)));
             }
         }
-        assert!(contains_some_key);
     }
 
     let mut hash_test_chip = HashTestChip::new();
 
-    let initial_tree =
-        MemoryNode::tree_from_memory(memory_dimensions, initial_memory, &hash_test_chip);
     let final_tree_check =
-        MemoryNode::tree_from_memory(memory_dimensions, final_memory, &hash_test_chip);
+        MerkleTree::from_memory(final_memory, &memory_dimensions, &hash_test_chip);
 
     let mut chip =
         MemoryMerkleChip::<CHUNK, _>::new(memory_dimensions, merkle_bus, COMPRESSION_BUS);
-    for &(address_space, label) in touched_labels.iter() {
-        chip.touch_range(address_space, label * CHUNK as u32, CHUNK as u32);
-    }
+    let final_partition: BTreeMap<_, [F; CHUNK]> =
+        memory_to_vec_partition::<F, CHUNK>(final_memory, &memory_dimensions)
+            .into_iter()
+            .map(|(idx, values)| {
+                let address_space =
+                    (idx >> memory_dimensions.address_height) as u32 + ADDR_SPACE_OFFSET;
+                let label = (idx & ((1 << memory_dimensions.address_height) - 1)) as u32;
+                ((address_space, label * (CHUNK as u32)), values)
+            })
+            .collect();
+    let final_partition = final_partition
+        .into_iter()
+        .filter(|((address_space, pointer), _)| {
+            touched_labels.contains(&(*address_space, pointer / CHUNK as u32))
+        })
+        .collect();
+    chip.finalize(initial_memory, &final_partition, &hash_test_chip);
 
-    let final_partition = memory_to_partition(final_memory);
-    println!("trace height = {}", chip.current_trace_height());
-    chip.finalize(&initial_tree, &final_partition, &mut hash_test_chip);
     assert_eq!(
         chip.final_state.as_ref().unwrap().final_root,
-        final_tree_check.hash()
+        final_tree_check.root()
     );
-    let chip_air = chip.air();
-    let chip_api = chip.generate_air_proof_input();
+    let chip_api = chip.generate_proving_ctx();
 
     let dummy_interaction_air = DummyInteractionAir::new(4 + CHUNK, true, merkle_bus.index);
     let mut dummy_interaction_trace_rows = vec![];
@@ -126,13 +124,12 @@ fn test<const CHUNK: usize>(
     };
 
     for (address_space, address_label) in touched_labels {
-        let initial_values = array::from_fn(|i| {
-            initial_memory
-                .get(&(address_space, address_label * CHUNK as u32 + i as u32))
-                .copied()
-                .unwrap_or_default()
-        });
-        let as_label = address_space - as_offset;
+        let initial_values = unsafe {
+            array::from_fn(|i| {
+                initial_memory.get((address_space, address_label * CHUNK as u32 + i as u32))
+            })
+        };
+        let as_label = address_space - ADDR_SPACE_OFFSET;
         interaction(
             PermutationInteractionType::Send,
             false,
@@ -142,7 +139,7 @@ fn test<const CHUNK: usize>(
             initial_values,
         );
         let final_values = *final_partition
-            .get(&(address_space, address_label))
+            .get(&(address_space, address_label * (CHUNK as u32)))
             .unwrap();
         interaction(
             PermutationInteractionType::Send,
@@ -163,38 +160,24 @@ fn test<const CHUNK: usize>(
         dummy_interaction_trace_rows,
         dummy_interaction_air.field_width() + 1,
     );
-    let dummy_interaction_api = AirProofInput::simple_no_pis(dummy_interaction_trace);
+    let dummy_interaction_api = AirProvingContext::simple_no_pis(Arc::new(dummy_interaction_trace));
 
     BabyBearPoseidon2Engine::run_test_fast(
         vec![
-            chip_air,
+            Arc::new(chip.air),
             Arc::new(dummy_interaction_air),
             Arc::new(hash_test_chip.air()),
         ],
         vec![
             chip_api,
             dummy_interaction_api,
-            hash_test_chip.generate_air_proof_input(),
+            hash_test_chip.generate_proving_ctx(),
         ],
     )
     .expect("Verification failed");
 }
 
-fn memory_to_partition<F: Default + Copy, const N: usize>(
-    memory: &MemoryImage<F>,
-) -> Equipartition<F, N> {
-    let mut memory_partition = Equipartition::new();
-    for ((address_space, pointer), value) in memory.items() {
-        let label = (address_space, pointer / N as u32);
-        let chunk = memory_partition
-            .entry(label)
-            .or_insert_with(|| [F::default(); N]);
-        chunk[(pointer % N as u32) as usize] = value;
-    }
-    memory_partition
-}
-
-fn random_test<const CHUNK: usize>(
+fn random_test(
     height: usize,
     max_value: u32,
     mut num_initial_addresses: usize,
@@ -203,8 +186,34 @@ fn random_test<const CHUNK: usize>(
     let mut rng = create_seeded_rng();
     let mut next_u32 = || rng.next_u64() as u32;
 
-    let mut initial_memory = AddressMap::new(1, 2, CHUNK << height);
-    let mut final_memory = AddressMap::new(1, 2, CHUNK << height);
+    let mem_config = MemoryConfig::new(
+        1,
+        vec![
+            AddressSpaceHostConfig {
+                num_cells: 0,
+                min_block_size: 0,
+                layout: MemoryCellType::Null,
+            },
+            AddressSpaceHostConfig {
+                num_cells: CHUNK << height,
+                min_block_size: 1,
+                layout: MemoryCellType::Native { size: 4 },
+            },
+            AddressSpaceHostConfig {
+                num_cells: CHUNK << height,
+                min_block_size: 1,
+                layout: MemoryCellType::Native { size: 4 },
+            },
+        ],
+        height + 3,
+        20,
+        17,
+        32,
+    );
+
+    let mut initial_memory = GuestMemory::new(AddressMap::from_mem_config(&mem_config));
+    let mut final_memory = GuestMemory::new(AddressMap::from_mem_config(&mem_config));
+
     let mut seen = HashSet::new();
     let mut touched_labels = BTreeSet::new();
 
@@ -221,132 +230,155 @@ fn random_test<const CHUNK: usize>(
             if is_initial && num_initial_addresses != 0 {
                 num_initial_addresses -= 1;
                 let value = BabyBear::from_canonical_u32(next_u32() % max_value);
-                initial_memory.insert(&(address_space, pointer), value);
-                final_memory.insert(&(address_space, pointer), value);
+                unsafe {
+                    initial_memory.write(address_space, pointer, [value]);
+                    final_memory.write(address_space, pointer, [value]);
+                }
             }
             if is_touched && num_touched_addresses != 0 {
                 num_touched_addresses -= 1;
                 touched_labels.insert((address_space, label));
                 if value_changes || !is_initial {
                     let value = BabyBear::from_canonical_u32(next_u32() % max_value);
-                    final_memory.insert(&(address_space, pointer), value);
+                    unsafe {
+                        final_memory.write(address_space, pointer, [value]);
+                    }
                 }
             }
         }
     }
 
-    test::<CHUNK>(
+    test(
         MemoryDimensions {
-            as_height: 1,
+            addr_space_height: 1,
             address_height: height,
-            as_offset: 1,
         },
-        &initial_memory,
+        &initial_memory.memory,
         touched_labels,
-        &final_memory,
+        &final_memory.memory,
     );
 }
 
 #[test]
 fn expand_test_0() {
-    random_test::<DEFAULT_CHUNK>(2, 3000, 2, 3);
+    random_test(2, 3000, 2, 3);
 }
 
 #[test]
 fn expand_test_1() {
-    random_test::<DEFAULT_CHUNK>(10, 3000, 400, 30);
+    random_test(10, 3000, 400, 30);
 }
 
 #[test]
 fn expand_test_2() {
-    random_test::<DEFAULT_CHUNK>(3, 3000, 3, 2);
+    random_test(3, 3000, 3, 2);
 }
 
 #[test]
 fn expand_test_no_accesses() {
-    let memory_dimensions = MemoryDimensions {
-        as_height: 2,
-        address_height: 1,
-        as_offset: 7,
-    };
     let mut hash_test_chip = HashTestChip::new();
+    let height = 1;
 
-    let memory = AddressMap::new(
-        memory_dimensions.as_offset,
-        1 << memory_dimensions.as_height,
-        1 << memory_dimensions.address_height,
-    );
-    let tree = MemoryNode::<DEFAULT_CHUNK, _>::tree_from_memory(
-        memory_dimensions,
-        &memory,
-        &hash_test_chip,
+    let mem_config = MemoryConfig::new(
+        1,
+        vec![
+            AddressSpaceHostConfig {
+                num_cells: 0,
+                min_block_size: 0,
+                layout: MemoryCellType::Null,
+            },
+            AddressSpaceHostConfig {
+                num_cells: CHUNK << height,
+                min_block_size: 1,
+                layout: MemoryCellType::Native { size: 4 },
+            },
+            AddressSpaceHostConfig {
+                num_cells: CHUNK << height,
+                min_block_size: 1,
+                layout: MemoryCellType::Native { size: 4 },
+            },
+        ],
+        height + 3,
+        20,
+        17,
+        32,
     );
+    let md = mem_config.memory_dimensions();
 
-    let mut chip: MemoryMerkleChip<DEFAULT_CHUNK, _> = MemoryMerkleChip::new(
-        memory_dimensions,
+    let memory = AddressMap::from_mem_config(&mem_config);
+
+    let mut chip: MemoryMerkleChip<CHUNK, _> = MemoryMerkleChip::new(
+        md,
         PermutationCheckBus::new(MEMORY_MERKLE_BUS),
         COMPRESSION_BUS,
     );
 
-    let partition = memory_to_partition(&memory);
-    chip.finalize(&tree, &partition, &mut hash_test_chip);
+    chip.finalize(&memory, &BTreeMap::new(), &hash_test_chip);
+    let trace = chip.generate_proving_ctx();
     BabyBearPoseidon2Engine::run_test_fast(
-        vec![chip.air(), Arc::new(hash_test_chip.air())],
-        vec![
-            chip.generate_air_proof_input(),
-            hash_test_chip.generate_air_proof_input(),
-        ],
+        vec![Arc::new(chip.air), Arc::new(hash_test_chip.air())],
+        vec![trace, hash_test_chip.generate_proving_ctx()],
     )
-    .expect("This should occur");
+    .expect("Empty touched memory doesn't work");
 }
 
 #[test]
 #[should_panic]
 fn expand_test_negative() {
-    let memory_dimensions = MemoryDimensions {
-        as_height: 2,
-        address_height: 1,
-        as_offset: 7,
-    };
-
     let mut hash_test_chip = HashTestChip::new();
+    let height = 1;
 
-    let memory = AddressMap::new(
-        memory_dimensions.as_offset,
-        1 << memory_dimensions.as_height,
-        1 << memory_dimensions.address_height,
-    );
-    let tree = MemoryNode::<DEFAULT_CHUNK, _>::tree_from_memory(
-        memory_dimensions,
-        &memory,
-        &hash_test_chip,
+    let mem_config = MemoryConfig::new(
+        1,
+        vec![
+            AddressSpaceHostConfig {
+                num_cells: 0,
+                min_block_size: 0,
+                layout: MemoryCellType::Null,
+            },
+            AddressSpaceHostConfig {
+                num_cells: CHUNK << height,
+                min_block_size: 1,
+                layout: MemoryCellType::Native { size: 4 },
+            },
+            AddressSpaceHostConfig {
+                num_cells: CHUNK << height,
+                min_block_size: 1,
+                layout: MemoryCellType::Native { size: 4 },
+            },
+        ],
+        height + 3,
+        20,
+        17,
+        32,
     );
+    let md = mem_config.memory_dimensions();
+
+    let memory = AddressMap::from_mem_config(&mem_config);
 
-    let mut chip = MemoryMerkleChip::<DEFAULT_CHUNK, _>::new(
-        memory_dimensions,
+    let mut chip: MemoryMerkleChip<CHUNK, _> = MemoryMerkleChip::new(
+        md,
         PermutationCheckBus::new(MEMORY_MERKLE_BUS),
         COMPRESSION_BUS,
     );
 
-    let partition = memory_to_partition(&memory);
-    chip.finalize(&tree, &partition, &mut hash_test_chip);
-    let air = chip.air();
-    let mut chip_api = chip.generate_air_proof_input();
+    chip.finalize(&memory, &BTreeMap::new(), &hash_test_chip);
+    let mut chip_ctx = chip.generate_proving_ctx();
     {
-        let trace = chip_api.raw.common_main.as_mut().unwrap();
+        let mut trace = (*chip_ctx.clone().common_main.unwrap()).clone();
         for row in trace.rows_mut() {
-            let row: &mut MemoryMerkleCols<_, DEFAULT_CHUNK> = row.borrow_mut();
+            let row: &mut MemoryMerkleCols<_, CHUNK> = row.borrow_mut();
             if row.expand_direction == BabyBear::NEG_ONE {
                 row.left_direction_different = BabyBear::ZERO;
                 row.right_direction_different = BabyBear::ZERO;
             }
         }
+        chip_ctx.common_main.replace(Arc::new(trace));
     }
 
-    let hash_air = Arc::new(hash_test_chip.air());
     BabyBearPoseidon2Engine::run_test_fast(
-        vec![air, hash_air],
-        vec![chip_api, hash_test_chip.generate_air_proof_input()],
+        vec![Arc::new(chip.air), Arc::new(hash_test_chip.air())],
+        vec![chip_ctx, hash_test_chip.generate_proving_ctx()],
     )
-    .expect("This should occur");
+    .expect("We tinkered with the trace and now it doesn't pass");
 }
diff --git a/crates/vm/src/system/memory/merkle/tests/util.rs b/crates/vm/src/system/memory/merkle/tests/util.rs
index c838fa06db..d976979d6b 100644
--- a/crates/vm/src/system/memory/merkle/tests/util.rs
+++ b/crates/vm/src/system/memory/merkle/tests/util.rs
@@ -1,4 +1,7 @@
-use std::array::from_fn;
+use std::{
+    array::from_fn,
+    sync::{Arc, Mutex},
+};
 
 use openvm_stark_backend::{
     config::{Domain, StarkGenericConfig},
@@ -6,7 +9,7 @@ use openvm_stark_backend::{
     p3_commit::PolynomialSpace,
     p3_field::Field,
     p3_matrix::dense::RowMajorMatrix,
-    prover::types::AirProofInput,
+    prover::{cpu::CpuBackend, types::AirProvingContext},
 };
 use openvm_stark_sdk::dummy_airs::interaction::dummy_interaction_air::DummyInteractionAir;
 
@@ -23,12 +26,14 @@ pub fn test_hash_sum<const CHUNK: usize, F: Field>(
 }
 
 pub struct HashTestChip<const CHUNK: usize, F> {
-    requests: Vec<[[F; CHUNK]; 3]>,
+    requests: Mutex<Vec<[[F; CHUNK]; 3]>>,
 }
 
 impl<const CHUNK: usize, F: Field> HashTestChip<CHUNK, F> {
     pub fn new() -> Self {
-        Self { requests: vec![] }
+        Self {
+            requests: Mutex::new(vec![]),
+        }
     }
 
     pub fn air(&self) -> DummyInteractionAir {
@@ -37,7 +42,8 @@ impl<const CHUNK: usize, F: Field> HashTestChip<CHUNK, F> {
 
     pub fn trace(&self) -> RowMajorMatrix<F> {
         let mut rows = vec![];
-        for request in self.requests.iter() {
+        let requests = self.requests.lock().expect("mutex poisoned");
+        for request in requests.iter() {
             rows.push(F::ONE);
             rows.extend(request.iter().flatten());
         }
@@ -47,11 +53,12 @@ impl<const CHUNK: usize, F: Field> HashTestChip<CHUNK, F> {
         }
         RowMajorMatrix::new(rows, width)
     }
-    pub fn generate_air_proof_input<SC: StarkGenericConfig>(&self) -> AirProofInput<SC>
+    pub fn generate_proving_ctx<SC>(&mut self) -> AirProvingContext<CpuBackend<SC>>
     where
+        SC: StarkGenericConfig,
         Domain<SC>: PolynomialSpace<Val = F>,
     {
-        AirProofInput::simple_no_pis(self.trace())
+        AirProvingContext::simple_no_pis(Arc::new(self.trace()))
     }
 }
 
@@ -60,10 +67,12 @@ impl<const CHUNK: usize, F: Field> Hasher<CHUNK, F> for HashTestChip<CHUNK, F> {
         test_hash_sum(*left, *right)
     }
 }
+
 impl<const CHUNK: usize, F: Field> HasherChip<CHUNK, F> for HashTestChip<CHUNK, F> {
-    fn compress_and_record(&mut self, left: &[F; CHUNK], right: &[F; CHUNK]) -> [F; CHUNK] {
+    fn compress_and_record(&self, left: &[F; CHUNK], right: &[F; CHUNK]) -> [F; CHUNK] {
         let result = test_hash_sum(*left, *right);
-        self.requests.push([*left, *right, result]);
+        let mut requests = self.requests.lock().expect("mutex poisoned");
+        requests.push([*left, *right, result]);
         result
     }
 }
diff --git a/crates/vm/src/system/memory/merkle/trace.rs b/crates/vm/src/system/memory/merkle/trace.rs
index 52609f259a..f6135e014d 100644
--- a/crates/vm/src/system/memory/merkle/trace.rs
+++ b/crates/vm/src/system/memory/merkle/trace.rs
@@ -1,26 +1,24 @@
 use std::{
     borrow::BorrowMut,
-    cmp::Reverse,
     sync::{atomic::AtomicU32, Arc},
 };
 
 use openvm_stark_backend::{
-    config::{StarkGenericConfig, Val},
-    p3_field::{FieldAlgebra, PrimeField32},
+    config::{Domain, StarkGenericConfig, Val},
+    p3_commit::PolynomialSpace,
+    p3_field::PrimeField32,
     p3_matrix::dense::RowMajorMatrix,
-    prover::types::AirProofInput,
-    AirRef, Chip, ChipUsageGetter,
+    prover::{cpu::CpuBackend, types::AirProvingContext},
+    ChipUsageGetter,
 };
-use rustc_hash::FxHashSet;
+use tracing::instrument;
 
 use crate::{
     arch::hasher::HasherChip,
     system::{
         memory::{
-            controller::dimensions::MemoryDimensions,
-            merkle::{FinalState, MemoryMerkleChip, MemoryMerkleCols},
-            tree::MemoryNode::{self, NonLeaf},
-            Equipartition,
+            merkle::{tree::MerkleTree, FinalState, MemoryMerkleChip, MemoryMerkleCols},
+            Equipartition, MemoryImage,
         },
         poseidon2::{
             Poseidon2PeripheryBaseChip, Poseidon2PeripheryChip, PERIPHERY_POSEIDON2_WIDTH,
@@ -29,51 +27,28 @@ use crate::{
 };
 
 impl<const CHUNK: usize, F: PrimeField32> MemoryMerkleChip<CHUNK, F> {
-    pub fn finalize(
+    #[instrument(name = "merkle_finalize", level = "debug", skip_all)]
+    pub(crate) fn finalize(
         &mut self,
-        initial_tree: &MemoryNode<CHUNK, F>,
+        initial_memory: &MemoryImage,
         final_memory: &Equipartition<F, CHUNK>,
-        hasher: &mut impl HasherChip<CHUNK, F>,
+        hasher: &impl HasherChip<CHUNK, F>,
     ) {
         assert!(self.final_state.is_none(), "Merkle chip already finalized");
-        // there needs to be a touched node with `height_section` = 0
-        // shouldn't be a leaf because
-        // trace generation will expect an interaction from MemoryInterfaceChip in that case
-        if self.touched_nodes.len() == 1 {
-            self.touch_node(1, 0, 0);
-        }
-
-        let mut rows = vec![];
-        let mut tree_helper = TreeHelper {
-            memory_dimensions: self.air.memory_dimensions,
-            final_memory,
-            touched_nodes: &self.touched_nodes,
-            trace_rows: &mut rows,
-        };
-        let final_tree = tree_helper.recur(
-            self.air.memory_dimensions.overall_height(),
-            initial_tree,
-            0,
-            0,
-            hasher,
-        );
-        self.final_state = Some(FinalState {
-            rows,
-            init_root: initial_tree.hash(),
-            final_root: final_tree.hash(),
-        });
+        let mut tree = MerkleTree::from_memory(initial_memory, &self.air.memory_dimensions, hasher);
+        self.final_state = Some(tree.finalize(hasher, final_memory, &self.air.memory_dimensions));
     }
 }
 
-impl<const CHUNK: usize, SC: StarkGenericConfig> Chip<SC> for MemoryMerkleChip<CHUNK, Val<SC>>
+impl<const CHUNK: usize, F> MemoryMerkleChip<CHUNK, F>
 where
-    Val<SC>: PrimeField32,
+    F: PrimeField32,
 {
-    fn air(&self) -> AirRef<SC> {
-        Arc::new(self.air.clone())
-    }
-
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
+    pub fn generate_proving_ctx<SC>(&mut self) -> AirProvingContext<CpuBackend<SC>>
+    where
+        SC: StarkGenericConfig,
+        Domain<SC>: PolynomialSpace<Val = F>,
+    {
         assert!(
             self.final_state.is_some(),
             "Merkle chip must finalize before trace generation"
@@ -82,11 +57,16 @@ where
             mut rows,
             init_root,
             final_root,
-        } = self.final_state.unwrap();
+        } = self.final_state.take().unwrap();
-        // important that this sort be stable,
+        // NOTE(review): rows presumably arrive bottom-up, so reverse and swap the leading pair,
         // because we need the initial root to be first and the final root to be second
-        rows.sort_by_key(|row| Reverse(row.parent_height));
+        rows.reverse();
+        rows.swap(0, 1);
 
+        #[cfg(feature = "metrics")]
+        {
+            self.current_height = rows.len();
+        }
         let width = MemoryMerkleCols::<Val<SC>, CHUNK>::width();
         let mut height = rows.len().next_power_of_two();
         if let Some(mut oh) = self.overridden_height {
@@ -103,9 +83,9 @@ where
             *trace_row.borrow_mut() = row;
         }
 
-        let trace = RowMajorMatrix::new(trace, width);
+        let trace = Arc::new(RowMajorMatrix::new(trace, width));
         let pvs = init_root.into_iter().chain(final_root).collect();
-        AirProofInput::simple(trace, pvs)
+        AirProvingContext::simple(trace, pvs)
     }
 }
 impl<const CHUNK: usize, F: PrimeField32> ChipUsageGetter for MemoryMerkleChip<CHUNK, F> {
@@ -114,7 +94,7 @@ impl<const CHUNK: usize, F: PrimeField32> ChipUsageGetter for MemoryMerkleChip<C
     }
 
     fn current_trace_height(&self) -> usize {
-        2 * self.num_touched_nonleaves
+        self.final_state.as_ref().map(|s| s.rows.len()).unwrap_or(0)
     }
 
     fn trace_width(&self) -> usize {
@@ -122,138 +102,8 @@ impl<const CHUNK: usize, F: PrimeField32> ChipUsageGetter for MemoryMerkleChip<C
     }
 }
 
-struct TreeHelper<'a, const CHUNK: usize, F: PrimeField32> {
-    memory_dimensions: MemoryDimensions,
-    final_memory: &'a Equipartition<F, CHUNK>,
-    touched_nodes: &'a FxHashSet<(usize, u32, u32)>,
-    trace_rows: &'a mut Vec<MemoryMerkleCols<F, CHUNK>>,
-}
-
-impl<const CHUNK: usize, F: PrimeField32> TreeHelper<'_, CHUNK, F> {
-    fn recur(
-        &mut self,
-        height: usize,
-        initial_node: &MemoryNode<CHUNK, F>,
-        as_label: u32,
-        address_label: u32,
-        hasher: &mut impl HasherChip<CHUNK, F>,
-    ) -> MemoryNode<CHUNK, F> {
-        if height == 0 {
-            let address_space = as_label + self.memory_dimensions.as_offset;
-            let leaf_values = *self
-                .final_memory
-                .get(&(address_space, address_label))
-                .unwrap_or(&[F::ZERO; CHUNK]);
-            MemoryNode::new_leaf(hasher.hash(&leaf_values))
-        } else if let NonLeaf {
-            left: initial_left_node,
-            right: initial_right_node,
-            ..
-        } = initial_node.clone()
-        {
-            // Tell the hasher about this hash.
-            hasher.compress_and_record(&initial_left_node.hash(), &initial_right_node.hash());
-
-            let is_as_section = height > self.memory_dimensions.address_height;
-
-            let (left_as_label, right_as_label) = if is_as_section {
-                (2 * as_label, 2 * as_label + 1)
-            } else {
-                (as_label, as_label)
-            };
-            let (left_address_label, right_address_label) = if is_as_section {
-                (address_label, address_label)
-            } else {
-                (2 * address_label, 2 * address_label + 1)
-            };
-
-            let left_is_final =
-                !self
-                    .touched_nodes
-                    .contains(&(height - 1, left_as_label, left_address_label));
-
-            let final_left_node = if left_is_final {
-                initial_left_node
-            } else {
-                Arc::new(self.recur(
-                    height - 1,
-                    &initial_left_node,
-                    left_as_label,
-                    left_address_label,
-                    hasher,
-                ))
-            };
-
-            let right_is_final =
-                !self
-                    .touched_nodes
-                    .contains(&(height - 1, right_as_label, right_address_label));
-
-            let final_right_node = if right_is_final {
-                initial_right_node
-            } else {
-                Arc::new(self.recur(
-                    height - 1,
-                    &initial_right_node,
-                    right_as_label,
-                    right_address_label,
-                    hasher,
-                ))
-            };
-
-            let final_node = MemoryNode::new_nonleaf(final_left_node, final_right_node, hasher);
-            self.add_trace_row(height, as_label, address_label, initial_node, None);
-            self.add_trace_row(
-                height,
-                as_label,
-                address_label,
-                &final_node,
-                Some([left_is_final, right_is_final]),
-            );
-            final_node
-        } else {
-            panic!("Leaf {:?} found at nonzero height {}", initial_node, height);
-        }
-    }
-
-    /// Expects `node` to be NonLeaf
-    fn add_trace_row(
-        &mut self,
-        parent_height: usize,
-        as_label: u32,
-        address_label: u32,
-        node: &MemoryNode<CHUNK, F>,
-        direction_changes: Option<[bool; 2]>,
-    ) {
-        let [left_direction_change, right_direction_change] =
-            direction_changes.unwrap_or([false; 2]);
-        let cols = if let NonLeaf { hash, left, right } = node {
-            MemoryMerkleCols {
-                expand_direction: if direction_changes.is_none() {
-                    F::ONE
-                } else {
-                    F::NEG_ONE
-                },
-                height_section: F::from_bool(parent_height > self.memory_dimensions.address_height),
-                parent_height: F::from_canonical_usize(parent_height),
-                is_root: F::from_bool(parent_height == self.memory_dimensions.overall_height()),
-                parent_as_label: F::from_canonical_u32(as_label),
-                parent_address_label: F::from_canonical_u32(address_label),
-                parent_hash: *hash,
-                left_child_hash: left.hash(),
-                right_child_hash: right.hash(),
-                left_direction_different: F::from_bool(left_direction_change),
-                right_direction_different: F::from_bool(right_direction_change),
-            }
-        } else {
-            panic!("trace_rows expects node = {:?} to be NonLeaf", node);
-        };
-        self.trace_rows.push(cols);
-    }
-}
-
 pub trait SerialReceiver<T> {
-    fn receive(&mut self, msg: T);
+    fn receive(&self, msg: T);
 }
 
 impl<'a, F: PrimeField32, const SBOX_REGISTERS: usize> SerialReceiver<&'a [F]>
@@ -261,7 +111,7 @@ impl<'a, F: PrimeField32, const SBOX_REGISTERS: usize> SerialReceiver<&'a [F]>
 {
     /// Receives a permutation preimage, pads with zeros to the permutation width, and records.
     /// The permutation preimage must have length at most the permutation width (panics otherwise).
-    fn receive(&mut self, perm_preimage: &'a [F]) {
+    fn receive(&self, perm_preimage: &'a [F]) {
         assert!(perm_preimage.len() <= PERIPHERY_POSEIDON2_WIDTH);
         let mut state = [F::ZERO; PERIPHERY_POSEIDON2_WIDTH];
         state[..perm_preimage.len()].copy_from_slice(perm_preimage);
@@ -271,7 +121,7 @@ impl<'a, F: PrimeField32, const SBOX_REGISTERS: usize> SerialReceiver<&'a [F]>
 }
 
 impl<'a, F: PrimeField32> SerialReceiver<&'a [F]> for Poseidon2PeripheryChip<F> {
-    fn receive(&mut self, perm_preimage: &'a [F]) {
+    fn receive(&self, perm_preimage: &'a [F]) {
         match self {
             Poseidon2PeripheryChip::Register0(chip) => chip.receive(perm_preimage),
             Poseidon2PeripheryChip::Register1(chip) => chip.receive(perm_preimage),
diff --git a/crates/vm/src/system/memory/merkle/tree.rs b/crates/vm/src/system/memory/merkle/tree.rs
new file mode 100644
index 0000000000..956908abf2
--- /dev/null
+++ b/crates/vm/src/system/memory/merkle/tree.rs
@@ -0,0 +1,267 @@
+use openvm_stark_backend::{
+    p3_field::PrimeField32,
+    p3_maybe_rayon::prelude::{IntoParallelIterator, IntoParallelRefIterator, ParallelIterator},
+};
+use rustc_hash::FxHashMap;
+
+use super::{FinalState, MemoryMerkleCols};
+use crate::{
+    arch::hasher::{Hasher, HasherChip},
+    system::memory::{
+        dimensions::MemoryDimensions, merkle::memory_to_vec_partition, AddressMap, Equipartition,
+    },
+};
+
+#[derive(Debug)]
+pub struct MerkleTree<F, const CHUNK: usize> {
+    /// Height of the tree -- the root is the only node at height `height`,
+    /// and the leaves are at height `0`.
+    height: usize,
+    /// `zero_nodes[h]` is the hash of an all-zero subtree of height `h`, for `h` in `0..=height`.
+    zero_nodes: Vec<[F; CHUNK]>,
+    /// Touched nodes by index (root `1`, children of `i`: `2i`, `2i + 1`); absent means zero node.
+    nodes: FxHashMap<u64, [F; CHUNK]>,
+}
+
+impl<F: PrimeField32, const CHUNK: usize> MerkleTree<F, CHUNK> {
+    pub fn new(height: usize, hasher: &impl Hasher<CHUNK, F>) -> Self {
+        Self {
+            height,
+            zero_nodes: (0..height + 1)
+                .scan(hasher.hash(&[F::ZERO; CHUNK]), |acc, _| {
+                    let result = Some(*acc); // yield this level's all-zero hash
+                    *acc = hasher.compress(acc, acc); // parent of two identical zero children
+                    result
+                })
+                .collect(),
+            nodes: FxHashMap::default(), // empty: lookups fall back to `zero_nodes`
+        }
+    }
+
+    pub fn root(&self) -> [F; CHUNK] {
+        self.get_node(1) // the root has index 1
+    }
+
+    pub fn get_node(&self, index: u64) -> [F; CHUNK] {
+        self.nodes
+            .get(&index) // explicitly stored value, if any
+            .cloned() // fallback: zero hash at node height `height - ilog2(index)`
+            .unwrap_or(self.zero_nodes[self.height - index.ilog2() as usize])
+    }
+
+    #[allow(clippy::type_complexity)]
+    /// Shared logic for both from_memory and finalize.
+    fn process_layers<CompressFn>(
+        &mut self,
+        layer: Vec<(u64, [F; CHUNK])>,
+        md: &MemoryDimensions,
+        mut rows: Option<&mut Vec<MemoryMerkleCols<F, CHUNK>>>,
+        compress: CompressFn,
+    ) where
+        CompressFn: Fn(&[F; CHUNK], &[F; CHUNK]) -> [F; CHUNK] + Send + Sync,
+    {
+        let mut new_entries = layer;
+        let mut layer = new_entries
+            .par_iter()
+            .map(|(index, values)| {
+                let old_values = self.nodes.get(index).unwrap_or(&self.zero_nodes[0]);
+                (*index, *values, *old_values)
+            })
+            .collect::<Vec<_>>();
+        for height in 1..=self.height {
+            let new_layer = layer
+                .iter()
+                .enumerate()
+                .filter_map(|(i, (index, values, old_values))| {
+                    if i > 0 && layer[i - 1].0 ^ 1 == *index {
+                        return None;
+                    }
+
+                    let par_index = index >> 1;
+
+                    if i + 1 < layer.len() && layer[i + 1].0 == index ^ 1 {
+                        let (_, sibling_values, sibling_old_values) = &layer[i + 1];
+                        Some((
+                            par_index,
+                            Some((values, old_values)),
+                            Some((sibling_values, sibling_old_values)),
+                        ))
+                    } else if index & 1 == 0 {
+                        Some((par_index, Some((values, old_values)), None))
+                    } else {
+                        Some((par_index, None, Some((values, old_values))))
+                    }
+                })
+                .collect::<Vec<_>>();
+
+            match rows {
+                None => {
+                    layer = new_layer
+                        .into_par_iter()
+                        .map(|(par_index, left, right)| {
+                            let left = if let Some(left) = left {
+                                left.0
+                            } else {
+                                &self.get_node(2 * par_index)
+                            };
+                            let right = if let Some(right) = right {
+                                right.0
+                            } else {
+                                &self.get_node(2 * par_index + 1)
+                            };
+                            let combined = compress(left, right);
+                            let par_old_values = self.get_node(par_index);
+                            (par_index, combined, par_old_values)
+                        })
+                        .collect();
+                }
+                Some(ref mut rows) => {
+                    let label_section_height = md.address_height.saturating_sub(height);
+                    let (tmp, new_rows): (Vec<(u64, [F; CHUNK], [F; CHUNK])>, Vec<[_; 2]>) =
+                        new_layer
+                            .into_par_iter()
+                            .map(|(par_index, left, right)| {
+                                let parent_address_label =
+                                    (par_index & ((1 << label_section_height) - 1)) as u32;
+                                let parent_as_label = ((par_index & !(1 << (self.height - height)))
+                                    >> label_section_height)
+                                    as u32;
+                                let left_node;
+                                let (left, old_left, changed_left) = match left {
+                                    Some((left, old_left)) => (left, old_left, true),
+                                    None => {
+                                        left_node = self.get_node(2 * par_index);
+                                        (&left_node, &left_node, false)
+                                    }
+                                };
+                                let right_node;
+                                let (right, old_right, changed_right) = match right {
+                                    Some((right, old_right)) => (right, old_right, true),
+                                    None => {
+                                        right_node = self.get_node(2 * par_index + 1);
+                                        (&right_node, &right_node, false)
+                                    }
+                                };
+                                let combined = compress(left, right);
+                                // This is a hacky way to say:
+                                // "and we also want to record the old values"
+                                compress(old_left, old_right);
+                                let par_old_values = self.get_node(par_index);
+                                (
+                                    (par_index, combined, par_old_values),
+                                    [
+                                        MemoryMerkleCols {
+                                            expand_direction: F::ONE,
+                                            height_section: F::from_bool(
+                                                height > md.address_height,
+                                            ),
+                                            parent_height: F::from_canonical_usize(height),
+                                            is_root: F::from_bool(height == md.overall_height()),
+                                            parent_as_label: F::from_canonical_u32(parent_as_label),
+                                            parent_address_label: F::from_canonical_u32(
+                                                parent_address_label,
+                                            ),
+                                            parent_hash: par_old_values,
+                                            left_child_hash: *old_left,
+                                            right_child_hash: *old_right,
+                                            left_direction_different: F::ZERO,
+                                            right_direction_different: F::ZERO,
+                                        },
+                                        MemoryMerkleCols {
+                                            expand_direction: F::NEG_ONE,
+                                            height_section: F::from_bool(
+                                                height > md.address_height,
+                                            ),
+                                            parent_height: F::from_canonical_usize(height),
+                                            is_root: F::from_bool(height == md.overall_height()),
+                                            parent_as_label: F::from_canonical_u32(parent_as_label),
+                                            parent_address_label: F::from_canonical_u32(
+                                                parent_address_label,
+                                            ),
+                                            parent_hash: combined,
+                                            left_child_hash: *left,
+                                            right_child_hash: *right,
+                                            left_direction_different: F::from_bool(!changed_left),
+                                            right_direction_different: F::from_bool(!changed_right),
+                                        },
+                                    ],
+                                )
+                            })
+                            .unzip();
+                    rows.extend(new_rows.into_iter().flatten());
+                    layer = tmp;
+                }
+            }
+            new_entries.extend(layer.iter().map(|(idx, values, _)| (*idx, *values)));
+        }
+
+        if self.nodes.is_empty() {
+            // This, for example, should happen in every `from_memory` call
+            self.nodes = FxHashMap::from_iter(new_entries);
+        } else {
+            self.nodes.extend(new_entries);
+        }
+    }
+
+    pub fn from_memory(
+        memory: &AddressMap,
+        md: &MemoryDimensions,
+        hasher: &(impl Hasher<CHUNK, F> + Sync),
+    ) -> Self { // builds a tree from the contents of `memory`
+        let mut tree = Self::new(md.overall_height(), hasher);
+        let layer: Vec<_> = memory_to_vec_partition(memory, md)
+            .par_iter() // entries are hashed in parallel (hence the `Sync` bound on `hasher`)
+            .map(|(idx, v)| ((1 << tree.height) + idx, hasher.hash(v)))
+            .collect(); // (node index, hash) pairs; entry `idx` lands at node 2^height + idx
+        tree.process_layers(layer, md, None, |left, right| hasher.compress(left, right));
+        tree // `None` above: no rows are collected here (`finalize` passes `Some(&mut rows)`)
+    }
+
+    pub fn finalize(
+        &mut self,
+        hasher: &impl HasherChip<CHUNK, F>,
+        touched: &Equipartition<F, CHUNK>, // (addr_sp, ptr) -> values to hash as leaves
+        md: &MemoryDimensions,
+    ) -> FinalState<CHUNK, F> {
+        let init_root = self.get_node(1); // node 1 is the root; read before any update
+        let layer: Vec<_> = if !touched.is_empty() {
+            touched
+                .iter()
+                .map(|((addr_sp, ptr), v)| {
+                    (
+                        (1 << self.height) + md.label_to_index((*addr_sp, *ptr / CHUNK as u32)),
+                        hasher.hash(v), // new leaf hash for this touched entry
+                    )
+                })
+                .collect()
+        } else {
+            let index = 1 << self.height; // nothing touched: fall back to the first leaf slot
+            vec![(index, self.get_node(index))] // ...paired with its current value
+        };
+        let mut rows = Vec::with_capacity(if layer.is_empty() {
+            0 // not reachable here: both branches above yield at least one entry
+        } else {
+            layer
+                .iter()
+                .zip(layer.iter().skip(1)) // adjacent (lhs, rhs) index pairs
+                .fold(md.overall_height(), |acc, ((lhs, _), (rhs, _))| {
+                    acc + (lhs ^ rhs).ilog2() as usize // NOTE(review): ilog2 panics if lhs == rhs
+                })
+        });
+        self.process_layers(layer, md, Some(&mut rows), |left, right| {
+            hasher.compress_and_record(left, right) // `from_memory` uses plain `compress` instead
+        });
+        if touched.is_empty() {
+            // Nothing was touched, so the lone leaf above is an artificial touch; override the
+            // direction-different flags on `rows[1]` accordingly
+            rows[1].left_direction_different = F::ONE;
+            rows[1].right_direction_different = F::ONE;
+        }
+        let final_root = self.get_node(1); // root after the update
+        FinalState {
+            rows,
+            init_root,
+            final_root,
+        }
+    }
+}
diff --git a/crates/vm/src/system/memory/mod.rs b/crates/vm/src/system/memory/mod.rs
index ac6a7d85cf..411e7a5473 100644
--- a/crates/vm/src/system/memory/mod.rs
+++ b/crates/vm/src/system/memory/mod.rs
@@ -1,21 +1,40 @@
+use std::sync::Arc;
+
+use openvm_circuit_primitives::{is_less_than::IsLtSubAir, var_range::VariableRangeCheckerBus};
 use openvm_circuit_primitives_derive::AlignedBorrow;
+use openvm_stark_backend::{
+    config::{StarkGenericConfig, Val},
+    interaction::PermutationCheckBus,
+    p3_field::Field,
+    p3_util::{log2_ceil_usize, log2_strict_usize},
+    AirRef,
+};
 
-mod adapter;
+pub mod adapter;
 mod controller;
 pub mod merkle;
-mod offline;
 pub mod offline_checker;
 pub mod online;
-pub mod paged_vec;
-mod persistent;
+pub mod persistent;
 #[cfg(test)]
 mod tests;
-pub mod tree;
-mod volatile;
+pub mod volatile;
 
 pub use controller::*;
-pub use offline::*;
-pub use paged_vec::*;
+pub use online::{Address, AddressMap, INITIAL_TIMESTAMP};
+
+use crate::{
+    arch::{MemoryConfig, ADDR_SPACE_OFFSET},
+    system::memory::{
+        adapter::AccessAdapterAir, dimensions::MemoryDimensions, interface::MemoryInterfaceAirs,
+        merkle::MemoryMerkleAir, offline_checker::MemoryBridge, persistent::PersistentBoundaryAir,
+        volatile::VolatileBoundaryAir,
+    },
+};
+
+// @dev Currently this is only used for debug assertions, but we may switch to making it a
+// constant and removing it from MemoryConfig
+pub const POINTER_MAX_BITS: usize = 29;
 
 #[derive(PartialEq, Copy, Clone, Debug, Eq)]
 pub enum OpType {
@@ -52,9 +71,95 @@ impl<S, T> MemoryAddress<S, T> {
     }
 }
 
-#[derive(Clone, Copy, Debug, PartialEq, Eq, AlignedBorrow)]
-#[repr(C)]
-pub struct HeapAddress<S, T> {
-    pub address: MemoryAddress<S, T>,
-    pub data: MemoryAddress<S, T>,
+#[derive(Clone)]
+pub struct MemoryAirInventory<SC: StarkGenericConfig> {
+    pub bridge: MemoryBridge, // stored as given to `new`
+    pub interface: MemoryInterfaceAirs, // Volatile { boundary } or Persistent { boundary, merkle }
+    pub access_adapters: Vec<AirRef<SC>>, // AccessAdapterAir::<N> for N = 2, 4, ..
+}
+
+impl<SC: StarkGenericConfig> MemoryAirInventory<SC> {
+    pub fn new(
+        bridge: MemoryBridge,
+        mem_config: &MemoryConfig,
+        range_bus: VariableRangeCheckerBus,
+        merkle_compression_buses: Option<(PermutationCheckBus, PermutationCheckBus)>,
+    ) -> Self { // `Some(buses)` selects persistent memory, `None` selects volatile memory
+        let memory_bus = bridge.memory_bus();
+        let interface = if let Some((merkle_bus, compression_bus)) = merkle_compression_buses {
+            // Persistent memory: boundary and merkle AIRs share the dimensions and both buses
+            let memory_dims = MemoryDimensions {
+                addr_space_height: mem_config.addr_space_height,
+                address_height: mem_config.pointer_max_bits - log2_strict_usize(CHUNK),
+            };
+            let boundary = PersistentBoundaryAir::<CHUNK> {
+                memory_dims,
+                memory_bus,
+                merkle_bus,
+                compression_bus,
+            };
+            let merkle = MemoryMerkleAir::<CHUNK> {
+                memory_dimensions: memory_dims,
+                merkle_bus,
+                compression_bus,
+            };
+            MemoryInterfaceAirs::Persistent { boundary, merkle }
+        } else {
+            // Volatile memory: only a boundary AIR is built (it takes `range_bus`)
+            let addr_space_height = mem_config.addr_space_height;
+            assert!(addr_space_height < Val::<SC>::bits() - 2);
+            let addr_space_max_bits =
+                log2_ceil_usize((ADDR_SPACE_OFFSET + 2u32.pow(addr_space_height as u32)) as usize);
+            let boundary = VolatileBoundaryAir::new(
+                memory_bus,
+                addr_space_max_bits,
+                mem_config.pointer_max_bits,
+                range_bus,
+            );
+            MemoryInterfaceAirs::Volatile { boundary }
+        };
+        // Memory access adapters: one AIR per size 2, 4, .., 32, truncated to sizes <= `maan`
+        let lt_air = IsLtSubAir::new(range_bus, mem_config.timestamp_max_bits);
+        let maan = mem_config.max_access_adapter_n;
+        assert!(matches!(maan, 2 | 4 | 8 | 16 | 32)); // also a power of two for the log2 below
+        let access_adapters: Vec<AirRef<SC>> = [
+            Arc::new(AccessAdapterAir::<2> { memory_bus, lt_air }) as AirRef<SC>,
+            Arc::new(AccessAdapterAir::<4> { memory_bus, lt_air }) as AirRef<SC>,
+            Arc::new(AccessAdapterAir::<8> { memory_bus, lt_air }) as AirRef<SC>,
+            Arc::new(AccessAdapterAir::<16> { memory_bus, lt_air }) as AirRef<SC>,
+            Arc::new(AccessAdapterAir::<32> { memory_bus, lt_air }) as AirRef<SC>,
+        ]
+        .into_iter()
+        .take(log2_strict_usize(maan)) // maan = 2 keeps one adapter, maan = 32 keeps all five
+        .collect();
+
+        Self {
+            bridge,
+            interface,
+            access_adapters,
+        }
+    }
+
+    /// Consumes the inventory; AIRs are ordered boundary, merkle (if persistent), access adapters
+    pub fn into_airs(self) -> Vec<AirRef<SC>> {
+        let mut airs: Vec<AirRef<SC>> = Vec::new();
+        match self.interface {
+            MemoryInterfaceAirs::Volatile { boundary } => {
+                airs.push(Arc::new(boundary));
+            }
+            MemoryInterfaceAirs::Persistent { boundary, merkle } => {
+                airs.push(Arc::new(boundary));
+                airs.push(Arc::new(merkle));
+            }
+        }
+        airs.extend(self.access_adapters); // adapters keep the order built in `new`
+        airs
+    }
+}
+
+/// Returns, in O(1), the number of AIRs [`MemoryAirInventory::into_airs`] yields for an
+/// inventory built with the same settings; `max_access_adapter_n` must be a power of two.
+pub fn num_memory_airs(is_persistent: bool, max_access_adapter_n: usize) -> usize {
+    // boundary + { merkle if is_persistent } + access_adapters
+    1 + usize::from(is_persistent) + log2_strict_usize(max_access_adapter_n)
 }
diff --git a/crates/vm/src/system/memory/offline.rs b/crates/vm/src/system/memory/offline.rs
deleted file mode 100644
index 74bb238811..0000000000
--- a/crates/vm/src/system/memory/offline.rs
+++ /dev/null
@@ -1,1070 +0,0 @@
-use std::{array, cmp::max};
-
-use openvm_circuit_primitives::{
-    assert_less_than::AssertLtSubAir, var_range::SharedVariableRangeCheckerChip,
-};
-use openvm_stark_backend::p3_field::PrimeField32;
-use rustc_hash::FxHashSet;
-
-use super::{AddressMap, PagedVec, PAGE_SIZE};
-use crate::{
-    arch::MemoryConfig,
-    system::memory::{
-        adapter::{AccessAdapterInventory, AccessAdapterRecord, AccessAdapterRecordKind},
-        offline_checker::{MemoryBridge, MemoryBus},
-        MemoryAuxColsFactory, MemoryImage, RecordId, TimestampedEquipartition, TimestampedValues,
-    },
-};
-
-pub const INITIAL_TIMESTAMP: u32 = 0;
-
-#[repr(C)]
-#[derive(Clone, Default, PartialEq, Eq, Debug)]
-struct BlockData {
-    pointer: u32,
-    timestamp: u32,
-    size: usize,
-}
-
-struct BlockMap {
-    /// Block ids. 0 is a special value standing for the default block.
-    id: AddressMap<usize, PAGE_SIZE>,
-    /// The place where non-default blocks are stored.
-    storage: Vec<BlockData>,
-    initial_block_size: usize,
-}
-
-impl BlockMap {
-    pub fn from_mem_config(mem_config: &MemoryConfig, initial_block_size: usize) -> Self {
-        assert!(initial_block_size.is_power_of_two());
-        Self {
-            id: AddressMap::from_mem_config(mem_config),
-            storage: vec![],
-            initial_block_size,
-        }
-    }
-
-    fn initial_block_data(pointer: u32, initial_block_size: usize) -> BlockData {
-        let aligned_pointer = (pointer / initial_block_size as u32) * initial_block_size as u32;
-        BlockData {
-            pointer: aligned_pointer,
-            size: initial_block_size,
-            timestamp: INITIAL_TIMESTAMP,
-        }
-    }
-
-    pub fn get_without_adding(&self, address: &(u32, u32)) -> BlockData {
-        let idx = self.id.get(address).unwrap_or(&0);
-        if idx == &0 {
-            Self::initial_block_data(address.1, self.initial_block_size)
-        } else {
-            self.storage[idx - 1].clone()
-        }
-    }
-
-    pub fn get(&mut self, address: &(u32, u32)) -> &BlockData {
-        let (address_space, pointer) = *address;
-        let idx = self.id.get(&(address_space, pointer)).unwrap_or(&0);
-        if idx == &0 {
-            // `initial_block_size` is a power of two, as asserted in `from_mem_config`.
-            let pointer = pointer & !(self.initial_block_size as u32 - 1);
-            self.set_range(
-                &(address_space, pointer),
-                self.initial_block_size,
-                Self::initial_block_data(pointer, self.initial_block_size),
-            );
-            self.storage.last().unwrap()
-        } else {
-            &self.storage[idx - 1]
-        }
-    }
-
-    pub fn get_mut(&mut self, address: &(u32, u32)) -> &mut BlockData {
-        let (address_space, pointer) = *address;
-        let idx = self.id.get(&(address_space, pointer)).unwrap_or(&0);
-        if idx == &0 {
-            let pointer = pointer - pointer % self.initial_block_size as u32;
-            self.set_range(
-                &(address_space, pointer),
-                self.initial_block_size,
-                Self::initial_block_data(pointer, self.initial_block_size),
-            );
-            self.storage.last_mut().unwrap()
-        } else {
-            &mut self.storage[idx - 1]
-        }
-    }
-
-    pub fn set_range(&mut self, address: &(u32, u32), len: usize, block: BlockData) {
-        let (address_space, pointer) = address;
-        self.storage.push(block);
-        for i in 0..len {
-            self.id
-                .insert(&(*address_space, pointer + i as u32), self.storage.len());
-        }
-    }
-
-    pub fn items(&self) -> impl Iterator<Item = ((u32, u32), &BlockData)> + '_ {
-        self.id
-            .items()
-            .filter(|(_, idx)| *idx > 0)
-            .map(|(address, idx)| (address, &self.storage[idx - 1]))
-    }
-}
-
-#[derive(Debug, Clone, PartialEq)]
-pub struct MemoryRecord<T> {
-    pub address_space: T,
-    pub pointer: T,
-    pub timestamp: u32,
-    pub prev_timestamp: u32,
-    data: Vec<T>,
-    /// None if a read.
-    prev_data: Option<Vec<T>>,
-}
-
-impl<T> MemoryRecord<T> {
-    pub fn data_slice(&self) -> &[T] {
-        self.data.as_slice()
-    }
-
-    pub fn prev_data_slice(&self) -> Option<&[T]> {
-        self.prev_data.as_deref()
-    }
-}
-
-impl<T: Copy> MemoryRecord<T> {
-    pub fn data_at(&self, index: usize) -> T {
-        self.data[index]
-    }
-}
-
-pub struct OfflineMemory<F> {
-    block_data: BlockMap,
-    data: Vec<PagedVec<F, PAGE_SIZE>>,
-    as_offset: u32,
-    timestamp: u32,
-    timestamp_max_bits: usize,
-
-    memory_bus: MemoryBus,
-    range_checker: SharedVariableRangeCheckerChip,
-
-    log: Vec<Option<MemoryRecord<F>>>,
-}
-
-impl<F: PrimeField32> OfflineMemory<F> {
-    /// Creates a new partition with the given initial block size.
-    ///
-    /// Panics if the initial block size is not a power of two.
-    pub fn new(
-        initial_memory: MemoryImage<F>,
-        initial_block_size: usize,
-        memory_bus: MemoryBus,
-        range_checker: SharedVariableRangeCheckerChip,
-        config: MemoryConfig,
-    ) -> Self {
-        assert_eq!(initial_memory.as_offset, config.as_offset);
-        Self {
-            block_data: BlockMap::from_mem_config(&config, initial_block_size),
-            data: initial_memory.paged_vecs,
-            as_offset: config.as_offset,
-            timestamp: INITIAL_TIMESTAMP + 1,
-            timestamp_max_bits: config.clk_max_bits,
-            memory_bus,
-            range_checker,
-            log: vec![],
-        }
-    }
-
-    pub fn set_initial_memory(&mut self, initial_memory: MemoryImage<F>, config: MemoryConfig) {
-        assert_eq!(self.timestamp, INITIAL_TIMESTAMP + 1);
-        assert_eq!(initial_memory.as_offset, config.as_offset);
-        self.as_offset = config.as_offset;
-        self.data = initial_memory.paged_vecs;
-    }
-
-    pub(super) fn set_log_capacity(&mut self, access_capacity: usize) {
-        assert!(self.log.is_empty());
-        self.log = Vec::with_capacity(access_capacity);
-    }
-
-    pub fn memory_bridge(&self) -> MemoryBridge {
-        MemoryBridge::new(
-            self.memory_bus,
-            self.timestamp_max_bits,
-            self.range_checker.bus(),
-        )
-    }
-
-    pub fn timestamp(&self) -> u32 {
-        self.timestamp
-    }
-
-    /// Increments the current timestamp by one and returns the new value.
-    pub fn increment_timestamp(&mut self) {
-        self.increment_timestamp_by(1)
-    }
-
-    /// Increments the current timestamp by a specified delta and returns the new value.
-    pub fn increment_timestamp_by(&mut self, delta: u32) {
-        self.log.push(None);
-        self.timestamp += delta;
-    }
-
-    /// Writes an array of values to the memory at the specified address space and start index.
-    pub fn write(
-        &mut self,
-        address_space: u32,
-        pointer: u32,
-        values: Vec<F>,
-        records: &mut AccessAdapterInventory<F>,
-    ) {
-        let len = values.len();
-        assert!(len.is_power_of_two());
-        assert_ne!(address_space, 0);
-
-        let prev_timestamp = self.access_updating_timestamp(address_space, pointer, len, records);
-
-        debug_assert!(prev_timestamp < self.timestamp);
-
-        let pointer = pointer as usize;
-        let prev_data = self.data[(address_space - self.as_offset) as usize]
-            .set_range(pointer..pointer + len, &values);
-
-        let record = MemoryRecord {
-            address_space: F::from_canonical_u32(address_space),
-            pointer: F::from_canonical_usize(pointer),
-            timestamp: self.timestamp,
-            prev_timestamp,
-            data: values,
-            prev_data: Some(prev_data),
-        };
-        self.log.push(Some(record));
-        self.timestamp += 1;
-    }
-
-    /// Reads an array of values from the memory at the specified address space and start index.
-    pub fn read(
-        &mut self,
-        address_space: u32,
-        pointer: u32,
-        len: usize,
-        adapter_records: &mut AccessAdapterInventory<F>,
-    ) {
-        assert!(len.is_power_of_two());
-        if address_space == 0 {
-            let pointer = F::from_canonical_u32(pointer);
-            self.log.push(Some(MemoryRecord {
-                address_space: F::ZERO,
-                pointer,
-                timestamp: self.timestamp,
-                prev_timestamp: 0,
-                data: vec![pointer],
-                prev_data: None,
-            }));
-            self.timestamp += 1;
-            return;
-        }
-
-        let prev_timestamp =
-            self.access_updating_timestamp(address_space, pointer, len, adapter_records);
-
-        debug_assert!(prev_timestamp < self.timestamp);
-
-        let values = self.range_vec(address_space, pointer, len);
-
-        self.log.push(Some(MemoryRecord {
-            address_space: F::from_canonical_u32(address_space),
-            pointer: F::from_canonical_u32(pointer),
-            timestamp: self.timestamp,
-            prev_timestamp,
-            data: values,
-            prev_data: None,
-        }));
-        self.timestamp += 1;
-    }
-
-    pub fn record_by_id(&self, id: RecordId) -> &MemoryRecord<F> {
-        self.log[id.0].as_ref().unwrap()
-    }
-
-    pub fn finalize<const N: usize>(
-        &mut self,
-        adapter_records: &mut AccessAdapterInventory<F>,
-    ) -> TimestampedEquipartition<F, N> {
-        // First make sure the partition we maintain in self.block_data is an equipartition.
-        // Grab all aligned pointers that need to be re-accessed.
-        let to_access: FxHashSet<_> = self
-            .block_data
-            .items()
-            .map(|((address_space, pointer), _)| (address_space, (pointer / N as u32) * N as u32))
-            .collect();
-
-        for &(address_space, pointer) in to_access.iter() {
-            let block = self.block_data.get(&(address_space, pointer));
-            if block.pointer != pointer || block.size != N {
-                self.access(address_space, pointer, N, adapter_records);
-            }
-        }
-
-        let mut equipartition = TimestampedEquipartition::<F, N>::new();
-        for (address_space, pointer) in to_access {
-            let block = self.block_data.get(&(address_space, pointer));
-
-            debug_assert_eq!(block.pointer % N as u32, 0);
-            debug_assert_eq!(block.size, N);
-
-            equipartition.insert(
-                (address_space, pointer / N as u32),
-                TimestampedValues {
-                    timestamp: block.timestamp,
-                    values: self.range_array::<N>(address_space, pointer),
-                },
-            );
-        }
-        equipartition
-    }
-
-    // Modifies the partition to ensure that there is a block starting at (address_space, query).
-    fn split_to_make_boundary(
-        &mut self,
-        address_space: u32,
-        query: u32,
-        records: &mut AccessAdapterInventory<F>,
-    ) {
-        let lim = (self.data[(address_space - self.as_offset) as usize].memory_size()) as u32;
-        if query == lim {
-            return;
-        }
-        assert!(query < lim);
-        let original_block = self.block_containing(address_space, query);
-        if original_block.pointer == query {
-            return;
-        }
-
-        let data = self.range_vec(address_space, original_block.pointer, original_block.size);
-
-        let timestamp = original_block.timestamp;
-
-        let mut cur_ptr = original_block.pointer;
-        let mut cur_size = original_block.size;
-        while cur_size > 0 {
-            // Split.
-            records.add_record(AccessAdapterRecord {
-                timestamp,
-                address_space: F::from_canonical_u32(address_space),
-                start_index: F::from_canonical_u32(cur_ptr),
-                data: data[(cur_ptr - original_block.pointer) as usize
-                    ..(cur_ptr - original_block.pointer) as usize + cur_size]
-                    .to_vec(),
-                kind: AccessAdapterRecordKind::Split,
-            });
-
-            let half_size = cur_size / 2;
-            let half_size_u32 = half_size as u32;
-            let mid_ptr = cur_ptr + half_size_u32;
-
-            if query <= mid_ptr {
-                // The right is finalized; add it to the partition.
-                let block = BlockData {
-                    pointer: mid_ptr,
-                    size: half_size,
-                    timestamp,
-                };
-                self.block_data
-                    .set_range(&(address_space, mid_ptr), half_size, block);
-            }
-            if query >= cur_ptr + half_size_u32 {
-                // The left is finalized; add it to the partition.
-                let block = BlockData {
-                    pointer: cur_ptr,
-                    size: half_size,
-                    timestamp,
-                };
-                self.block_data
-                    .set_range(&(address_space, cur_ptr), half_size, block);
-            }
-            if mid_ptr <= query {
-                cur_ptr = mid_ptr;
-            }
-            if cur_ptr == query {
-                break;
-            }
-            cur_size = half_size;
-        }
-    }
-
-    fn access_updating_timestamp(
-        &mut self,
-        address_space: u32,
-        pointer: u32,
-        size: usize,
-        records: &mut AccessAdapterInventory<F>,
-    ) -> u32 {
-        self.access(address_space, pointer, size, records);
-
-        let mut prev_timestamp = None;
-
-        let mut i = 0;
-        while i < size as u32 {
-            let block = self.block_data.get_mut(&(address_space, pointer + i));
-            debug_assert!(i == 0 || prev_timestamp == Some(block.timestamp));
-            prev_timestamp = Some(block.timestamp);
-            block.timestamp = self.timestamp;
-            i = block.pointer + block.size as u32;
-        }
-        prev_timestamp.unwrap()
-    }
-
-    fn access(
-        &mut self,
-        address_space: u32,
-        pointer: u32,
-        size: usize,
-        records: &mut AccessAdapterInventory<F>,
-    ) {
-        self.split_to_make_boundary(address_space, pointer, records);
-        self.split_to_make_boundary(address_space, pointer + size as u32, records);
-
-        let block_data = self.block_containing(address_space, pointer);
-
-        if block_data.pointer == pointer && block_data.size == size {
-            return;
-        }
-        assert!(size > 1);
-
-        // Now recursively access left and right blocks to ensure they are in the partition.
-        let half_size = size / 2;
-        self.access(address_space, pointer, half_size, records);
-        self.access(
-            address_space,
-            pointer + half_size as u32,
-            half_size,
-            records,
-        );
-
-        self.merge_block_with_next(address_space, pointer, records);
-    }
-
-    /// Merges the two adjacent blocks starting at (address_space, pointer).
-    ///
-    /// Panics if there is no block starting at (address_space, pointer) or if the two blocks
-    /// do not have the same size.
-    fn merge_block_with_next(
-        &mut self,
-        address_space: u32,
-        pointer: u32,
-        records: &mut AccessAdapterInventory<F>,
-    ) {
-        let left_block = self.block_data.get(&(address_space, pointer));
-
-        let left_timestamp = left_block.timestamp;
-        let size = left_block.size;
-
-        let right_timestamp = self
-            .block_data
-            .get(&(address_space, pointer + size as u32))
-            .timestamp;
-
-        let timestamp = max(left_timestamp, right_timestamp);
-        self.block_data.set_range(
-            &(address_space, pointer),
-            2 * size,
-            BlockData {
-                pointer,
-                size: 2 * size,
-                timestamp,
-            },
-        );
-        records.add_record(AccessAdapterRecord {
-            timestamp,
-            address_space: F::from_canonical_u32(address_space),
-            start_index: F::from_canonical_u32(pointer),
-            data: self.range_vec(address_space, pointer, 2 * size),
-            kind: AccessAdapterRecordKind::Merge {
-                left_timestamp,
-                right_timestamp,
-            },
-        });
-    }
-
-    fn block_containing(&mut self, address_space: u32, pointer: u32) -> BlockData {
-        self.block_data
-            .get_without_adding(&(address_space, pointer))
-    }
-
-    pub fn get(&self, address_space: u32, pointer: u32) -> F {
-        self.data[(address_space - self.as_offset) as usize]
-            .get(pointer as usize)
-            .cloned()
-            .unwrap_or_default()
-    }
-
-    fn range_array<const N: usize>(&self, address_space: u32, pointer: u32) -> [F; N] {
-        array::from_fn(|i| self.get(address_space, pointer + i as u32))
-    }
-
-    fn range_vec(&self, address_space: u32, pointer: u32, len: usize) -> Vec<F> {
-        let pointer = pointer as usize;
-        self.data[(address_space - self.as_offset) as usize].range_vec(pointer..pointer + len)
-    }
-
-    pub fn aux_cols_factory(&self) -> MemoryAuxColsFactory<F> {
-        let range_bus = self.range_checker.bus();
-        MemoryAuxColsFactory {
-            range_checker: self.range_checker.clone(),
-            timestamp_lt_air: AssertLtSubAir::new(range_bus, self.timestamp_max_bits),
-            _marker: Default::default(),
-        }
-    }
-
-    // just for unit testing
-    #[cfg(test)]
-    fn last_record(&self) -> &MemoryRecord<F> {
-        self.log.last().unwrap().as_ref().unwrap()
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use openvm_circuit_primitives::var_range::{
-        SharedVariableRangeCheckerChip, VariableRangeCheckerBus,
-    };
-    use openvm_stark_backend::p3_field::FieldAlgebra;
-    use openvm_stark_sdk::p3_baby_bear::BabyBear;
-
-    use super::{BlockData, MemoryRecord, OfflineMemory};
-    use crate::{
-        arch::MemoryConfig,
-        system::memory::{
-            adapter::{AccessAdapterInventory, AccessAdapterRecord, AccessAdapterRecordKind},
-            offline_checker::MemoryBus,
-            paged_vec::AddressMap,
-            MemoryImage, TimestampedValues,
-        },
-    };
-
-    macro_rules! bb {
-        ($x:expr) => {
-            BabyBear::from_canonical_u32($x)
-        };
-    }
-
-    macro_rules! bba {
-        [$($x:expr),*] => {
-            [$(BabyBear::from_canonical_u32($x)),*]
-        }
-    }
-
-    macro_rules! bbvec {
-        [$($x:expr),*] => {
-            vec![$(BabyBear::from_canonical_u32($x)),*]
-        }
-    }
-
-    fn setup_test(
-        initial_memory: MemoryImage<BabyBear>,
-        initial_block_size: usize,
-    ) -> (OfflineMemory<BabyBear>, AccessAdapterInventory<BabyBear>) {
-        let memory_bus = MemoryBus::new(0);
-        let range_checker =
-            SharedVariableRangeCheckerChip::new(VariableRangeCheckerBus::new(1, 29));
-        let mem_config = MemoryConfig {
-            as_offset: initial_memory.as_offset,
-            ..Default::default()
-        };
-        let memory = OfflineMemory::new(
-            initial_memory,
-            initial_block_size,
-            memory_bus,
-            range_checker.clone(),
-            mem_config,
-        );
-        let access_adapter_inventory = AccessAdapterInventory::new(
-            range_checker,
-            memory_bus,
-            mem_config.clk_max_bits,
-            mem_config.max_access_adapter_n,
-        );
-        (memory, access_adapter_inventory)
-    }
-
-    #[test]
-    fn test_partition() {
-        let initial_memory = AddressMap::new(0, 1, 16);
-        let (mut memory, _) = setup_test(initial_memory, 8);
-        assert_eq!(
-            memory.block_containing(1, 13),
-            BlockData {
-                pointer: 8,
-                size: 8,
-                timestamp: 0,
-            }
-        );
-
-        assert_eq!(
-            memory.block_containing(1, 8),
-            BlockData {
-                pointer: 8,
-                size: 8,
-                timestamp: 0,
-            }
-        );
-
-        assert_eq!(
-            memory.block_containing(1, 15),
-            BlockData {
-                pointer: 8,
-                size: 8,
-                timestamp: 0,
-            }
-        );
-
-        assert_eq!(
-            memory.block_containing(1, 16),
-            BlockData {
-                pointer: 16,
-                size: 8,
-                timestamp: 0,
-            }
-        );
-    }
-
-    #[test]
-    fn test_write_read_initial_block_len_1() {
-        let (mut memory, mut access_adapters) = setup_test(MemoryImage::default(), 1);
-        let address_space = 1;
-
-        memory.write(address_space, 0, bbvec![1, 2, 3, 4], &mut access_adapters);
-
-        memory.read(address_space, 0, 2, &mut access_adapters);
-        let read_record = memory.last_record();
-        assert_eq!(read_record.data, bba![1, 2]);
-
-        memory.write(address_space, 2, bbvec![100], &mut access_adapters);
-
-        memory.read(address_space, 0, 4, &mut access_adapters);
-        let read_record = memory.last_record();
-        assert_eq!(read_record.data, bba![1, 2, 100, 4]);
-    }
-
-    #[test]
-    fn test_records_initial_block_len_1() {
-        let (mut memory, mut adapter_records) = setup_test(MemoryImage::default(), 1);
-
-        memory.write(1, 0, bbvec![1, 2, 3, 4], &mut adapter_records);
-
-        // Above write first causes merge of [0:1] and [1:2] into [0:2].
-        assert_eq!(
-            adapter_records.records_for_n(2)[0],
-            AccessAdapterRecord {
-                timestamp: 0,
-                address_space: bb!(1),
-                start_index: bb!(0),
-                data: bbvec![0, 0],
-                kind: AccessAdapterRecordKind::Merge {
-                    left_timestamp: 0,
-                    right_timestamp: 0,
-                },
-            }
-        );
-        // then merge [2:3] and [3:4] into [2:4].
-        assert_eq!(
-            adapter_records.records_for_n(2)[1],
-            AccessAdapterRecord {
-                timestamp: 0,
-                address_space: bb!(1),
-                start_index: bb!(2),
-                data: bbvec![0, 0],
-                kind: AccessAdapterRecordKind::Merge {
-                    left_timestamp: 0,
-                    right_timestamp: 0,
-                },
-            }
-        );
-        // then merge [0:2] and [2:4] into [0:4].
-        assert_eq!(
-            adapter_records.records_for_n(4)[0],
-            AccessAdapterRecord {
-                timestamp: 0,
-                address_space: bb!(1),
-                start_index: bb!(0),
-                data: bbvec![0, 0, 0, 0],
-                kind: AccessAdapterRecordKind::Merge {
-                    left_timestamp: 0,
-                    right_timestamp: 0,
-                },
-            }
-        );
-        // At time 1 we write [0:4].
-        let write_record = memory.last_record();
-        assert_eq!(
-            write_record,
-            &MemoryRecord {
-                address_space: bb!(1),
-                pointer: bb!(0),
-                timestamp: 1,
-                prev_timestamp: 0,
-                data: bbvec![1, 2, 3, 4],
-                prev_data: Some(bbvec![0, 0, 0, 0]),
-            }
-        );
-        assert_eq!(memory.timestamp(), 2);
-        assert_eq!(adapter_records.total_records(), 3);
-
-        memory.read(1, 0, 4, &mut adapter_records);
-        let read_record = memory.last_record();
-        // At time 2 we read [0:4].
-        assert_eq!(adapter_records.total_records(), 3);
-        assert_eq!(
-            read_record,
-            &MemoryRecord {
-                address_space: bb!(1),
-                pointer: bb!(0),
-                timestamp: 2,
-                prev_timestamp: 1,
-                data: bbvec![1, 2, 3, 4],
-                prev_data: None,
-            }
-        );
-        assert_eq!(memory.timestamp(), 3);
-
-        memory.write(1, 0, bbvec![10, 11], &mut adapter_records);
-        let write_record = memory.last_record();
-        // write causes split [0:4] into [0:2] and [2:4] (to prepare for write to [0:2]).
-        assert_eq!(adapter_records.total_records(), 4);
-        assert_eq!(
-            adapter_records.records_for_n(4).last().unwrap(),
-            &AccessAdapterRecord {
-                timestamp: 2,
-                address_space: bb!(1),
-                start_index: bb!(0),
-                data: bbvec![1, 2, 3, 4],
-                kind: AccessAdapterRecordKind::Split,
-            }
-        );
-
-        // At time 3 we write [10, 11] into [0, 2].
-        assert_eq!(
-            write_record,
-            &MemoryRecord {
-                address_space: bb!(1),
-                pointer: bb!(0),
-                timestamp: 3,
-                prev_timestamp: 2,
-                data: bbvec![10, 11],
-                prev_data: Some(bbvec![1, 2]),
-            }
-        );
-
-        memory.read(1, 0, 4, &mut adapter_records);
-        let read_record = memory.last_record();
-        assert_eq!(adapter_records.total_records(), 5);
-        assert_eq!(
-            adapter_records.records_for_n(4).last().unwrap(),
-            &AccessAdapterRecord {
-                timestamp: 3,
-                address_space: bb!(1),
-                start_index: bb!(0),
-                data: bbvec![10, 11, 3, 4],
-                kind: AccessAdapterRecordKind::Merge {
-                    left_timestamp: 3,
-                    right_timestamp: 2
-                },
-            }
-        );
-        // At time 9 we read [0:4].
-        assert_eq!(
-            read_record,
-            &MemoryRecord {
-                address_space: bb!(1),
-                pointer: bb!(0),
-                timestamp: 4,
-                prev_timestamp: 3,
-                data: bbvec![10, 11, 3, 4],
-                prev_data: None,
-            }
-        );
-    }
-
-    #[test]
-    fn test_records_initial_block_len_8() {
-        let (mut memory, mut adapter_records) = setup_test(MemoryImage::default(), 8);
-
-        memory.write(1, 0, bbvec![1, 2, 3, 4], &mut adapter_records);
-        let write_record = memory.last_record();
-
-        // Above write first causes split of [0:8] into [0:4] and [4:8].
-        assert_eq!(adapter_records.total_records(), 1);
-        assert_eq!(
-            adapter_records.records_for_n(8).last().unwrap(),
-            &AccessAdapterRecord {
-                timestamp: 0,
-                address_space: bb!(1),
-                start_index: bb!(0),
-                data: bbvec![0, 0, 0, 0, 0, 0, 0, 0],
-                kind: AccessAdapterRecordKind::Split,
-            }
-        );
-        // At time 1 we write [0:4].
-        assert_eq!(
-            write_record,
-            &MemoryRecord {
-                address_space: bb!(1),
-                pointer: bb!(0),
-                timestamp: 1,
-                prev_timestamp: 0,
-                data: bbvec![1, 2, 3, 4],
-                prev_data: Some(bbvec![0, 0, 0, 0]),
-            }
-        );
-        assert_eq!(memory.timestamp(), 2);
-
-        memory.read(1, 0, 4, &mut adapter_records);
-        let read_record = memory.last_record();
-        // At time 2 we read [0:4].
-        assert_eq!(adapter_records.total_records(), 1);
-        assert_eq!(
-            read_record,
-            &MemoryRecord {
-                address_space: bb!(1),
-                pointer: bb!(0),
-                timestamp: 2,
-                prev_timestamp: 1,
-                data: bbvec![1, 2, 3, 4],
-                prev_data: None,
-            }
-        );
-        assert_eq!(memory.timestamp(), 3);
-
-        memory.write(1, 0, bbvec![10, 11], &mut adapter_records);
-        let write_record = memory.last_record();
-        // write causes split [0:4] into [0:2] and [2:4] (to prepare for write to [0:2]).
-        assert_eq!(adapter_records.total_records(), 2);
-        assert_eq!(
-            adapter_records.records_for_n(4).last().unwrap(),
-            &AccessAdapterRecord {
-                timestamp: 2,
-                address_space: bb!(1),
-                start_index: bb!(0),
-                data: bbvec![1, 2, 3, 4],
-                kind: AccessAdapterRecordKind::Split,
-            }
-        );
-
-        // At time 3 we write [10, 11] into [0, 2].
-        assert_eq!(
-            write_record,
-            &MemoryRecord {
-                address_space: bb!(1),
-                pointer: bb!(0),
-                timestamp: 3,
-                prev_timestamp: 2,
-                data: bbvec![10, 11],
-                prev_data: Some(bbvec![1, 2]),
-            }
-        );
-
-        memory.read(1, 0, 4, &mut adapter_records);
-        let read_record = memory.last_record();
-        assert_eq!(adapter_records.total_records(), 3);
-        assert_eq!(
-            adapter_records.records_for_n(4).last().unwrap(),
-            &AccessAdapterRecord {
-                timestamp: 3,
-                address_space: bb!(1),
-                start_index: bb!(0),
-                data: bbvec![10, 11, 3, 4],
-                kind: AccessAdapterRecordKind::Merge {
-                    left_timestamp: 3,
-                    right_timestamp: 2
-                },
-            }
-        );
-        // At time 9 we read [0:4].
-        assert_eq!(
-            read_record,
-            &MemoryRecord {
-                address_space: bb!(1),
-                pointer: bb!(0),
-                timestamp: 4,
-                prev_timestamp: 3,
-                data: bbvec![10, 11, 3, 4],
-                prev_data: None,
-            }
-        );
-    }
-
-    #[test]
-    fn test_get_initial_block_len_1() {
-        let (mut memory, mut adapter_records) = setup_test(MemoryImage::default(), 1);
-
-        memory.write(2, 0, bbvec![4, 3, 2, 1], &mut adapter_records);
-
-        assert_eq!(memory.get(2, 0), BabyBear::from_canonical_u32(4));
-        assert_eq!(memory.get(2, 1), BabyBear::from_canonical_u32(3));
-        assert_eq!(memory.get(2, 2), BabyBear::from_canonical_u32(2));
-        assert_eq!(memory.get(2, 3), BabyBear::from_canonical_u32(1));
-        assert_eq!(memory.get(2, 5), BabyBear::ZERO);
-
-        assert_eq!(memory.get(1, 0), BabyBear::ZERO);
-    }
-
-    #[test]
-    fn test_get_initial_block_len_8() {
-        let (mut memory, mut adapter_records) = setup_test(MemoryImage::default(), 8);
-
-        memory.write(2, 0, bbvec![4, 3, 2, 1], &mut adapter_records);
-
-        assert_eq!(memory.get(2, 0), BabyBear::from_canonical_u32(4));
-        assert_eq!(memory.get(2, 1), BabyBear::from_canonical_u32(3));
-        assert_eq!(memory.get(2, 2), BabyBear::from_canonical_u32(2));
-        assert_eq!(memory.get(2, 3), BabyBear::from_canonical_u32(1));
-        assert_eq!(memory.get(2, 5), BabyBear::ZERO);
-        assert_eq!(memory.get(2, 9), BabyBear::ZERO);
-        assert_eq!(memory.get(1, 0), BabyBear::ZERO);
-    }
-
-    #[test]
-    fn test_finalize_empty() {
-        let (mut memory, mut adapter_records) = setup_test(MemoryImage::default(), 4);
-
-        let memory = memory.finalize::<4>(&mut adapter_records);
-        assert_eq!(memory.len(), 0);
-        assert_eq!(adapter_records.total_records(), 0);
-    }
-
-    #[test]
-    fn test_finalize_block_len_8() {
-        let (mut memory, mut adapter_records) = setup_test(MemoryImage::default(), 8);
-        // Make block 0:4 in address space 1 active.
-        memory.write(1, 0, bbvec![1, 2, 3, 4], &mut adapter_records);
-
-        // Make block 16:32 in address space 1 active.
-        memory.write(
-            1,
-            16,
-            bbvec![1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
-            &mut adapter_records,
-        );
-
-        // Make block 64:72 in address space 2 active.
-        memory.write(2, 64, bbvec![8, 7, 6, 5, 4, 3, 2, 1], &mut adapter_records);
-
-        let num_records_before_finalize = adapter_records.total_records();
-
-        // Finalize to a partition of size 8.
-        let final_memory = memory.finalize::<8>(&mut adapter_records);
-        assert_eq!(final_memory.len(), 4);
-        assert_eq!(
-            final_memory.get(&(1, 0)),
-            Some(&TimestampedValues {
-                values: bba![1, 2, 3, 4, 0, 0, 0, 0],
-                timestamp: 1,
-            })
-        );
-        // start_index = 16 corresponds to label = 2
-        assert_eq!(
-            final_memory.get(&(1, 2)),
-            Some(&TimestampedValues {
-                values: bba![1, 1, 1, 1, 1, 1, 1, 1],
-                timestamp: 2,
-            })
-        );
-        // start_index = 24 corresponds to label = 3
-        assert_eq!(
-            final_memory.get(&(1, 3)),
-            Some(&TimestampedValues {
-                values: bba![1, 1, 1, 1, 1, 1, 1, 1],
-                timestamp: 2,
-            })
-        );
-        // start_index = 64 corresponds to label = 8
-        assert_eq!(
-            final_memory.get(&(2, 8)),
-            Some(&TimestampedValues {
-                values: bba![8, 7, 6, 5, 4, 3, 2, 1],
-                timestamp: 3,
-            })
-        );
-
-        // We need to do 1 + 1 + 0 = 2 adapters.
-        assert_eq!(
-            adapter_records.total_records() - num_records_before_finalize,
-            2
-        );
-    }
-
-    #[test]
-    fn test_write_read_initial_block_len_8_initial_memory() {
-        type F = BabyBear;
-
-        // Initialize initial memory with blocks at indices 0 and 2
-        let mut initial_memory = MemoryImage::default();
-        for i in 0..8 {
-            initial_memory.insert(&(1, i), F::from_canonical_u32(i + 1));
-            initial_memory.insert(&(1, 16 + i), F::from_canonical_u32(i + 1));
-        }
-
-        let (mut memory, mut adapter_records) = setup_test(initial_memory, 8);
-
-        // Verify initial state of block 0 (pointers 0–8)
-        memory.read(1, 0, 8, &mut adapter_records);
-        let initial_read_record_0 = memory.last_record();
-        assert_eq!(initial_read_record_0.data, bbvec![1, 2, 3, 4, 5, 6, 7, 8]);
-
-        // Verify initial state of block 2 (pointers 16–24)
-        memory.read(1, 16, 8, &mut adapter_records);
-        let initial_read_record_2 = memory.last_record();
-        assert_eq!(initial_read_record_2.data, bbvec![1, 2, 3, 4, 5, 6, 7, 8]);
-
-        // Test: Write a partial block to block 0 (pointer 0) and read back partially and fully
-        memory.write(1, 0, bbvec![9, 9, 9, 9], &mut adapter_records);
-        memory.read(1, 0, 2, &mut adapter_records);
-        let partial_read_record = memory.last_record();
-        assert_eq!(partial_read_record.data, bbvec![9, 9]);
-
-        memory.read(1, 0, 8, &mut adapter_records);
-        let full_read_record_0 = memory.last_record();
-        assert_eq!(full_read_record_0.data, bbvec![9, 9, 9, 9, 5, 6, 7, 8]);
-
-        // Test: Write a single element to pointer 2 and verify read in different lengths
-        memory.write(1, 2, bbvec![100], &mut adapter_records);
-        memory.read(1, 1, 4, &mut adapter_records);
-        let read_record_4 = memory.last_record();
-        assert_eq!(read_record_4.data, bbvec![9, 100, 9, 5]);
-
-        memory.read(1, 2, 8, &mut adapter_records);
-        let full_read_record_2 = memory.last_record();
-        assert_eq!(full_read_record_2.data, bba![100, 9, 5, 6, 7, 8, 0, 0]);
-
-        // Test: Write and read at the last pointer in block 2 (pointer 23, part of key (1, 2))
-        memory.write(1, 23, bbvec![77], &mut adapter_records);
-        memory.read(1, 23, 2, &mut adapter_records);
-        let boundary_read_record = memory.last_record();
-        assert_eq!(boundary_read_record.data, bba![77, 0]); // Last byte modified, ensuring boundary check
-
-        // Test: Reading from an uninitialized block (should default to 0)
-        memory.read(1, 10, 4, &mut adapter_records);
-        let default_read_record = memory.last_record();
-        assert_eq!(default_read_record.data, bba![0, 0, 0, 0]);
-
-        memory.read(1, 100, 4, &mut adapter_records);
-        let default_read_record = memory.last_record();
-        assert_eq!(default_read_record.data, bba![0, 0, 0, 0]);
-
-        // Test: Overwrite entire memory pointer 16–24 and verify
-        memory.write(
-            1,
-            16,
-            bbvec![50, 50, 50, 50, 50, 50, 50, 50],
-            &mut adapter_records,
-        );
-        memory.read(1, 16, 8, &mut adapter_records);
-        let overwrite_read_record = memory.last_record();
-        assert_eq!(
-            overwrite_read_record.data,
-            bba![50, 50, 50, 50, 50, 50, 50, 50]
-        ); // Verify entire block overwrite
-    }
-}
diff --git a/crates/vm/src/system/memory/offline_checker/bridge.rs b/crates/vm/src/system/memory/offline_checker/bridge.rs
index 2c7e180cfb..367e1344d7 100644
--- a/crates/vm/src/system/memory/offline_checker/bridge.rs
+++ b/crates/vm/src/system/memory/offline_checker/bridge.rs
@@ -1,3 +1,4 @@
+use getset::CopyGetters;
 use openvm_circuit_primitives::{
     assert_less_than::{AssertLessThanIo, AssertLtSubAir},
     is_zero::{IsZeroIo, IsZeroSubAir},
@@ -19,9 +20,9 @@ use crate::system::memory::{
 
 /// AUX_LEN is the number of auxiliary columns (aka the number of limbs that the input numbers will
 /// be decomposed into) for the `AssertLtSubAir` in the `MemoryOfflineChecker`.
-/// Warning: This requires that (clk_max_bits + decomp - 1) / decomp = AUX_LEN
+/// Warning: This requires that (timestamp_max_bits + decomp - 1) / decomp = AUX_LEN
 ///         in MemoryOfflineChecker (or whenever AssertLtSubAir is used)
-pub(crate) const AUX_LEN: usize = 2;
+pub const AUX_LEN: usize = 2;
 
 /// The [MemoryBridge] is used within AIR evaluation functions to constrain logical memory
 /// operations (read/write). It adds all necessary constraints and interactions.
@@ -34,14 +35,22 @@ impl MemoryBridge {
     /// Create a new [MemoryBridge] with the provided offline_checker.
     pub fn new(
         memory_bus: MemoryBus,
-        clk_max_bits: usize,
+        timestamp_max_bits: usize,
         range_bus: VariableRangeCheckerBus,
     ) -> Self {
         Self {
-            offline_checker: MemoryOfflineChecker::new(memory_bus, clk_max_bits, range_bus),
+            offline_checker: MemoryOfflineChecker::new(memory_bus, timestamp_max_bits, range_bus),
         }
     }
 
+    pub fn memory_bus(&self) -> MemoryBus {
+        self.offline_checker.memory_bus
+    }
+
+    pub fn range_bus(&self) -> VariableRangeCheckerBus {
+        self.offline_checker.timestamp_lt_air.bus
+    }
+
     /// Prepare a logical memory read operation.
     #[must_use]
     pub fn read<'a, T, V, const N: usize>(
@@ -256,17 +265,23 @@ impl<T: FieldAlgebra, V: Copy + Into<T>, const N: usize> MemoryWriteOperation<'_
     }
 }
 
-#[derive(Clone, Copy, Debug)]
+#[derive(Clone, Copy, Debug, CopyGetters)]
 struct MemoryOfflineChecker {
+    #[get_copy = "pub"]
     memory_bus: MemoryBus,
+    #[get_copy = "pub"]
     timestamp_lt_air: AssertLtSubAir,
 }
 
 impl MemoryOfflineChecker {
-    fn new(memory_bus: MemoryBus, clk_max_bits: usize, range_bus: VariableRangeCheckerBus) -> Self {
+    fn new(
+        memory_bus: MemoryBus,
+        timestamp_max_bits: usize,
+        range_bus: VariableRangeCheckerBus,
+    ) -> Self {
         Self {
             memory_bus,
-            timestamp_lt_air: AssertLtSubAir::new(range_bus, clk_max_bits),
+            timestamp_lt_air: AssertLtSubAir::new(range_bus, timestamp_max_bits),
         }
     }
 
diff --git a/crates/vm/src/system/memory/offline_checker/columns.rs b/crates/vm/src/system/memory/offline_checker/columns.rs
index 5a27b3e433..ef9821f859 100644
--- a/crates/vm/src/system/memory/offline_checker/columns.rs
+++ b/crates/vm/src/system/memory/offline_checker/columns.rs
@@ -9,37 +9,28 @@ use crate::system::memory::offline_checker::bridge::AUX_LEN;
 
 // repr(C) is needed to make sure that the compiler does not reorder the fields
 // we assume the order of the fields when using borrow or borrow_mut
-#[repr(C)]
 /// Base structure for auxiliary memory columns.
+#[repr(C)]
 #[derive(Clone, Copy, Debug, AlignedBorrow)]
 pub struct MemoryBaseAuxCols<T> {
     /// The previous timestamps in which the cells were accessed.
-    pub(in crate::system::memory) prev_timestamp: T,
+    pub prev_timestamp: T,
     /// The auxiliary columns to perform the less than check.
-    pub(in crate::system::memory) timestamp_lt_aux: LessThanAuxCols<T, AUX_LEN>,
+    pub timestamp_lt_aux: LessThanAuxCols<T, AUX_LEN>,
+}
+
+impl<F: PrimeField32> MemoryBaseAuxCols<F> {
+    #[inline(always)]
+    pub fn set_prev(&mut self, prev_timestamp: F) {
+        self.prev_timestamp = prev_timestamp;
+    }
 }
 
 #[repr(C)]
 #[derive(Clone, Copy, Debug, AlignedBorrow)]
 pub struct MemoryWriteAuxCols<T, const N: usize> {
-    pub(in crate::system::memory) base: MemoryBaseAuxCols<T>,
-    pub(in crate::system::memory) prev_data: [T; N],
-}
-
-impl<const N: usize, T> MemoryWriteAuxCols<T, N> {
-    pub(in crate::system::memory) fn new(
-        prev_data: [T; N],
-        prev_timestamp: T,
-        lt_aux: LessThanAuxCols<T, AUX_LEN>,
-    ) -> Self {
-        Self {
-            base: MemoryBaseAuxCols {
-                prev_timestamp,
-                timestamp_lt_aux: lt_aux,
-            },
-            prev_data,
-        }
-    }
+    pub base: MemoryBaseAuxCols<T>,
+    pub prev_data: [T; N],
 }
 
 impl<const N: usize, T> MemoryWriteAuxCols<T, N> {
@@ -47,13 +38,21 @@ impl<const N: usize, T> MemoryWriteAuxCols<T, N> {
         Self { base, prev_data }
     }
 
+    #[inline(always)]
     pub fn get_base(self) -> MemoryBaseAuxCols<T> {
         self.base
     }
 
+    #[inline(always)]
     pub fn prev_data(&self) -> &[T; N] {
         &self.prev_data
     }
+
+    /// Sets the previous data **without** updating the less than auxiliary columns.
+    #[inline(always)]
+    pub fn set_prev_data(&mut self, data: [T; N]) {
+        self.prev_data = data;
+    }
 }
 
 /// The auxiliary columns for a memory read operation with block size `N`.
@@ -67,10 +66,7 @@ pub struct MemoryReadAuxCols<T> {
 }
 
 impl<F: PrimeField32> MemoryReadAuxCols<F> {
-    pub(in crate::system::memory) fn new(
-        prev_timestamp: u32,
-        timestamp_lt_aux: LessThanAuxCols<F, AUX_LEN>,
-    ) -> Self {
+    pub fn new(prev_timestamp: u32, timestamp_lt_aux: LessThanAuxCols<F, AUX_LEN>) -> Self {
         Self {
             base: MemoryBaseAuxCols {
                 prev_timestamp: F::from_canonical_u32(prev_timestamp),
@@ -79,17 +75,24 @@ impl<F: PrimeField32> MemoryReadAuxCols<F> {
         }
     }
 
+    #[inline(always)]
     pub fn get_base(self) -> MemoryBaseAuxCols<F> {
         self.base
     }
+
+    /// Sets the previous timestamp **without** updating the less than auxiliary columns.
+    #[inline(always)]
+    pub fn set_prev(&mut self, timestamp: F) {
+        self.base.prev_timestamp = timestamp;
+    }
 }
 
 #[repr(C)]
 #[derive(Clone, Debug, AlignedBorrow)]
 pub struct MemoryReadOrImmediateAuxCols<T> {
-    pub(crate) base: MemoryBaseAuxCols<T>,
-    pub(crate) is_immediate: T,
-    pub(crate) is_zero_aux: T,
+    pub base: MemoryBaseAuxCols<T>,
+    pub is_immediate: T,
+    pub is_zero_aux: T,
 }
 
 impl<T, const N: usize> AsRef<MemoryReadAuxCols<T>> for MemoryWriteAuxCols<T, N> {
@@ -102,3 +105,21 @@ impl<T, const N: usize> AsRef<MemoryReadAuxCols<T>> for MemoryWriteAuxCols<T, N>
         unsafe { &*(self as *const MemoryWriteAuxCols<T, N> as *const MemoryReadAuxCols<T>) }
     }
 }
+
+impl<T, const N: usize> AsMut<MemoryBaseAuxCols<T>> for MemoryWriteAuxCols<T, N> {
+    fn as_mut(&mut self) -> &mut MemoryBaseAuxCols<T> {
+        &mut self.base
+    }
+}
+
+impl<T> AsMut<MemoryBaseAuxCols<T>> for MemoryReadAuxCols<T> {
+    fn as_mut(&mut self) -> &mut MemoryBaseAuxCols<T> {
+        &mut self.base
+    }
+}
+
+impl<T> AsMut<MemoryBaseAuxCols<T>> for MemoryReadOrImmediateAuxCols<T> {
+    fn as_mut(&mut self) -> &mut MemoryBaseAuxCols<T> {
+        &mut self.base
+    }
+}
diff --git a/crates/vm/src/system/memory/offline_checker/mod.rs b/crates/vm/src/system/memory/offline_checker/mod.rs
index ac9f32dc18..8b15328185 100644
--- a/crates/vm/src/system/memory/offline_checker/mod.rs
+++ b/crates/vm/src/system/memory/offline_checker/mod.rs
@@ -5,3 +5,18 @@ mod columns;
 pub use bridge::*;
 pub use bus::*;
 pub use columns::*;
+
+#[repr(C)]
+#[derive(Debug, Clone)]
+pub struct MemoryReadAuxRecord {
+    pub prev_timestamp: u32,
+}
+
+#[repr(C)]
+#[derive(Debug, Clone)]
+pub struct MemoryWriteAuxRecord<T, const NUM_LIMBS: usize> {
+    pub prev_timestamp: u32,
+    pub prev_data: [T; NUM_LIMBS],
+}
+
+pub type MemoryWriteBytesAuxRecord<const NUM_LIMBS: usize> = MemoryWriteAuxRecord<u8, NUM_LIMBS>;
diff --git a/crates/vm/src/system/memory/online.rs b/crates/vm/src/system/memory/online.rs
index a5bf663e4c..eff0585bcc 100644
--- a/crates/vm/src/system/memory/online.rs
+++ b/crates/vm/src/system/memory/online.rs
@@ -1,151 +1,1119 @@
-use std::fmt::Debug;
+use std::{array::from_fn, fmt::Debug, num::NonZero};
 
-use openvm_stark_backend::p3_field::PrimeField32;
-use serde::{Deserialize, Serialize};
+use getset::Getters;
+use itertools::zip_eq;
+use openvm_instructions::exe::SparseMemoryImage;
+use openvm_stark_backend::{
+    p3_field::{Field, PrimeField32},
+    p3_maybe_rayon::prelude::*,
+    p3_util::log2_strict_usize,
+};
+use tracing::instrument;
 
-use super::paged_vec::{AddressMap, PAGE_SIZE};
 use crate::{
-    arch::MemoryConfig,
-    system::memory::{offline::INITIAL_TIMESTAMP, MemoryImage, RecordId},
+    arch::{
+        AddressSpaceHostConfig, AddressSpaceHostLayout, DenseRecordArena, MemoryConfig,
+        RecordArena, MAX_CELL_BYTE_SIZE,
+    },
+    system::{
+        memory::{
+            adapter::records::{AccessLayout, AccessRecordHeader, MERGE_AND_NOT_SPLIT_FLAG},
+            MemoryAddress, TimestampedEquipartition, TimestampedValues, CHUNK,
+        },
+        TouchedMemory,
+    },
+    utils::slice_as_bytes,
 };
 
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub enum MemoryLogEntry<T> {
-    Read {
-        address_space: u32,
-        pointer: u32,
+mod basic;
+#[cfg(any(unix, windows))]
+mod memmap;
+mod paged_vec;
+
+#[cfg(not(any(unix, windows)))]
+pub use basic::*;
+#[cfg(any(unix, windows))]
+pub use memmap::*;
+pub use paged_vec::PagedVec;
+
+#[cfg(all(any(unix, windows), not(feature = "basic-memory")))]
+pub type MemoryBackend = memmap::MmapMemory;
+#[cfg(any(not(any(unix, windows)), feature = "basic-memory"))]
+pub type MemoryBackend = basic::BasicMemory;
+
+pub const INITIAL_TIMESTAMP: u32 = 0;
+/// Default mmap page size. Change this if using THP (transparent huge pages).
+pub const PAGE_SIZE: usize = 4096;
+
+// Memory access constraints
+const MAX_BLOCK_SIZE: usize = 32;
+const MIN_ALIGN: usize = 1;
+const MAX_SEGMENTS: usize = MAX_BLOCK_SIZE / MIN_ALIGN;
+
+/// (address_space, pointer)
+pub type Address = (u32, u32);
+
+/// API for any memory implementation that allocates a contiguous region of memory.
+pub trait LinearMemory {
+    /// Create instance of `Self` with `size` bytes.
+    fn new(size: usize) -> Self;
+    /// Allocated size of the memory in bytes.
+    fn size(&self) -> usize;
+    /// Returns the entire memory as a raw byte slice.
+    fn as_slice(&self) -> &[u8];
+    /// Returns the entire memory as a mutable raw byte slice.
+    fn as_mut_slice(&mut self) -> &mut [u8];
+    /// Fill the memory with zeros.
+    fn fill_zero(&mut self) {
+        self.as_mut_slice().fill(0);
+    }
+    /// Read `BLOCK` from `self` at `from` address without moving it.
+    ///
+    /// Panics or segfaults if `from..from + size_of::<BLOCK>()` is out of bounds.
+    ///
+    /// # Safety
+    /// - `BLOCK` should be "plain old data" (see [`Pod`](https://docs.rs/bytemuck/latest/bytemuck/trait.Pod.html)).
+    ///   We do not add a trait bound due to Plonky3 types not implementing the trait.
+    /// - See [`core::ptr::read`] for similar considerations.
+    /// - Memory at `from` must be properly aligned for `BLOCK`. Use [`Self::read_unaligned`] if
+    ///   alignment is not guaranteed.
+    unsafe fn read<BLOCK: Copy>(&self, from: usize) -> BLOCK;
+    /// Read `BLOCK` from `self` at `from` address without moving it.
+    /// Same as [`Self::read`] except that it does not require alignment.
+    ///
+    /// Panics or segfaults if `from..from + size_of::<BLOCK>()` is out of bounds.
+    ///
+    /// # Safety
+    /// - `BLOCK` should be "plain old data" (see [`Pod`](https://docs.rs/bytemuck/latest/bytemuck/trait.Pod.html)).
+    ///   We do not add a trait bound due to Plonky3 types not implementing the trait.
+    /// - See [`core::ptr::read`] for similar considerations.
+    unsafe fn read_unaligned<BLOCK: Copy>(&self, from: usize) -> BLOCK;
+    /// Write `BLOCK` to `self` at `start` address without reading the old value. Does not drop
+    /// `values`. Semantically, `values` is moved into the location pointed to by `start`.
+    ///
+    /// Panics or segfaults if `start..start + size_of::<BLOCK>()` is out of bounds.
+    ///
+    /// # Safety
+    /// - See [`core::ptr::write`] for similar considerations.
+    /// - Memory at `start` must be properly aligned for `BLOCK`. Use [`Self::write_unaligned`] if
+    ///   alignment is not guaranteed.
+    unsafe fn write<BLOCK: Copy>(&mut self, start: usize, values: BLOCK);
+    /// Write `BLOCK` to `self` at `start` address without reading the old value. Does not drop
+    /// `values`. Semantically, `values` is moved into the location pointed to by `start`.
+    /// Same as [`Self::write`] but without alignment requirement.
+    ///
+    /// Panics or segfaults if `start..start + size_of::<BLOCK>()` is out of bounds.
+    ///
+    /// # Safety
+    /// - See [`core::ptr::write`] for similar considerations.
+    unsafe fn write_unaligned<BLOCK: Copy>(&mut self, start: usize, values: BLOCK);
+    /// Swaps `values` with memory at `start..start + size_of::<BLOCK>()`.
+    ///
+    /// Panics or segfaults if `start..start + size_of::<BLOCK>()` is out of bounds.
+    ///
+    /// # Safety
+    /// - `BLOCK` should be "plain old data" (see [`Pod`](https://docs.rs/bytemuck/latest/bytemuck/trait.Pod.html)).
+    ///   We do not add a trait bound due to Plonky3 types not implementing the trait.
+    /// - Memory at `start` must be properly aligned for `BLOCK`.
+    /// - The data in `values` should not overlap with memory in `self`.
+    unsafe fn swap<BLOCK: Copy>(&mut self, start: usize, values: &mut BLOCK);
+    /// Copies `data` into memory at `to` address.
+    ///
+    /// Panics or segfaults if `to..to + size_of_val(data)` is out of bounds.
+    ///
+    /// # Safety
+    /// - `T` should be "plain old data" (see [`Pod`](https://docs.rs/bytemuck/latest/bytemuck/trait.Pod.html)).
+    ///   We do not add a trait bound due to Plonky3 types not implementing the trait.
+    /// - The underlying memory of `data` should not overlap with `self`.
+    /// - The starting pointer of `self` should be aligned to `T`.
+    /// - The memory pointer at `to` should be aligned to `T`.
+    unsafe fn copy_nonoverlapping<T: Copy>(&mut self, to: usize, data: &[T]);
+    /// Returns a slice `&[T]` of `len` elements starting at byte offset `start`.
+    ///
+    /// Panics or segfaults if `start..start + len * size_of::<T>()` is out of bounds.
+    ///
+    /// # Safety
+    /// - `T` should be "plain old data" (see [`Pod`](https://docs.rs/bytemuck/latest/bytemuck/trait.Pod.html)).
+    ///   We do not add a trait bound due to Plonky3 types not implementing the trait.
+    /// - Memory at `start` must be properly aligned for `T`.
+    unsafe fn get_aligned_slice<T: Copy>(&self, start: usize, len: usize) -> &[T];
+}
+
+/// Map from address space to linear memory.
+/// The underlying memory is typeless, stored as raw bytes, but usage implicitly assumes that each
+/// address space has memory cells of a fixed type (e.g., `u8, F`). We do not use a typemap for
+/// performance reasons, and it is up to the user to enforce types. Needless to say, this is a very
+/// `unsafe` API.
+#[derive(Debug, Clone)]
+pub struct AddressMap<M: LinearMemory = MemoryBackend> {
+    /// Underlying memory data.
+    pub mem: Vec<M>,
+    /// Host configuration for each address space.
+    pub config: Vec<AddressSpaceHostConfig>,
+}
+
+impl Default for AddressMap {
+    fn default() -> Self {
+        Self::from_mem_config(&MemoryConfig::default())
+    }
+}
+
+impl<M: LinearMemory> AddressMap<M> {
+    /// Allocates one linear memory per `config` entry, `num_cells * layout.size()` bytes each.
+    pub fn new(config: Vec<AddressSpaceHostConfig>) -> Self {
+        assert_eq!(config[0].num_cells, 0, "Address space 0 must have 0 cells");
+        // Panics (via `unwrap`) if an address space's byte size overflows.
+        let byte_len = |c: &AddressSpaceHostConfig| c.num_cells.checked_mul(c.layout.size());
+        let mem = config.iter().map(|c| M::new(byte_len(c).unwrap())).collect();
+        Self { mem, config }
+    }
+
+    pub fn from_mem_config(mem_config: &MemoryConfig) -> Self {
+        Self::new(mem_config.addr_spaces.clone())
+    }
+
+    #[inline(always)]
+    pub fn get_memory(&self) -> &Vec<M> {
+        &self.mem
+    }
+
+    #[inline(always)]
+    pub fn get_memory_mut(&mut self) -> &mut Vec<M> {
+        &mut self.mem
+    }
+
+    /// Zeroes the linear memory of every address space; `config` is left untouched.
+    pub fn fill_zero(&mut self) {
+        self.mem
+            .iter_mut()
+            .for_each(|space| space.fill_zero());
+    }
+
+    /// Reads cell `ptr` of `addr_space` and converts its bytes to `F` via the space's layout.
+    ///
+    /// # Safety
+    /// - Assumes `addr_space` is within the configured memory and not out of bounds
+    pub unsafe fn get_f<F: PrimeField32>(&self, addr_space: u32, ptr: u32) -> F {
+        let layout = &self.config.get_unchecked(addr_space as usize).layout;
+        layout.to_field(self.get_u8_slice(addr_space, ptr as usize * layout.size(), layout.size()))
+    }
+
+    /// # Safety
+    /// - `T` **must** be the correct type for a single memory cell for `addr_space`
+    /// - Assumes `addr_space` is within the configured memory and not out of bounds
+    pub unsafe fn get<T: Copy>(&self, (addr_space, ptr): Address) -> T {
+        debug_assert_eq!(
+            size_of::<T>(),
+            self.config[addr_space as usize].layout.size()
+        );
+        // SAFETY:
+        // - alignment is automatic since we multiply by `size_of::<T>()`
+        self.mem
+            .get_unchecked(addr_space as usize)
+            .read((ptr as usize) * size_of::<T>())
+    }
+
+    /// Panics or segfaults if `ptr..ptr + len` is out of bounds
+    ///
+    /// # Safety
+    /// - `T` **must** be the correct type for a single memory cell for `addr_space`
+    /// - Assumes `addr_space` is within the configured memory and not out of bounds
+    pub unsafe fn get_slice<T: Copy + Debug>(
+        &self,
+        (addr_space, ptr): Address,
         len: usize,
-    },
-    Write {
-        address_space: u32,
-        pointer: u32,
-        data: Vec<T>,
-    },
-    IncrementTimestampBy(u32),
+    ) -> &[T] {
+        debug_assert_eq!(
+            size_of::<T>(),
+            self.config[addr_space as usize].layout.size()
+        );
+        let start = (ptr as usize) * size_of::<T>();
+        let mem = self.mem.get_unchecked(addr_space as usize);
+        // SAFETY:
+        // - alignment is automatic since we multiply by `size_of::<T>()`
+        mem.get_aligned_slice(start, len)
+    }
+
+    /// Borrows the `len` bytes at **byte** offset `start` of `addr_space`'s linear memory.
+    /// Panics or segfaults if `start..start + len` is out of bounds.
+    ///
+    /// # Safety
+    /// - Assumes `addr_space` is within the configured memory and not out of bounds
+    pub unsafe fn get_u8_slice(&self, addr_space: u32, start: usize, len: usize) -> &[u8] {
+        // `u8` has alignment 1, so the alignment requirement holds for every `start`.
+        self.mem.get_unchecked(addr_space as usize).get_aligned_slice(start, len)
+    }
+
+    /// Copies `data` into the memory cells starting at cell index `ptr` of `addr_space`.
+    ///
+    /// Panics or segfaults if cells `ptr..ptr + data.len()` of `addr_space` are out of bounds.
+    ///
+    /// # Safety
+    /// - `T` **must** be the correct type for a single memory cell for `addr_space`
+    /// - `addr_space` is in bounds and its linear memory is aligned to `T`.
+    pub unsafe fn copy_slice_nonoverlapping<T: Copy>(
+        &mut self,
+        (addr_space, ptr): Address,
+        data: &[T],
+    ) {
+        let start = (ptr as usize) * size_of::<T>();
+        // SAFETY:
+        // - Linear memory is aligned to `T` and `start` is multiple of `size_of::<T>()` so
+        //   alignment is satisfied.
+        // - `data` borrows immutably while `self` is borrowed mutably, so they cannot overlap
+        self.mem
+            .get_unchecked_mut(addr_space as usize)
+            .copy_nonoverlapping(start, data);
+    }
+
+    // TODO[jpw]: stabilize the boundary memory image format and how to construct
+    /// Writes every `((addr_space, index), byte)` entry of `sparse_map` into linear memory,
+    /// using `index` unscaled as the offset handed to `write_unaligned`.
+    ///
+    /// Panics if an entry names an address space that is not configured.
+    pub fn set_from_sparse(&mut self, sparse_map: &SparseMemoryImage) {
+        for (&(addr_space, index), &data_byte) in sparse_map.iter() {
+            // Checked lookup: this fn is safe, so an unknown address space must panic, not be UB.
+            let mem = &mut self.mem[addr_space as usize];
+            // SAFETY:
+            // - NOTE(review): assumes `index` is a valid offset into `mem`; confirm against the
+            //   contract of `LinearMemory::write_unaligned` and the callers' image format.
+            unsafe { mem.write_unaligned(index as usize, data_byte) };
+        }
+    }
+}
+
+/// API for guest memory conforming to OpenVM ISA
+// @dev Note we don't make this a trait because phantom executors currently need a concrete type for
+// guest memory
+#[derive(Debug, Clone)]
+pub struct GuestMemory {
+    pub memory: AddressMap,
+}
+
+impl GuestMemory {
+    pub fn new(addr: AddressMap) -> Self {
+        Self { memory: addr }
+    }
+
+    /// Returns `[pointer:BLOCK_SIZE]_{address_space}`
+    ///
+    /// # Safety
+    /// The type `T` must be stack-allocated `repr(C)` or `repr(transparent)`,
+    /// and it must be the exact type used to represent a single memory cell in
+    /// address space `address_space`. For standard usage,
+    /// `T` is either `u8` or `F` where `F` is the base field of the ZK backend.
+    #[inline(always)]
+    pub unsafe fn read<T, const BLOCK_SIZE: usize>(
+        &self,
+        addr_space: u32,
+        ptr: u32,
+    ) -> [T; BLOCK_SIZE]
+    where
+        T: Copy + Debug,
+    {
+        self.debug_assert_cell_type::<T>(addr_space);
+        // SAFETY:
+        // - `T` should be "plain old data"
+        // - alignment for `[T; BLOCK_SIZE]` is automatic since we multiply by `size_of::<T>()`
+        self.memory
+            .get_memory()
+            .get_unchecked(addr_space as usize)
+            .read((ptr as usize) * size_of::<T>())
+    }
+
+    /// Writes `values` to `[pointer:BLOCK_SIZE]_{address_space}`
+    ///
+    /// # Safety
+    /// See [`GuestMemory::read`].
+    #[inline(always)]
+    pub unsafe fn write<T, const BLOCK_SIZE: usize>(
+        &mut self,
+        addr_space: u32,
+        ptr: u32,
+        values: [T; BLOCK_SIZE],
+    ) where
+        T: Copy + Debug,
+    {
+        self.debug_assert_cell_type::<T>(addr_space);
+        // SAFETY:
+        // - alignment for `[T; BLOCK_SIZE]` is automatic since we multiply by `size_of::<T>()`
+        self.memory
+            .get_memory_mut()
+            .get_unchecked_mut(addr_space as usize)
+            .write((ptr as usize) * size_of::<T>(), values);
+    }
+
+    /// Swaps `values` with `[pointer:BLOCK_SIZE]_{address_space}`.
+    ///
+    /// # Safety
+    /// See [`GuestMemory::read`] and [`LinearMemory::swap`].
+    #[inline(always)]
+    pub unsafe fn swap<T, const BLOCK_SIZE: usize>(
+        &mut self,
+        addr_space: u32,
+        ptr: u32,
+        values: &mut [T; BLOCK_SIZE],
+    ) where
+        T: Copy + Debug,
+    {
+        self.debug_assert_cell_type::<T>(addr_space);
+        // SAFETY:
+        // - alignment for `[T; BLOCK_SIZE]` is automatic since we multiply by `size_of::<T>()`
+        self.memory
+            .get_memory_mut()
+            .get_unchecked_mut(addr_space as usize)
+            .swap((ptr as usize) * size_of::<T>(), values);
+    }
+
+    #[inline(always)]
+    #[allow(clippy::missing_safety_doc)]
+    pub unsafe fn get_slice<T: Copy + Debug>(&self, addr_space: u32, ptr: u32, len: usize) -> &[T] {
+        self.memory.get_slice((addr_space, ptr), len)
+    }
+
+    #[inline(always)]
+    fn debug_assert_cell_type<T>(&self, addr_space: u32) {
+        debug_assert_eq!(
+            size_of::<T>(),
+            self.memory.config[addr_space as usize].layout.size()
+        );
+    }
 }
 
-/// A simple data structure to read to/write from memory.
-///
-/// Stores a log of memory accesses to reconstruct aspects of memory state for trace generation.
-#[derive(Debug)]
-pub struct Memory<F> {
-    pub(super) data: AddressMap<F, PAGE_SIZE>,
-    pub(super) log: Vec<MemoryLogEntry<F>>,
-    timestamp: u32,
+#[repr(C)]
+#[derive(Clone, Copy, PartialEq, Eq, Debug, Default)]
+pub struct AccessMetadata {
+    /// Timestamp in the low 29 bits; top 3 bits hold 0 (size 0) or log2(block_size) + 1.
+    pub timestamp_and_log_block_size: u32,
+    /// Distance back to the block's first slot, in ALIGN units; 0 at the block start itself.
+    pub offset_to_start: u8,
 }
 
-impl<F: PrimeField32> Memory<F> {
-    pub fn new(mem_config: &MemoryConfig) -> Self {
+impl AccessMetadata {
+    const TIMESTAMP_MASK: u32 = (1 << 29) - 1;
+    const LOG_BLOCK_SIZE_SHIFT: u32 = 29;
+
+    pub fn new(timestamp: u32, block_size: u8, offset_to_start: u8) -> Self {
+        debug_assert!(timestamp < (1 << 29), "Timestamp must be less than 2^29");
+        debug_assert!(
+            block_size == 0 || (block_size.is_power_of_two() && block_size <= MAX_BLOCK_SIZE as u8),
+            "Block size must be 0 or power of 2 and <= {}",
+            MAX_BLOCK_SIZE
+        );
+
+        let encoded_block_size = if block_size == 0 {
+            0
+        } else {
+            // SAFETY: block_size is non-zero in this branch (the `if` above handles 0)
+            unsafe { NonZero::new_unchecked(block_size) }.ilog2() + 1
+        };
+        let packed = timestamp | (encoded_block_size << Self::LOG_BLOCK_SIZE_SHIFT);
+
         Self {
-            data: AddressMap::from_mem_config(mem_config),
-            timestamp: INITIAL_TIMESTAMP + 1,
-            log: Vec::with_capacity(mem_config.access_capacity),
+            timestamp_and_log_block_size: packed,
+            offset_to_start,
         }
     }
 
-    /// Instantiates a new `Memory` data structure from an image.
-    pub fn from_image(image: MemoryImage<F>, access_capacity: usize) -> Self {
+    pub fn timestamp(&self) -> u32 {
+        self.timestamp_and_log_block_size & Self::TIMESTAMP_MASK
+    }
+
+    /// Decodes the block size from the top bits: code 0 means 0, code `k > 0` means `2^(k - 1)`.
+    pub fn block_size(&self) -> u8 {
+        match self.timestamp_and_log_block_size >> Self::LOG_BLOCK_SIZE_SHIFT {
+            0 => 0,
+            // Inverse of the `ilog2() + 1` encoding applied in `new`.
+            code => 1 << (code - 1),
+        }
+    }
+}
+
+/// Online memory that stores additional information for trace generation purposes.
+/// In particular, keeps track of timestamp.
+#[derive(Getters)]
+pub struct TracingMemory {
+    pub timestamp: u32,
+    /// The initial block size -- this depends on the type of boundary chip.
+    initial_block_size: usize,
+    /// The underlying data memory, with memory cells typed by address space: see [AddressMap].
+    #[getset(get = "pub")]
+    pub data: GuestMemory,
+    /// Maps addr_space to (ptr / min_block_size[addr_space] -> AccessMetadata) for latest access
+    /// metadata. Uses paged storage for memory efficiency. AccessMetadata stores offset_to_start
+    /// (in ALIGN units), block_size, and timestamp (latter two only valid at offset_to_start ==
+    /// 0).
+    pub(super) meta: Vec<PagedVec<AccessMetadata, PAGE_SIZE>>,
+    /// For each `addr_space`, the minimum block size allowed for memory accesses. In other words,
+    /// all memory accesses in `addr_space` must be aligned to this block size.
+    pub min_block_size: Vec<u32>,
+    pub access_adapter_records: DenseRecordArena,
+}
+
+// min_block_size * cell_size never exceeds 8
+const INITIAL_CELL_BUFFER: &[u8] = &[0u8; 8];
+// min_block_size never exceeds 8
+const INITIAL_TIMESTAMP_BUFFER: &[u32] = &[INITIAL_TIMESTAMP; 8];
+
+impl TracingMemory {
+    pub fn new(
+        mem_config: &MemoryConfig,
+        initial_block_size: usize,
+        access_adapter_arena_size_bound: usize,
+    ) -> Self {
+        let image = GuestMemory::new(AddressMap::from_mem_config(mem_config));
+        Self::from_image(image, initial_block_size, access_adapter_arena_size_bound)
+    }
+
+    /// Constructor from pre-existing memory image.
+    pub fn from_image(
+        image: GuestMemory,
+        initial_block_size: usize,
+        access_adapter_arena_size_bound: usize,
+    ) -> Self {
+        let (meta, min_block_size): (Vec<_>, Vec<_>) =
+            zip_eq(image.memory.get_memory(), &image.memory.config)
+                .map(|(mem, addr_sp)| {
+                    let num_cells = mem.size() / addr_sp.layout.size();
+                    let min_block_size = addr_sp.min_block_size;
+                    let total_metadata_len = num_cells.div_ceil(min_block_size);
+                    (PagedVec::new(total_metadata_len), min_block_size as u32)
+                })
+                .unzip();
+        let access_adapter_records =
+            DenseRecordArena::with_byte_capacity(access_adapter_arena_size_bound);
         Self {
             data: image,
+            meta,
+            min_block_size,
             timestamp: INITIAL_TIMESTAMP + 1,
-            log: Vec::with_capacity(access_capacity),
+            initial_block_size,
+            access_adapter_records,
         }
     }
 
-    fn last_record_id(&self) -> RecordId {
-        RecordId(self.log.len() - 1)
+    #[inline(always)]
+    fn assert_alignment(&self, block_size: usize, align: usize, addr_space: u32, ptr: u32) {
+        debug_assert!(block_size.is_power_of_two());
+        debug_assert_eq!(block_size % align, 0);
+        debug_assert_ne!(addr_space, 0);
+        debug_assert_eq!(align as u32, self.min_block_size[addr_space as usize]);
+        assert_eq!(
+            ptr % (align as u32),
+            0,
+            "pointer={ptr} not aligned to {align}"
+        );
     }
 
-    /// Writes an array of values to the memory at the specified address space and start index.
-    ///
-    /// Returns the `RecordId` for the memory record and the previous data.
-    pub fn write<const N: usize>(
+    /// Get block metadata by jumping to the start of the block.
+    /// Returns (block_start_pointer, block_metadata).
+    #[inline(always)]
+    fn get_block_metadata<const ALIGN: usize>(
         &mut self,
-        address_space: u32,
-        pointer: u32,
-        values: [F; N],
-    ) -> (RecordId, [F; N]) {
-        assert!(N.is_power_of_two());
+        address_space: usize,
+        pointer: usize,
+    ) -> (u32, AccessMetadata) {
+        let ptr_index = pointer / ALIGN;
+        let meta_page = unsafe { self.meta.get_unchecked_mut(address_space) };
+        let current_meta = meta_page.get(ptr_index);
 
-        let prev_data = self.data.set_range(&(address_space, pointer), &values);
+        let (block_start_index, block_metadata) = if current_meta.offset_to_start == 0 {
+            (ptr_index, current_meta)
+        } else {
+            let offset = current_meta.offset_to_start;
+            let start_idx = ptr_index - offset as usize;
+            let start_meta = meta_page.get(start_idx);
+            (start_idx, start_meta)
+        };
 
-        self.log.push(MemoryLogEntry::Write {
-            address_space,
-            pointer,
-            data: values.to_vec(),
-        });
-        self.timestamp += 1;
+        let block_start_pointer = (block_start_index * ALIGN) as u32;
+
+        (block_start_pointer, block_metadata)
+    }
+
+    /// Timestamp stored at the start of the block covering `pointer` in `address_space`.
+    #[inline(always)]
+    fn get_timestamp<const ALIGN: usize>(&mut self, address_space: usize, pointer: usize) -> u32 {
+        let slot = pointer / ALIGN;
+        // SAFETY: address_space is assumed to be valid and within bounds
+        let page = unsafe { self.meta.get_unchecked_mut(address_space) };
+        let here = page.get(slot);
+        match here.offset_to_start {
+            // `pointer` is itself a block start: its own metadata holds the timestamp.
+            0 => here.timestamp(),
+            // Otherwise hop back `back` slots to the block start and read it there.
+            back => page.get(slot - back as usize).timestamp(),
+        }
+    }
+
+    /// Records `[pointer:BLOCK_SIZE]` as one block last accessed at `timestamp`.
+    /// Only the first slot carries timestamp and block size; the rest point back to it.
+    #[inline(always)]
+    fn set_meta_block<const BLOCK_SIZE: usize, const ALIGN: usize>(
+        &mut self,
+        address_space: usize,
+        pointer: usize,
+        timestamp: u32,
+    ) {
+        let first_slot = pointer / ALIGN;
+        let slot_count = BLOCK_SIZE / ALIGN;
+        // SAFETY: address_space is assumed to be valid and within bounds
+        let page = unsafe { self.meta.get_unchecked_mut(address_space) };
+        // Head slot: the only one holding the real timestamp and block size.
+        let head = AccessMetadata::new(timestamp, BLOCK_SIZE as u8, 0);
+        page.set(first_slot, head);
+        // Tail slots: each stores just its distance (in ALIGN units) back to the head.
+        for back in 1..slot_count {
+            page.set(first_slot + back, AccessMetadata::new(0, 0, back as u8));
+        }
+    }
 
-        (self.last_record_id(), prev_data)
+    /// Appends a split record for `header`, snapshotting the block's current bytes from memory.
+    /// Does nothing when the block is already at its lowest block size.
+    pub(crate) fn add_split_record(&mut self, header: AccessRecordHeader) {
+        if header.block_size == header.lowest_block_size {
+            return;
+        }
+        // Cell units -> byte units (the arithmetic stays in `u32`, as the header fields are).
+        let byte_start = (header.pointer * header.type_size) as usize;
+        let byte_len = (header.block_size * header.type_size) as usize;
+        let memory = &self.data.memory;
+        // SAFETY: NOTE(review): assumes `header` describes an in-bounds block; confirm callers.
+        let bytes = unsafe { memory.get_u8_slice(header.address_space, byte_start, byte_len) };
+
+        let layout = AccessLayout::from_record_header(&header);
+        let record = self.access_adapter_records.alloc(layout);
+        *record.header = header;
+        record.data.copy_from_slice(bytes);
+        // we don't mind garbage values in prev_*
+    }
+
+    /// `data_slice` is the underlying data of the record in raw host memory format.
+    pub(crate) fn add_merge_record(
+        &mut self,
+        header: AccessRecordHeader,
+        data_slice: &[u8],
+        prev_ts: &[u32],
+    ) {
+        if header.block_size == header.lowest_block_size {
+            return;
+        }
+
+        let record_mut = self
+            .access_adapter_records
+            .alloc(AccessLayout::from_record_header(&header));
+        *record_mut.header = header;
+        record_mut.header.timestamp_and_mask |= MERGE_AND_NOT_SPLIT_FLAG;
+        record_mut.data.copy_from_slice(data_slice);
+        record_mut.timestamps.copy_from_slice(prev_ts);
     }
 
-    /// Reads an array of values from the memory at the specified address space and start index.
-    pub fn read<const N: usize>(&mut self, address_space: u32, pointer: u32) -> (RecordId, [F; N]) {
-        assert!(N.is_power_of_two());
+    /// Calculate splits and merges needed for a memory access.
+    /// Returns Some((splits, merge)) or None if no operations needed.
+    #[inline(always)]
+    #[allow(clippy::type_complexity)]
+    fn calculate_splits_and_merges<const BLOCK_SIZE: usize, const ALIGN: usize>(
+        &mut self,
+        address_space: usize,
+        pointer: usize,
+    ) -> Option<(Vec<(usize, usize)>, (usize, usize))> {
+        // Skip adapters if this is a repeated access to the same location with same size
+        let (start_ptr, block_meta) = self.get_block_metadata::<ALIGN>(address_space, pointer);
+        if block_meta.block_size() == BLOCK_SIZE as u8 && start_ptr == pointer as u32 {
+            return None;
+        }
+
+        // Split intersecting blocks to align bytes
+        let mut splits_buf = [(0usize, 0usize); MAX_SEGMENTS];
+        let mut splits_count = 0;
+        let mut current_ptr = pointer;
+        let end_ptr = pointer + BLOCK_SIZE;
+
+        while current_ptr < end_ptr {
+            let (start_ptr, block_metadata) =
+                self.get_block_metadata::<ALIGN>(address_space, current_ptr);
+
+            if block_metadata.block_size() == 0 {
+                current_ptr += ALIGN;
+                continue;
+            }
 
-        self.log.push(MemoryLogEntry::Read {
-            address_space,
-            pointer,
-            len: N,
+            if block_metadata.block_size() > ALIGN as u8 {
+                // SAFETY: splits_count < MAX_SEGMENTS by construction since we iterate over
+                // at most BLOCK_SIZE/ALIGN segments and BLOCK_SIZE <= MAX_BLOCK_SIZE
+                unsafe {
+                    *splits_buf.get_unchecked_mut(splits_count) =
+                        (start_ptr as usize, block_metadata.block_size() as usize);
+                }
+                splits_count += 1;
+            }
+
+            // Skip to the next segment after this block ends
+            current_ptr = start_ptr as usize + block_metadata.block_size() as usize;
+        }
+
+        let merge = (pointer, BLOCK_SIZE);
+
+        Some((splits_buf[..splits_count].to_vec(), merge))
+    }
+
+    #[inline(always)]
+    fn split_by_meta<T: Copy, const MIN_BLOCK_SIZE: usize>(
+        &mut self,
+        start_ptr: u32,
+        timestamp: u32,
+        block_size: u8,
+        address_space: usize,
+    ) {
+        if block_size == MIN_BLOCK_SIZE as u8 {
+            return;
+        }
+        let begin = start_ptr as usize / MIN_BLOCK_SIZE;
+        let meta_page = unsafe { self.meta.get_unchecked_mut(address_space) };
+
+        for i in 0..(block_size as usize / MIN_BLOCK_SIZE) {
+            // Each split piece becomes its own block start
+            meta_page.set(
+                begin + i,
+                AccessMetadata::new(timestamp, MIN_BLOCK_SIZE as u8, 0),
+            );
+        }
+        self.add_split_record(AccessRecordHeader {
+            timestamp_and_mask: timestamp,
+            address_space: address_space as u32,
+            pointer: start_ptr,
+            block_size: block_size as u32,
+            lowest_block_size: MIN_BLOCK_SIZE as u32,
+            type_size: size_of::<T>() as u32,
         });
+    }
 
-        let values = if address_space == 0 {
-            assert_eq!(N, 1, "cannot batch read from address space 0");
-            [F::from_canonical_u32(pointer); N]
+    /// Returns the timestamp of the previous access to `[pointer:BLOCK_SIZE]_{address_space}`.
+    ///
+    /// Caller must ensure alignment (e.g. via `assert_alignment`) prior to calling this function.
+    #[inline(always)]
+    fn prev_access_time<T: Copy, const BLOCK_SIZE: usize, const ALIGN: usize>(
+        &mut self,
+        address_space: usize,
+        pointer: usize,
+        prev_values: &[T; BLOCK_SIZE],
+    ) -> u32 {
+        debug_assert_eq!(ALIGN, self.data.memory.config[address_space].min_block_size);
+        debug_assert_eq!(
+            unsafe {
+                self.data
+                    .memory
+                    .config
+                    .get_unchecked(address_space)
+                    .layout
+                    .size()
+            },
+            size_of::<T>()
+        );
+        // Calculate what splits and merges are needed for this memory access
+        let result = if let Some((splits, (merge_ptr, merge_size))) =
+            self.calculate_splits_and_merges::<BLOCK_SIZE, ALIGN>(address_space, pointer)
+        {
+            // Process all splits first
+            for (split_ptr, split_size) in splits {
+                let (_, block_metadata) =
+                    self.get_block_metadata::<ALIGN>(address_space, split_ptr);
+                let timestamp = block_metadata.timestamp();
+                self.split_by_meta::<T, ALIGN>(
+                    split_ptr as u32,
+                    timestamp,
+                    split_size as u8,
+                    address_space,
+                );
+            }
+
+            // Process merge
+            let mut prev_ts_buf = [0u32; MAX_SEGMENTS];
+
+            let mut max_timestamp = INITIAL_TIMESTAMP;
+
+            let mut ptr = merge_ptr;
+            let end_ptr = merge_ptr + merge_size;
+            let mut seg_idx = 0;
+            while ptr < end_ptr {
+                let (_, block_metadata) = self.get_block_metadata::<ALIGN>(address_space, ptr);
+
+                let timestamp = if block_metadata.block_size() > 0 {
+                    block_metadata.timestamp()
+                } else {
+                    self.handle_uninitialized_memory::<T, ALIGN>(address_space, ptr);
+                    INITIAL_TIMESTAMP
+                };
+
+                // SAFETY: seg_idx < MAX_SEGMENTS since we iterate at most merge_size/ALIGN times
+                // and merge_size <= BLOCK_SIZE <= MAX_BLOCK_SIZE
+                unsafe {
+                    *prev_ts_buf.get_unchecked_mut(seg_idx) = timestamp;
+                }
+                max_timestamp = max_timestamp.max(timestamp);
+                ptr += ALIGN;
+                seg_idx += 1;
+            }
+
+            // Create the merge record
+            self.add_merge_record(
+                AccessRecordHeader {
+                    timestamp_and_mask: max_timestamp,
+                    address_space: address_space as u32,
+                    pointer: merge_ptr as u32,
+                    block_size: merge_size as u32,
+                    lowest_block_size: ALIGN as u32,
+                    type_size: size_of::<T>() as u32,
+                },
+                // SAFETY: T is plain old data
+                unsafe { slice_as_bytes(prev_values) },
+                &prev_ts_buf[..seg_idx],
+            );
+
+            max_timestamp
         } else {
-            self.range_array::<N>(address_space, pointer)
+            self.get_timestamp::<ALIGN>(address_space, pointer)
         };
+
+        // Update the metadata for this access
+        self.set_meta_block::<BLOCK_SIZE, ALIGN>(address_space, pointer, self.timestamp);
+        result
+    }
+
+    /// Emits the split or merge record that brings untouched memory at `pointer` to ALIGN size.
+    #[inline(always)]
+    fn handle_uninitialized_memory<T: Copy, const ALIGN: usize>(
+        &mut self,
+        address_space: usize,
+        pointer: usize,
+    ) {
+        if self.initial_block_size >= ALIGN {
+            // Split the initial block containing `pointer`; the mask assumes a power-of-two ratio.
+            let segment_index = pointer / ALIGN;
+            let block_start = segment_index & !(self.initial_block_size / ALIGN - 1);
+            let start_ptr = (block_start * ALIGN) as u32;
+            self.split_by_meta::<T, ALIGN>(
+                start_ptr,
+                INITIAL_TIMESTAMP,
+                self.initial_block_size as u8,
+                address_space,
+            );
+        } else {
+            // Initial blocks are size 1: merge ALIGN of them (zero data, initial timestamps).
+            debug_assert_eq!(self.initial_block_size, 1);
+            self.add_merge_record(
+                AccessRecordHeader {
+                    timestamp_and_mask: INITIAL_TIMESTAMP,
+                    address_space: address_space as u32,
+                    pointer: pointer as u32,
+                    block_size: ALIGN as u32,
+                    lowest_block_size: self.initial_block_size as u32,
+                    type_size: size_of::<T>() as u32,
+                },
+                &INITIAL_CELL_BUFFER[..ALIGN],
+                &INITIAL_TIMESTAMP_BUFFER[..ALIGN],
+            );
+        }
+    }
+
+    /// Atomic read operation which increments the timestamp by 1.
+    /// Returns `(t_prev, [pointer:BLOCK_SIZE]_{address_space})` where `t_prev` is the
+    /// timestamp of the last memory access.
+    ///
+    /// The previous memory access is treated as atomic even if previous accesses were for
+    /// a smaller block size. This is made possible by internal memory access adapters
+    /// that split/merge memory blocks. More specifically, the last memory access corresponding
+    /// to `t_prev` may refer to an atomic access inserted by the memory access adapters.
+    ///
+    /// # Assumptions
+    /// The `BLOCK_SIZE` is a multiple of `ALIGN`, which must equal the minimum block size
+    /// of `address_space`.
+    ///
+    /// # Safety
+    /// The type `T` must be stack-allocated `repr(C)` or `repr(transparent)`,
+    /// plain old data, and it must be the exact type used to represent a single memory cell in
+    /// address space `address_space`. For standard usage,
+    /// `T` is either `u8` or `F` where `F` is the base field of the ZK backend.
+    ///
+    /// In addition:
+    /// - `address_space` must be valid.
+    #[inline(always)]
+    pub unsafe fn read<T, const BLOCK_SIZE: usize, const ALIGN: usize>(
+        &mut self,
+        address_space: u32,
+        pointer: u32,
+    ) -> (u32, [T; BLOCK_SIZE])
+    where
+        T: Copy + Debug,
+    {
+        self.assert_alignment(BLOCK_SIZE, ALIGN, address_space, pointer);
+        let values = self.data.read(address_space, pointer);
+        let t_prev = self.prev_access_time::<T, BLOCK_SIZE, ALIGN>(
+            address_space as usize,
+            pointer as usize,
+            &values,
+        );
+        self.timestamp += 1;
+
+        (t_prev, values)
+    }
+
+    /// Atomic write operation that writes `values` into `[pointer:BLOCK_SIZE]_{address_space}` and
+    /// then increments the timestamp by 1. Returns `(t_prev, values_prev)` which equal the
+    /// timestamp and value `[pointer:BLOCK_SIZE]_{address_space}` of the last memory access.
+    ///
+    /// The previous memory access is treated as atomic even if previous accesses were for
+    /// a smaller block size. This is made possible by internal memory access adapters
+    /// that split/merge memory blocks. More specifically, the last memory access corresponding
+    /// to `t_prev` may refer to an atomic access inserted by the memory access adapters.
+    ///
+    /// # Assumptions
+    /// The `BLOCK_SIZE` is a multiple of `ALIGN`, which must equal the minimum block size
+    /// of `address_space`. Panics if `pointer` is not a multiple of `ALIGN`.
+    ///
+    /// # Safety
+    /// The type `T` must be stack-allocated `repr(C)` or `repr(transparent)`,
+    /// plain old data, and it must be the exact type used to represent a single memory cell in
+    /// address space `address_space`. For standard usage,
+    /// `T` is either `u8` or `F` where `F` is the base field of the ZK backend.
+    ///
+    /// In addition:
+    /// - `address_space` must be valid.
+    #[inline(always)]
+    pub unsafe fn write<T, const BLOCK_SIZE: usize, const ALIGN: usize>(
+        &mut self,
+        address_space: u32,
+        pointer: u32,
+        values: [T; BLOCK_SIZE],
+    ) -> (u32, [T; BLOCK_SIZE])
+    where
+        T: Copy + Debug,
+    {
+        self.assert_alignment(BLOCK_SIZE, ALIGN, address_space, pointer);
+        let values_prev = self.data.read(address_space, pointer);
+        let t_prev = self.prev_access_time::<T, BLOCK_SIZE, ALIGN>(
+            address_space as usize,
+            pointer as usize,
+            &values_prev,
+        );
+        self.data.write(address_space, pointer, values);
+        self.timestamp += 1;
+
+        (t_prev, values_prev)
+    }
+
+    pub fn increment_timestamp(&mut self) {
         self.timestamp += 1;
-        (self.last_record_id(), values)
     }
 
     pub fn increment_timestamp_by(&mut self, amount: u32) {
         self.timestamp += amount;
-        self.log.push(MemoryLogEntry::IncrementTimestampBy(amount))
     }
 
     pub fn timestamp(&self) -> u32 {
         self.timestamp
     }
 
-    #[inline(always)]
-    pub fn get(&self, address_space: u32, pointer: u32) -> F {
-        *self.data.get(&(address_space, pointer)).unwrap_or(&F::ZERO)
+    /// Gathers every touched block and partitions them for finalization: with block size
+    /// `CHUNK` when `is_persistent`, with block size 1 otherwise.
+    #[instrument(name = "memory_finalize", skip_all)]
+    pub fn finalize<F: Field>(&mut self, is_persistent: bool) -> TouchedMemory<F> {
+        let touched = self.touched_blocks();
+
+        if is_persistent {
+            let partition = self.touched_blocks_to_equipartition::<F, CHUNK>(touched);
+            return TouchedMemory::Persistent(partition);
+        }
+        // Volatile memory: the equipartition uses blocks of size 1.
+        let partition = self.touched_blocks_to_equipartition::<F, 1>(touched);
+        TouchedMemory::Volatile(partition)
     }
 
-    #[inline(always)]
-    fn range_array<const N: usize>(&self, address_space: u32, pointer: u32) -> [F; N] {
-        self.data.get_range(&(address_space, pointer))
+    /// Returns the list of all touched blocks. The list is sorted by address.
+    fn touched_blocks(&self) -> Vec<(Address, AccessMetadata)> {
+        assert_eq!(self.meta.len(), self.min_block_size.len());
+        self.meta
+            .par_iter()
+            .zip(self.min_block_size.par_iter())
+            .enumerate()
+            .flat_map(|(addr_space, (meta_page, &align))| {
+                meta_page
+                    .par_iter()
+                    .filter_map(move |(idx, metadata)| {
+                        let ptr = idx as u32 * align;
+                        if metadata.offset_to_start == 0 && metadata.block_size() != 0 {
+                            Some(((addr_space as u32, ptr), metadata))
+                        } else {
+                            None
+                        }
+                    })
+                    .collect::<Vec<_>>()
+            })
+            .collect()
     }
-}
 
-#[cfg(test)]
-mod tests {
-    use openvm_stark_backend::p3_field::FieldAlgebra;
-    use openvm_stark_sdk::p3_baby_bear::BabyBear;
+    /// Returns the equipartition of the touched blocks.
+    /// Modifies existing records and adds new ones to account for the initial/final segments.
+    fn touched_blocks_to_equipartition<F: Field, const CHUNK: usize>(
+        &mut self,
+        touched_blocks: Vec<((u32, u32), AccessMetadata)>,
+    ) -> TimestampedEquipartition<F, CHUNK> {
+        // [perf] We can `.with_capacity()` if we keep track of the number of segments we initialize
+        let mut final_memory = Vec::new();
 
-    use super::Memory;
-    use crate::arch::MemoryConfig;
+        debug_assert!(touched_blocks.is_sorted_by_key(|(addr, _)| addr));
+        self.handle_touched_blocks::<F, CHUNK>(&mut final_memory, touched_blocks);
 
-    macro_rules! bba {
-        [$($x:expr),*] => {
-            [$(BabyBear::from_canonical_u32($x)),*]
-        }
+        debug_assert!(final_memory.is_sorted_by_key(|(key, _)| *key));
+        final_memory
     }
 
-    #[test]
-    fn test_write_read() {
-        let mut memory = Memory::new(&MemoryConfig::default());
-        let address_space = 1;
-
-        memory.write(address_space, 0, bba![1, 2, 3, 4]);
+    fn handle_touched_blocks<F: Field, const CHUNK: usize>(
+        &mut self,
+        final_memory: &mut Vec<((u32, u32), TimestampedValues<F, CHUNK>)>,
+        touched_blocks: Vec<((u32, u32), AccessMetadata)>,
+    ) {
+        let mut current_values = vec![0u8; MAX_CELL_BYTE_SIZE * CHUNK];
+        let mut current_cnt = 0;
+        let mut current_address = MemoryAddress::new(0, 0);
+        let mut current_timestamps = vec![0; CHUNK];
+        for ((addr_space, ptr), access_metadata) in touched_blocks {
+            // SAFETY: addr_space of touched blocks are all in bounds
+            let addr_space_config =
+                unsafe { *self.data.memory.config.get_unchecked(addr_space as usize) };
+            let min_block_size = addr_space_config.min_block_size;
+            let cell_size = addr_space_config.layout.size();
+            let timestamp = access_metadata.timestamp();
+            let block_size = access_metadata.block_size();
+            assert!(
+                current_cnt == 0
+                    || (current_address.address_space == addr_space
+                        && current_address.pointer + current_cnt as u32 == ptr),
+                "The union of all touched blocks must consist of blocks with sizes divisible by `CHUNK`"
+            );
+            debug_assert!(block_size >= min_block_size as u8);
+            debug_assert!(ptr % min_block_size as u32 == 0);
 
-        let (_, data) = memory.read::<2>(address_space, 0);
-        assert_eq!(data, bba![1, 2]);
+            if current_cnt == 0 {
+                assert_eq!(
+                    ptr & (CHUNK as u32 - 1),
+                    0,
+                    "The union of all touched blocks must consist of `CHUNK`-aligned blocks"
+                );
+                current_address = MemoryAddress::new(addr_space, ptr);
+            }
 
-        memory.write(address_space, 2, bba![100]);
+            if block_size > min_block_size as u8 {
+                self.add_split_record(AccessRecordHeader {
+                    timestamp_and_mask: timestamp,
+                    address_space: addr_space,
+                    pointer: ptr,
+                    block_size: block_size as u32,
+                    lowest_block_size: min_block_size as u32,
+                    type_size: cell_size as u32,
+                });
+            }
+            if min_block_size > CHUNK {
+                assert_eq!(current_cnt, 0);
+                for i in (0..block_size as u32).step_by(min_block_size) {
+                    self.add_split_record(AccessRecordHeader {
+                        timestamp_and_mask: timestamp,
+                        address_space: addr_space,
+                        pointer: ptr + i,
+                        block_size: min_block_size as u32,
+                        lowest_block_size: CHUNK as u32,
+                        type_size: cell_size as u32,
+                    });
+                }
+                // SAFETY: touched blocks are in bounds
+                let values = unsafe {
+                    self.data.memory.get_u8_slice(
+                        addr_space,
+                        ptr as usize * cell_size,
+                        block_size as usize * cell_size,
+                    )
+                };
+                for i in (0..block_size as u32).step_by(CHUNK) {
+                    final_memory.push((
+                        (addr_space, ptr + i),
+                        TimestampedValues {
+                            timestamp,
+                            values: from_fn(|j| {
+                                let byte_idx = (i as usize + j) * cell_size;
+                                // SAFETY: block_size is multiple of CHUNK and we are reading chunks
+                                // of cells within bounds
+                                unsafe {
+                                    addr_space_config
+                                        .layout
+                                        .to_field(&values[byte_idx..byte_idx + cell_size])
+                                }
+                            }),
+                        },
+                    ));
+                }
+            } else {
+                for i in 0..block_size as u32 {
+                    // SAFETY: getting cell data
+                    let cell_data = unsafe {
+                        self.data.memory.get_u8_slice(
+                            addr_space,
+                            (ptr + i) as usize * cell_size,
+                            cell_size,
+                        )
+                    };
+                    current_values[current_cnt * cell_size..current_cnt * cell_size + cell_size]
+                        .copy_from_slice(cell_data);
+                    if current_cnt & (min_block_size - 1) == 0 {
+                        // SAFETY: current_cnt / min_block_size < CHUNK / min_block_size <= CHUNK
+                        unsafe {
+                            *current_timestamps.get_unchecked_mut(current_cnt / min_block_size) =
+                                timestamp;
+                        }
+                    }
+                    current_cnt += 1;
+                    if current_cnt == CHUNK {
+                        let timestamp = *current_timestamps[..CHUNK / min_block_size]
+                            .iter()
+                            .max()
+                            .unwrap();
+                        self.add_merge_record(
+                            AccessRecordHeader {
+                                timestamp_and_mask: timestamp,
+                                address_space: addr_space,
+                                pointer: current_address.pointer,
+                                block_size: CHUNK as u32,
+                                lowest_block_size: min_block_size as u32,
+                                type_size: cell_size as u32,
+                            },
+                            &current_values[..CHUNK * cell_size],
+                            &current_timestamps[..CHUNK / min_block_size],
+                        );
+                        final_memory.push((
+                            (current_address.address_space, current_address.pointer),
+                            TimestampedValues {
+                                timestamp,
+                                values: from_fn(|i| unsafe {
+                                    // SAFETY: cell_size is correct, and alignment is guaranteed
+                                    addr_space_config.layout.to_field(
+                                        &current_values[i * cell_size..i * cell_size + cell_size],
+                                    )
+                                }),
+                            },
+                        ));
+                        current_address.pointer += current_cnt as u32;
+                        current_cnt = 0;
+                    }
+                }
+            }
+        }
+        assert_eq!(current_cnt, 0, "The union of all touched blocks must consist of blocks with sizes divisible by `CHUNK`");
+    }
 
-        let (_, data) = memory.read::<4>(address_space, 0);
-        assert_eq!(data, bba![1, 2, 100, 4]);
+    pub fn address_space_alignment(&self) -> Vec<u8> {
+        self.min_block_size
+            .iter()
+            .map(|&x| log2_strict_usize(x as usize) as u8)
+            .collect()
     }
 }
diff --git a/crates/vm/src/system/memory/online/basic.rs b/crates/vm/src/system/memory/online/basic.rs
new file mode 100644
index 0000000000..b5cddeb775
--- /dev/null
+++ b/crates/vm/src/system/memory/online/basic.rs
@@ -0,0 +1,243 @@
+use std::{
+    alloc::{alloc_zeroed, dealloc, Layout},
+    ptr::NonNull,
+};
+
+use crate::system::memory::online::{LinearMemory, PAGE_SIZE};
+
+pub struct BasicMemory {
+    ptr: NonNull<u8>,
+    size: usize,
+    layout: Layout,
+}
+
+impl BasicMemory {
+    #[inline(always)]
+    pub fn as_ptr(&self) -> *const u8 {
+        self.ptr.as_ptr()
+    }
+
+    #[inline(always)]
+    pub fn as_mut_ptr(&mut self) -> *mut u8 {
+        self.ptr.as_ptr()
+    }
+}
+
+impl Drop for BasicMemory {
+    fn drop(&mut self) {
+        if self.size > 0 { // zero-size instances hold a dangling pointer that was never allocated
+            unsafe { // SAFETY: `ptr` was returned by `alloc_zeroed` for exactly this `layout`
+                dealloc(self.ptr.as_ptr(), self.layout);
+            }
+        }
+    }
+}
+
+impl Clone for BasicMemory {
+    fn clone(&self) -> Self {
+        if self.size == 0 {
+            // Ensure we maintain the same aligned pointer for zero-size
+            let aligned_ptr = PAGE_SIZE as *mut u8;
+            let ptr = unsafe { NonNull::new_unchecked(aligned_ptr) };
+            return Self {
+                ptr,
+                size: 0,
+                layout: self.layout,
+            };
+        }
+
+        let layout = self.layout;
+        let ptr = unsafe {
+            let new_ptr = alloc_zeroed(layout);
+            if new_ptr.is_null() {
+                std::alloc::handle_alloc_error(layout);
+            }
+            std::ptr::copy_nonoverlapping(self.ptr.as_ptr(), new_ptr, self.size);
+            NonNull::new_unchecked(new_ptr)
+        };
+        Self {
+            ptr,
+            size: self.size,
+            layout,
+        }
+    }
+}
+
+impl std::fmt::Debug for BasicMemory {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("BasicMemory")
+            .field("size", &self.size)
+            .field("alignment", &self.layout.align())
+            .finish()
+    }
+}
+
+impl LinearMemory for BasicMemory {
+    fn new(size: usize) -> Self {
+        if size == 0 {
+            // For zero-size allocation, use a dangling pointer with proper alignment
+            // We need to ensure the pointer is aligned to PAGE_SIZE
+            let aligned_ptr = PAGE_SIZE as *mut u8;
+            let ptr = unsafe { NonNull::new_unchecked(aligned_ptr) };
+            let layout = Layout::from_size_align(0, PAGE_SIZE)
+                .expect("Failed to create layout with PAGE_SIZE alignment");
+            return Self {
+                ptr,
+                size: 0,
+                layout,
+            };
+        }
+
+        // Use PAGE_SIZE alignment for consistency with MmapMemory
+        // This also ensures good alignment for any type we might store
+        let layout = Layout::from_size_align(size, PAGE_SIZE)
+            .expect("Failed to create layout with PAGE_SIZE alignment");
+
+        let ptr = unsafe {
+            let raw_ptr = alloc_zeroed(layout);
+            if raw_ptr.is_null() {
+                std::alloc::handle_alloc_error(layout);
+            }
+            NonNull::new_unchecked(raw_ptr)
+        };
+
+        Self { ptr, size, layout }
+    }
+
+    fn size(&self) -> usize {
+        self.size
+    }
+
+    fn as_slice(&self) -> &[u8] {
+        unsafe { std::slice::from_raw_parts(self.ptr.as_ptr(), self.size) }
+    }
+
+    fn as_mut_slice(&mut self) -> &mut [u8] {
+        unsafe { std::slice::from_raw_parts_mut(self.ptr.as_ptr(), self.size) }
+    }
+
+    /// Reads a `BLOCK` at byte offset `from`, panicking if the range is out of bounds.
+    #[inline(always)]
+    unsafe fn read<BLOCK: Copy>(&self, from: usize) -> BLOCK {
+        let size = std::mem::size_of::<BLOCK>();
+        assert!(
+            from.checked_add(size).is_some_and(|end| end <= self.size),
+            "read from={from} of size={size} out of bounds: memory size={}",
+            self.size
+        );
+        let src = self.as_ptr().add(from) as *const BLOCK;
+        // SAFETY:
+        // - Bounds check (overflow-safe via `checked_add`) is done via assert above
+        // - We assume `src` is aligned to `BLOCK`
+        // - We assume `BLOCK` is "plain old data" so the underlying `src` bytes is valid to read as
+        //   an initialized value of `BLOCK`
+        core::ptr::read(src)
+    }
+
+    /// Reads a possibly unaligned `BLOCK` at byte offset `from`, panicking if out of bounds.
+    #[inline(always)]
+    unsafe fn read_unaligned<BLOCK: Copy>(&self, from: usize) -> BLOCK {
+        let size = std::mem::size_of::<BLOCK>();
+        assert!(
+            from.checked_add(size).is_some_and(|end| end <= self.size),
+            "read_unaligned from={from} of size={size} out of bounds: memory size={}",
+            self.size
+        );
+        let src = self.as_ptr().add(from) as *const BLOCK;
+        // SAFETY:
+        // - Bounds check (overflow-safe via `checked_add`) is done via assert above
+        // - We assume `BLOCK` is "plain old data" so the underlying `src` bytes is valid to read as
+        //   an initialized value of `BLOCK`
+        core::ptr::read_unaligned(src)
+    }
+
+    /// Writes `values` at byte offset `start`, panicking if the range is out of bounds.
+    #[inline(always)]
+    unsafe fn write<BLOCK: Copy>(&mut self, start: usize, values: BLOCK) {
+        let size = std::mem::size_of::<BLOCK>();
+        assert!(
+            start.checked_add(size).is_some_and(|end| end <= self.size),
+            "write start={start} of size={size} out of bounds: memory size={}",
+            self.size
+        );
+        let dst = self.as_mut_ptr().add(start) as *mut BLOCK;
+        // SAFETY:
+        // - Bounds check (overflow-safe via `checked_add`) is done via assert above
+        // - We assume `dst` is aligned to `BLOCK`
+        core::ptr::write(dst, values);
+    }
+
+    #[inline(always)]
+    unsafe fn write_unaligned<BLOCK: Copy>(&mut self, start: usize, values: BLOCK) {
+        let size = std::mem::size_of::<BLOCK>();
+        assert!(
+            start + size <= self.size,
+            "write_unaligned start={start} of size={size} out of bounds: memory size={}",
+            self.size
+        );
+
+        // Use slice's copy_from_slice for safe byte-level copy
+        let src_bytes = std::slice::from_raw_parts(&values as *const BLOCK as *const u8, size);
+        self.as_mut_slice()[start..start + size].copy_from_slice(src_bytes);
+    }
+
+    /// Swaps `*values` with the `BLOCK` stored at byte offset `start`; panics if out of bounds.
+    #[inline(always)]
+    unsafe fn swap<BLOCK: Copy>(&mut self, start: usize, values: &mut BLOCK) {
+        let size = std::mem::size_of::<BLOCK>();
+        assert!(
+            start.checked_add(size).is_some_and(|end| end <= self.size),
+            "swap start={start} of size={size} out of bounds: memory size={}",
+            self.size
+        );
+        // SAFETY:
+        // - Bounds check (overflow-safe via `checked_add`) is done via assert above
+        // - We assume `start` is aligned to `BLOCK`
+        core::ptr::swap(
+            self.as_mut_ptr().add(start) as *mut BLOCK,
+            values as *mut BLOCK,
+        );
+    }
+
+    #[inline(always)]
+    unsafe fn copy_nonoverlapping<T: Copy>(&mut self, to: usize, data: &[T]) {
+        let byte_len = std::mem::size_of_val(data);
+        assert!(
+            to + byte_len <= self.size,
+            "copy_nonoverlapping to={to} of size={byte_len} out of bounds: memory size={}",
+            self.size
+        );
+
+        // Use slice's copy_from_slice for safe byte-level copy
+        let src_bytes = std::slice::from_raw_parts(data.as_ptr() as *const u8, byte_len);
+        self.as_mut_slice()[to..to + byte_len].copy_from_slice(src_bytes);
+    }
+
+    /// Borrows `len` values of `T` at byte offset `start`; panics if out of bounds or misaligned.
+    #[inline(always)]
+    unsafe fn get_aligned_slice<T: Copy>(&self, start: usize, len: usize) -> &[T] {
+        let byte_len = len.saturating_mul(std::mem::size_of::<T>());
+        assert!(
+            start.checked_add(byte_len).is_some_and(|end| end <= self.size),
+            "get_aligned_slice start={start} of size={byte_len} out of bounds: memory size={}",
+            self.size
+        );
+        assert!(
+            start % std::mem::align_of::<T>() == 0,
+            "get_aligned_slice: misaligned start"
+        );
+        let data = self.as_ptr().add(start) as *const T;
+        // SAFETY:
+        // - Bounds check (saturating/checked, so it cannot wrap) is done via assert above
+        // - Alignment check is done via assert above
+        // - `T` is "plain old data" (POD), so conversion from underlying bytes is properly
+        //   initialized
+        // - `self` will not be mutated while borrowed
+        core::slice::from_raw_parts(data, len)
+    }
+}
+
+// SAFETY: BasicMemory properly manages its allocation and can be sent between threads
+unsafe impl Send for BasicMemory {}
+// SAFETY: BasicMemory has no interior mutability and can be shared between threads
+unsafe impl Sync for BasicMemory {}
diff --git a/crates/vm/src/system/memory/online/memmap.rs b/crates/vm/src/system/memory/online/memmap.rs
new file mode 100644
index 0000000000..709342daec
--- /dev/null
+++ b/crates/vm/src/system/memory/online/memmap.rs
@@ -0,0 +1,200 @@
+use std::fmt::Debug;
+
+use memmap2::MmapMut;
+
+use super::{LinearMemory, PAGE_SIZE};
+
+pub const CELL_STRIDE: usize = 1;
+
+/// Mmap-backed linear memory. OS-memory pages are paged in on-demand and zero-initialized.
+#[derive(Debug)]
+pub struct MmapMemory {
+    mmap: MmapMut,
+}
+
+impl Clone for MmapMemory {
+    fn clone(&self) -> Self {
+        let mut new_mmap = MmapMut::map_anon(self.mmap.len()).unwrap();
+        new_mmap.copy_from_slice(&self.mmap);
+        Self { mmap: new_mmap }
+    }
+}
+
+impl MmapMemory {
+    #[inline(always)]
+    pub fn as_ptr(&self) -> *const u8 {
+        self.mmap.as_ptr()
+    }
+
+    #[inline(always)]
+    pub fn as_mut_ptr(&mut self) -> *mut u8 {
+        self.mmap.as_mut_ptr()
+    }
+}
+
+impl LinearMemory for MmapMemory {
+    /// Create a new MmapMemory with the given `size` in bytes.
+    /// We round `size` up to be a multiple of the mmap page size (4kb by default) so that OS-level
+    /// MMU protection corresponds to out of bounds protection.
+    fn new(mut size: usize) -> Self {
+        size = size.div_ceil(PAGE_SIZE) * PAGE_SIZE;
+        // anonymous mapping means pages are zero-initialized on first use
+        Self {
+            mmap: MmapMut::map_anon(size).unwrap(),
+        }
+    }
+
+    fn size(&self) -> usize {
+        self.mmap.len()
+    }
+
+    fn as_slice(&self) -> &[u8] {
+        &self.mmap
+    }
+
+    fn as_mut_slice(&mut self) -> &mut [u8] {
+        &mut self.mmap
+    }
+
+    /// Zeroes the whole mapping: `madvise(MADV_DONTNEED)`, falling back to `write_bytes`.
+    #[cfg(target_os = "linux")]
+    fn fill_zero(&mut self) {
+        use libc::{madvise, MADV_DONTNEED};
+        let mmap = &mut self.mmap;
+        // SAFETY: our mmap is a memory-backed (not file-backed) anonymous private mapping.
+        // When we madvise MADV_DONTNEED, according to https://man7.org/linux/man-pages/man2/madvise.2.html
+        // > subsequent accesses of pages in the range will succeed, but
+        // > will result in either repopulating the memory contents from
+        // > the up-to-date contents of the underlying mapped file (for
+        // > shared file mappings, shared anonymous mappings, and shmem-
+        // > based techniques such as System V shared memory segments)
+        // > or zero-fill-on-demand pages for anonymous private
+        // > mappings.
+        unsafe {
+            let ret = madvise(
+                mmap.as_mut_ptr() as *mut libc::c_void,
+                mmap.len(),
+                MADV_DONTNEED,
+            );
+            if ret != 0 {
+                // Fallback to write_bytes if madvise fails
+                std::ptr::write_bytes(mmap.as_mut_ptr(), 0, mmap.len());
+            }
+        }
+    }
+
+    #[inline(always)]
+    unsafe fn read<BLOCK: Copy>(&self, from: usize) -> BLOCK {
+        debug_assert!(
+            from + size_of::<BLOCK>() <= self.size(),
+            "read from={from} of size={} out of bounds: memory size={}",
+            size_of::<BLOCK>(),
+            self.size()
+        );
+        let src = self.as_ptr().add(from) as *const BLOCK;
+        // SAFETY:
+        // - MMU will segfault if `src` access is out of bounds.
+        // - We assume `src` is aligned to `BLOCK`
+        // - We assume `BLOCK` is "plain old data" so the underlying `src` bytes is valid to read as
+        //   an initialized value of `BLOCK`
+        core::ptr::read(src)
+    }
+
+    #[inline(always)]
+    unsafe fn read_unaligned<BLOCK: Copy>(&self, from: usize) -> BLOCK {
+        debug_assert!(
+            from + size_of::<BLOCK>() <= self.size(),
+            "read_unaligned from={from} of size={} out of bounds: memory size={}",
+            size_of::<BLOCK>(),
+            self.size()
+        );
+        let src = self.as_ptr().add(from) as *const BLOCK;
+        // SAFETY:
+        // - MMU will segfault if `src` access is out of bounds.
+        // - We assume `BLOCK` is "plain old data" so the underlying `src` bytes is valid to read as
+        //   an initialized value of `BLOCK`
+        core::ptr::read_unaligned(src)
+    }
+
+    #[inline(always)]
+    unsafe fn write<BLOCK: Copy>(&mut self, start: usize, values: BLOCK) {
+        debug_assert!(
+            start + size_of::<BLOCK>() <= self.size(),
+            "write start={start} of size={} out of bounds: memory size={}",
+            size_of::<BLOCK>(),
+            self.size()
+        );
+        let dst = self.as_mut_ptr().add(start) as *mut BLOCK;
+        // SAFETY:
+        // - MMU will segfault if `dst` access is out of bounds.
+        // - We assume `dst` is aligned to `BLOCK`
+        core::ptr::write(dst, values);
+    }
+
+    #[inline(always)]
+    unsafe fn write_unaligned<BLOCK: Copy>(&mut self, start: usize, values: BLOCK) {
+        debug_assert!(
+            start + size_of::<BLOCK>() <= self.size(),
+            "write_unaligned start={start} of size={} out of bounds: memory size={}",
+            size_of::<BLOCK>(),
+            self.size()
+        );
+        let dst = self.as_mut_ptr().add(start) as *mut BLOCK;
+        // SAFETY:
+        // - MMU will segfault if `dst` access is out of bounds.
+        core::ptr::write_unaligned(dst, values);
+    }
+
+    #[inline(always)]
+    unsafe fn swap<BLOCK: Copy>(&mut self, start: usize, values: &mut BLOCK) {
+        debug_assert!(
+            start + size_of::<BLOCK>() <= self.size(),
+            "swap start={start} of size={} out of bounds: memory size={}",
+            size_of::<BLOCK>(),
+            self.size()
+        );
+        // SAFETY:
+        // - MMU will segfault if `start` access is out of bounds.
+        // - We assume `start` is aligned to `BLOCK`
+        core::ptr::swap(
+            self.as_mut_ptr().add(start) as *mut BLOCK,
+            values as *mut BLOCK,
+        );
+    }
+
+    #[inline(always)]
+    unsafe fn copy_nonoverlapping<T: Copy>(&mut self, to: usize, data: &[T]) {
+        debug_assert!(
+            to + size_of_val(data) <= self.size(),
+            "copy_nonoverlapping to={to} of size={} out of bounds: memory size={}",
+            size_of_val(data),
+            self.size()
+        );
+        debug_assert_eq!(PAGE_SIZE % align_of::<T>(), 0);
+        let src = data.as_ptr();
+        let dst = self.as_mut_ptr().add(to) as *mut T;
+        // SAFETY:
+        // - MMU will segfault if `dst..dst + size_of_val(data)` is out of bounds.
+        // - Assumes `to` is aligned to `T` and `self.as_mut_ptr()` is aligned to `T`, which implies
+        //   the same for `dst`.
+        core::ptr::copy_nonoverlapping::<T>(src, dst, data.len());
+    }
+
+    #[inline(always)]
+    unsafe fn get_aligned_slice<T: Copy>(&self, start: usize, len: usize) -> &[T] {
+        debug_assert!(
+            start + len * size_of::<T>() <= self.size(),
+            "get_aligned_slice start={start} of size={} out of bounds: memory size={}",
+            len * size_of::<T>(),
+            self.size()
+        );
+        let data = self.as_ptr().add(start) as *const T;
+        // SAFETY:
+        // - MMU will segfault if `data..data + len * size_of::<T>()` is out of bounds.
+        // - Assumes `data` is aligned to `T`
+        // - `T` is "plain old data" (POD), so conversion from underlying bytes is properly
+        //   initialized
+        // - `self` will not be mutated while borrowed
+        core::slice::from_raw_parts(data, len)
+    }
+}
diff --git a/crates/vm/src/system/memory/online/paged_vec.rs b/crates/vm/src/system/memory/online/paged_vec.rs
new file mode 100644
index 0000000000..30fe77297d
--- /dev/null
+++ b/crates/vm/src/system/memory/online/paged_vec.rs
@@ -0,0 +1,93 @@
+use std::fmt::Debug;
+
+use openvm_stark_backend::p3_maybe_rayon::prelude::*;
+
+#[derive(Debug, Clone)]
+pub struct PagedVec<T, const PAGE_SIZE: usize> {
+    pages: Vec<Option<Box<[T; PAGE_SIZE]>>>,
+}
+
+// `Send`/`Sync` are auto-derived: `Vec<Option<Box<[T; PAGE_SIZE]>>>` is `Send`/`Sync` exactly
+// when `T` is, so no manual `unsafe impl` is required.
+
+impl<T: Copy + Default, const PAGE_SIZE: usize> PagedVec<T, PAGE_SIZE> {
+    #[inline]
+    /// `total_size` is the capacity of elements of type `T`.
+    pub fn new(total_size: usize) -> Self {
+        let num_pages = total_size.div_ceil(PAGE_SIZE);
+        Self {
+            pages: vec![None; num_pages],
+        }
+    }
+
+    #[cold]
+    #[inline(never)]
+    fn create_zeroed_page() -> Box<[T; PAGE_SIZE]> {
+        let layout = std::alloc::Layout::new::<[T; PAGE_SIZE]>();
+        // SAFETY: assumes `[T; PAGE_SIZE]` is not zero-sized; the pointer is null-checked before boxing.
+        let ptr = std::ptr::NonNull::new(unsafe { std::alloc::alloc_zeroed(layout) })
+            .unwrap_or_else(|| std::alloc::handle_alloc_error(layout));
+        unsafe { Box::from_raw(ptr.as_ptr() as *mut [T; PAGE_SIZE]) }
+    }
+
+    /// Get value at index without allocating new pages.
+    /// Panics if index is out of bounds. Returns default value if page doesn't exist.
+    #[inline]
+    pub fn get(&self, index: usize) -> T {
+        let page_idx = index / PAGE_SIZE;
+        let offset = index % PAGE_SIZE;
+
+        self.pages[page_idx]
+            .as_ref()
+            .map(|page| unsafe { *page.get_unchecked(offset) })
+            .unwrap_or_default()
+    }
+
+    /// Panics if the index is out of bounds. Creates new page before write when necessary.
+    #[inline]
+    pub fn set(&mut self, index: usize, value: T) {
+        let page_idx = index / PAGE_SIZE;
+        let offset = index % PAGE_SIZE;
+
+        let page = self.pages[page_idx].get_or_insert_with(Self::create_zeroed_page);
+
+        // SAFETY: offset < PAGE_SIZE by construction
+        unsafe {
+            *page.get_unchecked_mut(offset) = value;
+        }
+    }
+
+    pub fn par_iter(&self) -> impl ParallelIterator<Item = (usize, T)> + '_
+    where
+        T: Send + Sync,
+    {
+        self.pages
+            .par_iter()
+            .enumerate()
+            .filter_map(move |(page_idx, page)| {
+                page.as_ref().map(move |p| {
+                    p.par_iter()
+                        .enumerate()
+                        .map(move |(offset, &value)| (page_idx * PAGE_SIZE + offset, value))
+                })
+            })
+            .flatten()
+    }
+
+    /// Sequential counterpart of `par_iter`: yields `(index, value)` for every slot of each
+    /// allocated page, in increasing index order. Unallocated pages are skipped.
+    /// No `Send`/`Sync` bound is required because nothing crosses a thread boundary here.
+    pub fn iter(&self) -> impl Iterator<Item = (usize, T)> + '_ {
+        self.pages
+            .iter()
+            .enumerate()
+            .filter_map(move |(page_idx, page)| {
+                page.as_ref().map(move |p| {
+                    p.iter()
+                        .enumerate()
+                        .map(move |(offset, &value)| (page_idx * PAGE_SIZE + offset, value))
+                })
+            })
+            .flatten()
+    }
+}
diff --git a/crates/vm/src/system/memory/paged_vec.rs b/crates/vm/src/system/memory/paged_vec.rs
deleted file mode 100644
index 8a8b030970..0000000000
--- a/crates/vm/src/system/memory/paged_vec.rs
+++ /dev/null
@@ -1,447 +0,0 @@
-use std::{mem::MaybeUninit, ops::Range, ptr};
-
-use serde::{Deserialize, Serialize};
-
-use crate::arch::MemoryConfig;
-
-/// (address_space, pointer)
-pub type Address = (u32, u32);
-pub const PAGE_SIZE: usize = 1 << 12;
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct PagedVec<T, const PAGE_SIZE: usize> {
-    pub pages: Vec<Option<Vec<T>>>,
-}
-
-// ------------------------------------------------------------------
-// Common Helper Functions
-// These functions encapsulate the common logic for copying ranges
-// across pages, both for read-only and read-write (set) cases.
-impl<T: Default + Clone, const PAGE_SIZE: usize> PagedVec<T, PAGE_SIZE> {
-    // Copies a range of length `len` starting at index `start`
-    // into the memory pointed to by `dst`. If the relevant page is not
-    // initialized, fills that portion with T::default().
-    fn read_range_generic(&self, start: usize, len: usize, dst: *mut T) {
-        let start_page = start / PAGE_SIZE;
-        let end_page = (start + len - 1) / PAGE_SIZE;
-        unsafe {
-            if start_page == end_page {
-                let offset = start % PAGE_SIZE;
-                if let Some(page) = self.pages[start_page].as_ref() {
-                    ptr::copy_nonoverlapping(page.as_ptr().add(offset), dst, len);
-                } else {
-                    std::slice::from_raw_parts_mut(dst, len).fill(T::default());
-                }
-            } else {
-                let offset = start % PAGE_SIZE;
-                let first_part = PAGE_SIZE - offset;
-                if let Some(page) = self.pages[start_page].as_ref() {
-                    ptr::copy_nonoverlapping(page.as_ptr().add(offset), dst, first_part);
-                } else {
-                    std::slice::from_raw_parts_mut(dst, first_part).fill(T::default());
-                }
-                let second_part = len - first_part;
-                if let Some(page) = self.pages[end_page].as_ref() {
-                    ptr::copy_nonoverlapping(page.as_ptr(), dst.add(first_part), second_part);
-                } else {
-                    std::slice::from_raw_parts_mut(dst.add(first_part), second_part)
-                        .fill(T::default());
-                }
-            }
-        }
-    }
-
-    // Updates a range of length `len` starting at index `start` with new values.
-    // It copies the current values into the memory pointed to by `dst`
-    // and then writes the new values into the underlying pages,
-    // allocating pages (with defaults) if necessary.
-    fn set_range_generic(&mut self, start: usize, len: usize, new: *const T, dst: *mut T) {
-        let start_page = start / PAGE_SIZE;
-        let end_page = (start + len - 1) / PAGE_SIZE;
-        unsafe {
-            if start_page == end_page {
-                let offset = start % PAGE_SIZE;
-                let page =
-                    self.pages[start_page].get_or_insert_with(|| vec![T::default(); PAGE_SIZE]);
-                ptr::copy_nonoverlapping(page.as_ptr().add(offset), dst, len);
-                ptr::copy_nonoverlapping(new, page.as_mut_ptr().add(offset), len);
-            } else {
-                let offset = start % PAGE_SIZE;
-                let first_part = PAGE_SIZE - offset;
-                {
-                    let page =
-                        self.pages[start_page].get_or_insert_with(|| vec![T::default(); PAGE_SIZE]);
-                    ptr::copy_nonoverlapping(page.as_ptr().add(offset), dst, first_part);
-                    ptr::copy_nonoverlapping(new, page.as_mut_ptr().add(offset), first_part);
-                }
-                let second_part = len - first_part;
-                {
-                    let page =
-                        self.pages[end_page].get_or_insert_with(|| vec![T::default(); PAGE_SIZE]);
-                    ptr::copy_nonoverlapping(page.as_ptr(), dst.add(first_part), second_part);
-                    ptr::copy_nonoverlapping(new.add(first_part), page.as_mut_ptr(), second_part);
-                }
-            }
-        }
-    }
-}
-
-// ------------------------------------------------------------------
-// Implementation for types requiring Default + Clone
-impl<T: Default + Clone, const PAGE_SIZE: usize> PagedVec<T, PAGE_SIZE> {
-    pub fn new(num_pages: usize) -> Self {
-        Self {
-            pages: vec![None; num_pages],
-        }
-    }
-
-    pub fn get(&self, index: usize) -> Option<&T> {
-        let page_idx = index / PAGE_SIZE;
-        self.pages[page_idx]
-            .as_ref()
-            .map(|page| &page[index % PAGE_SIZE])
-    }
-
-    pub fn get_mut(&mut self, index: usize) -> Option<&mut T> {
-        let page_idx = index / PAGE_SIZE;
-        self.pages[page_idx]
-            .as_mut()
-            .map(|page| &mut page[index % PAGE_SIZE])
-    }
-
-    pub fn set(&mut self, index: usize, value: T) -> Option<T> {
-        let page_idx = index / PAGE_SIZE;
-        if let Some(page) = self.pages[page_idx].as_mut() {
-            Some(std::mem::replace(&mut page[index % PAGE_SIZE], value))
-        } else {
-            let page = self.pages[page_idx].get_or_insert_with(|| vec![T::default(); PAGE_SIZE]);
-            page[index % PAGE_SIZE] = value;
-            None
-        }
-    }
-
-    #[inline(always)]
-    pub fn range_vec(&self, range: Range<usize>) -> Vec<T> {
-        let len = range.end - range.start;
-        // Create a vector for uninitialized values.
-        let mut result: Vec<MaybeUninit<T>> = Vec::with_capacity(len);
-        // SAFETY: We set the length and then initialize every element via read_range_generic.
-        unsafe {
-            result.set_len(len);
-            self.read_range_generic(range.start, len, result.as_mut_ptr() as *mut T);
-            std::mem::transmute::<Vec<MaybeUninit<T>>, Vec<T>>(result)
-        }
-    }
-
-    pub fn set_range(&mut self, range: Range<usize>, values: &[T]) -> Vec<T> {
-        let len = range.end - range.start;
-        assert_eq!(values.len(), len);
-        let mut result: Vec<MaybeUninit<T>> = Vec::with_capacity(len);
-        // SAFETY: We will write to every element in result via set_range_generic.
-        unsafe {
-            result.set_len(len);
-            self.set_range_generic(
-                range.start,
-                len,
-                values.as_ptr(),
-                result.as_mut_ptr() as *mut T,
-            );
-            std::mem::transmute::<Vec<MaybeUninit<T>>, Vec<T>>(result)
-        }
-    }
-
-    pub fn memory_size(&self) -> usize {
-        self.pages.len() * PAGE_SIZE
-    }
-
-    pub fn is_empty(&self) -> bool {
-        self.pages.iter().all(|page| page.is_none())
-    }
-}
-
-// ------------------------------------------------------------------
-// Implementation for types requiring Default + Copy
-impl<T: Default + Copy, const PAGE_SIZE: usize> PagedVec<T, PAGE_SIZE> {
-    #[inline(always)]
-    pub fn range_array<const N: usize>(&self, from: usize) -> [T; N] {
-        // Create an uninitialized array of MaybeUninit<T>
-        let mut result: [MaybeUninit<T>; N] = unsafe {
-            // SAFETY: An uninitialized `[MaybeUninit<T>; N]` is valid.
-            MaybeUninit::uninit().assume_init()
-        };
-        self.read_range_generic(from, N, result.as_mut_ptr() as *mut T);
-        // SAFETY: All elements have been initialized.
-        unsafe { ptr::read(&result as *const _ as *const [T; N]) }
-    }
-
-    #[inline(always)]
-    pub fn set_range_array<const N: usize>(&mut self, from: usize, values: &[T; N]) -> [T; N] {
-        // Create an uninitialized array for old values.
-        let mut result: [MaybeUninit<T>; N] = unsafe { MaybeUninit::uninit().assume_init() };
-        self.set_range_generic(from, N, values.as_ptr(), result.as_mut_ptr() as *mut T);
-        unsafe { ptr::read(&result as *const _ as *const [T; N]) }
-    }
-}
-
-impl<T, const PAGE_SIZE: usize> PagedVec<T, PAGE_SIZE> {
-    pub fn iter(&self) -> PagedVecIter<'_, T, PAGE_SIZE> {
-        PagedVecIter {
-            vec: self,
-            current_page: 0,
-            current_index_in_page: 0,
-        }
-    }
-}
-
-pub struct PagedVecIter<'a, T, const PAGE_SIZE: usize> {
-    vec: &'a PagedVec<T, PAGE_SIZE>,
-    current_page: usize,
-    current_index_in_page: usize,
-}
-
-impl<T: Clone, const PAGE_SIZE: usize> Iterator for PagedVecIter<'_, T, PAGE_SIZE> {
-    type Item = (usize, T);
-
-    fn next(&mut self) -> Option<Self::Item> {
-        while self.current_page < self.vec.pages.len()
-            && self.vec.pages[self.current_page].is_none()
-        {
-            self.current_page += 1;
-            debug_assert_eq!(self.current_index_in_page, 0);
-            self.current_index_in_page = 0;
-        }
-        if self.current_page >= self.vec.pages.len() {
-            return None;
-        }
-        let global_index = self.current_page * PAGE_SIZE + self.current_index_in_page;
-
-        let page = self.vec.pages[self.current_page].as_ref()?;
-        let value = page[self.current_index_in_page].clone();
-
-        self.current_index_in_page += 1;
-        if self.current_index_in_page == PAGE_SIZE {
-            self.current_page += 1;
-            self.current_index_in_page = 0;
-        }
-        Some((global_index, value))
-    }
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct AddressMap<T, const PAGE_SIZE: usize> {
-    pub paged_vecs: Vec<PagedVec<T, PAGE_SIZE>>,
-    pub as_offset: u32,
-}
-
-impl<T: Clone + Default, const PAGE_SIZE: usize> Default for AddressMap<T, PAGE_SIZE> {
-    fn default() -> Self {
-        Self::from_mem_config(&MemoryConfig::default())
-    }
-}
-
-impl<T: Clone + Default, const PAGE_SIZE: usize> AddressMap<T, PAGE_SIZE> {
-    pub fn new(as_offset: u32, as_cnt: usize, mem_size: usize) -> Self {
-        Self {
-            paged_vecs: vec![PagedVec::new(mem_size.div_ceil(PAGE_SIZE)); as_cnt],
-            as_offset,
-        }
-    }
-    pub fn from_mem_config(mem_config: &MemoryConfig) -> Self {
-        Self::new(
-            mem_config.as_offset,
-            1 << mem_config.as_height,
-            1 << mem_config.pointer_max_bits,
-        )
-    }
-    pub fn items(&self) -> impl Iterator<Item = (Address, T)> + '_ {
-        self.paged_vecs
-            .iter()
-            .enumerate()
-            .flat_map(move |(as_idx, page)| {
-                page.iter()
-                    .map(move |(ptr_idx, x)| ((as_idx as u32 + self.as_offset, ptr_idx as u32), x))
-            })
-    }
-    pub fn get(&self, address: &Address) -> Option<&T> {
-        self.paged_vecs[(address.0 - self.as_offset) as usize].get(address.1 as usize)
-    }
-    pub fn get_mut(&mut self, address: &Address) -> Option<&mut T> {
-        self.paged_vecs[(address.0 - self.as_offset) as usize].get_mut(address.1 as usize)
-    }
-    pub fn insert(&mut self, address: &Address, data: T) -> Option<T> {
-        self.paged_vecs[(address.0 - self.as_offset) as usize].set(address.1 as usize, data)
-    }
-    pub fn is_empty(&self) -> bool {
-        self.paged_vecs.iter().all(|page| page.is_empty())
-    }
-
-    pub fn from_iter(
-        as_offset: u32,
-        as_cnt: usize,
-        mem_size: usize,
-        iter: impl IntoIterator<Item = (Address, T)>,
-    ) -> Self {
-        let mut vec = Self::new(as_offset, as_cnt, mem_size);
-        for (address, data) in iter {
-            vec.insert(&address, data);
-        }
-        vec
-    }
-}
-
-impl<T: Copy + Default, const PAGE_SIZE: usize> AddressMap<T, PAGE_SIZE> {
-    pub fn get_range<const N: usize>(&self, address: &Address) -> [T; N] {
-        self.paged_vecs[(address.0 - self.as_offset) as usize].range_array(address.1 as usize)
-    }
-    pub fn set_range<const N: usize>(&mut self, address: &Address, values: &[T; N]) -> [T; N] {
-        self.paged_vecs[(address.0 - self.as_offset) as usize]
-            .set_range_array(address.1 as usize, values)
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_basic_get_set() {
-        let mut v = PagedVec::<_, 4>::new(3);
-        assert_eq!(v.get(0), None);
-        v.set(0, 42);
-        assert_eq!(v.get(0), Some(&42));
-    }
-
-    #[test]
-    fn test_cross_page_operations() {
-        let mut v = PagedVec::<_, 4>::new(3);
-        v.set(3, 10); // Last element of first page
-        v.set(4, 20); // First element of second page
-        assert_eq!(v.get(3), Some(&10));
-        assert_eq!(v.get(4), Some(&20));
-    }
-
-    #[test]
-    fn test_page_boundaries() {
-        let mut v = PagedVec::<_, 4>::new(2);
-        // Fill first page
-        v.set(0, 1);
-        v.set(1, 2);
-        v.set(2, 3);
-        v.set(3, 4);
-        // Fill second page
-        v.set(4, 5);
-        v.set(5, 6);
-        v.set(6, 7);
-        v.set(7, 8);
-
-        // Verify all values
-        assert_eq!(v.range_vec(0..8), [1, 2, 3, 4, 5, 6, 7, 8]);
-    }
-
-    #[test]
-    fn test_range_cross_page_boundary() {
-        let mut v = PagedVec::<_, 4>::new(2);
-        v.set_range(2..8, &[10, 11, 12, 13, 14, 15]);
-        assert_eq!(v.range_vec(2..8), [10, 11, 12, 13, 14, 15]);
-    }
-
-    #[test]
-    fn test_large_indices() {
-        let mut v = PagedVec::<_, 4>::new(100);
-        let large_index = 399;
-        v.set(large_index, 42);
-        assert_eq!(v.get(large_index), Some(&42));
-    }
-
-    #[test]
-    fn test_range_operations_with_defaults() {
-        let mut v = PagedVec::<_, 4>::new(3);
-        v.set(2, 5);
-        v.set(5, 10);
-
-        // Should include both set values and defaults
-        assert_eq!(v.range_vec(1..7), [0, 5, 0, 0, 10, 0]);
-    }
-
-    #[test]
-    fn test_non_zero_default_type() {
-        let mut v: PagedVec<bool, 4> = PagedVec::new(2);
-        assert_eq!(v.get(0), None); // bool's default
-        v.set(0, true);
-        assert_eq!(v.get(0), Some(&true));
-        assert_eq!(v.get(1), Some(&false)); // because we created the page
-    }
-
-    #[test]
-    fn test_set_range_overlapping_pages() {
-        let mut v = PagedVec::<_, 4>::new(3);
-        let test_data = [1, 2, 3, 4, 5, 6];
-        v.set_range(2..8, &test_data);
-
-        // Verify first page
-        assert_eq!(v.get(2), Some(&1));
-        assert_eq!(v.get(3), Some(&2));
-
-        // Verify second page
-        assert_eq!(v.get(4), Some(&3));
-        assert_eq!(v.get(5), Some(&4));
-        assert_eq!(v.get(6), Some(&5));
-        assert_eq!(v.get(7), Some(&6));
-    }
-
-    #[test]
-    fn test_overlapping_set_ranges() {
-        let mut v = PagedVec::<_, 4>::new(3);
-
-        // Initial set_range
-        v.set_range(0..5, &[1, 2, 3, 4, 5]);
-        assert_eq!(v.range_vec(0..5), [1, 2, 3, 4, 5]);
-
-        // Overlap from beginning
-        v.set_range(0..3, &[10, 20, 30]);
-        assert_eq!(v.range_vec(0..5), [10, 20, 30, 4, 5]);
-
-        // Overlap in middle
-        v.set_range(2..4, &[42, 43]);
-        assert_eq!(v.range_vec(0..5), [10, 20, 42, 43, 5]);
-
-        // Overlap at end
-        v.set_range(4..6, &[91, 92]);
-        assert_eq!(v.range_vec(0..6), [10, 20, 42, 43, 91, 92]);
-    }
-
-    #[test]
-    fn test_overlapping_set_ranges_cross_pages() {
-        let mut v = PagedVec::<_, 4>::new(3);
-
-        // Fill across first two pages
-        v.set_range(0..8, &[1, 2, 3, 4, 5, 6, 7, 8]);
-
-        // Overlap end of first page and start of second
-        v.set_range(2..6, &[21, 22, 23, 24]);
-        assert_eq!(v.range_vec(0..8), [1, 2, 21, 22, 23, 24, 7, 8]);
-
-        // Overlap multiple pages
-        v.set_range(1..7, &[31, 32, 33, 34, 35, 36]);
-        assert_eq!(v.range_vec(0..8), [1, 31, 32, 33, 34, 35, 36, 8]);
-    }
-
-    #[test]
-    fn test_iterator() {
-        let mut v = PagedVec::<_, 4>::new(3);
-
-        v.set_range(4..10, &[1, 2, 3, 4, 5, 6]);
-        let contents: Vec<_> = v.iter().collect();
-        assert_eq!(contents.len(), 8); // two pages
-
-        contents
-            .iter()
-            .take(6)
-            .enumerate()
-            .for_each(|(i, &(idx, val))| {
-                assert_eq!((idx, val), (4 + i, 1 + i));
-            });
-        assert_eq!(contents[6], (10, 0));
-        assert_eq!(contents[7], (11, 0));
-    }
-}
diff --git a/crates/vm/src/system/memory/persistent.rs b/crates/vm/src/system/memory/persistent.rs
index 55a178be4d..eeb22cbfd6 100644
--- a/crates/vm/src/system/memory/persistent.rs
+++ b/crates/vm/src/system/memory/persistent.rs
@@ -13,18 +13,19 @@ use openvm_stark_backend::{
     p3_field::{FieldAlgebra, PrimeField32},
     p3_matrix::{dense::RowMajorMatrix, Matrix},
     p3_maybe_rayon::prelude::*,
-    prover::types::AirProofInput,
+    prover::{cpu::CpuBackend, types::AirProvingContext},
     rap::{BaseAirWithPublicValues, PartitionedBaseAir},
-    AirRef, Chip, ChipUsageGetter,
+    Chip, ChipUsageGetter,
 };
 use rustc_hash::FxHashSet;
+use tracing::instrument;
 
-use super::merkle::SerialReceiver;
+use super::{merkle::SerialReceiver, online::INITIAL_TIMESTAMP, TimestampedValues};
 use crate::{
-    arch::hasher::Hasher,
+    arch::{hasher::Hasher, ADDR_SPACE_OFFSET},
     system::memory::{
         dimensions::MemoryDimensions, offline_checker::MemoryBus, MemoryAddress, MemoryImage,
-        TimestampedEquipartition, INITIAL_TIMESTAMP,
+        TimestampedEquipartition,
     },
 };
 
@@ -92,7 +93,7 @@ impl<const CHUNK: usize, AB: InteractionBuilder> Air<AB> for PersistentBoundaryA
             // direction = -1 => is_final = 1
             local.expand_direction.into(),
             AB::Expr::ZERO,
-            local.address_space - AB::F::from_canonical_u32(self.memory_dims.as_offset),
+            local.address_space - AB::F::from_canonical_u32(ADDR_SPACE_OFFSET),
             local.leaf_label.into(),
         ];
         expand_fields.extend(local.hash.map(Into::into));
@@ -123,18 +124,18 @@ impl<const CHUNK: usize, AB: InteractionBuilder> Air<AB> for PersistentBoundaryA
 
 pub struct PersistentBoundaryChip<F, const CHUNK: usize> {
     pub air: PersistentBoundaryAir<CHUNK>,
-    touched_labels: TouchedLabels<F, CHUNK>,
+    pub touched_labels: TouchedLabels<F, CHUNK>,
     overridden_height: Option<usize>,
 }
 
 #[derive(Debug)]
-enum TouchedLabels<F, const CHUNK: usize> {
+pub enum TouchedLabels<F, const CHUNK: usize> {
     Running(FxHashSet<(u32, u32)>),
     Final(Vec<FinalTouchedLabel<F, CHUNK>>),
 }
 
 #[derive(Debug)]
-struct FinalTouchedLabel<F, const CHUNK: usize> {
+pub struct FinalTouchedLabel<F, const CHUNK: usize> {
     address_space: u32,
     label: u32,
     init_values: [F; CHUNK],
@@ -159,7 +160,15 @@ impl<F: PrimeField32, const CHUNK: usize> TouchedLabels<F, CHUNK> {
             _ => panic!("Cannot touch after finalization"),
         }
     }
-    fn len(&self) -> usize {
+
+    /// Returns `true` when no labels are held, in either the `Running` or the `Final` state.
+    ///
+    /// Equivalent to `self.len() == 0`; `len` dispatches on the variant.
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    pub fn len(&self) -> usize {
         match self {
             TouchedLabels::Running(touched_labels) => touched_labels.len(),
             TouchedLabels::Final(touched_labels) => touched_labels.len(),
@@ -198,59 +207,51 @@ impl<const CHUNK: usize, F: PrimeField32> PersistentBoundaryChip<F, CHUNK> {
         }
     }
 
-    pub fn finalize<H>(
+    #[instrument(name = "boundary_finalize", level = "debug", skip_all)]
+    pub(crate) fn finalize<H>(
         &mut self,
-        initial_memory: &MemoryImage<F>,
+        initial_memory: &MemoryImage,
+        // Contains only the memory that was touched
         final_memory: &TimestampedEquipartition<F, CHUNK>,
-        hasher: &mut H,
+        hasher: &H,
     ) where
         H: Hasher<CHUNK, F> + Sync + for<'a> SerialReceiver<&'a [F]>,
     {
-        match &mut self.touched_labels {
-            TouchedLabels::Running(touched_labels) => {
-                let final_touched_labels: Vec<_> = touched_labels
-                    .par_iter()
-                    .map(|&(address_space, label)| {
-                        let pointer = label * CHUNK as u32;
-                        let init_values = array::from_fn(|i| {
-                            *initial_memory
-                                .get(&(address_space, pointer + i as u32))
-                                .unwrap_or(&F::ZERO)
-                        });
-                        let initial_hash = hasher.hash(&init_values);
-                        let timestamped_values = final_memory.get(&(address_space, label)).unwrap();
-                        let final_hash = hasher.hash(&timestamped_values.values);
-                        FinalTouchedLabel {
-                            address_space,
-                            label,
-                            init_values,
-                            final_values: timestamped_values.values,
-                            init_hash: initial_hash,
-                            final_hash,
-                            final_timestamp: timestamped_values.timestamp,
-                        }
-                    })
-                    .collect();
-                for l in &final_touched_labels {
-                    hasher.receive(&l.init_values);
-                    hasher.receive(&l.final_values);
+        let final_touched_labels: Vec<_> = final_memory
+            .par_iter()
+            .map(|&((addr_space, ptr), ts_values)| {
+                let TimestampedValues { timestamp, values } = ts_values;
+                // SAFETY: every addr_space in `final_memory` is in bounds
+                let init_values = array::from_fn(|i| unsafe {
+                    initial_memory.get_f::<F>(addr_space, ptr + i as u32)
+                });
+                let initial_hash = hasher.hash(&init_values);
+                let final_hash = hasher.hash(&values);
+                FinalTouchedLabel {
+                    address_space: addr_space,
+                    label: ptr / CHUNK as u32,
+                    init_values,
+                    final_values: values,
+                    init_hash: initial_hash,
+                    final_hash,
+                    final_timestamp: timestamp,
                 }
-                self.touched_labels = TouchedLabels::Final(final_touched_labels);
-            }
-            _ => panic!("Cannot finalize after finalization"),
+            })
+            .collect();
+        for l in &final_touched_labels {
+            hasher.receive(&l.init_values);
+            hasher.receive(&l.final_values);
         }
+        self.touched_labels = TouchedLabels::Final(final_touched_labels);
     }
 }
 
-impl<const CHUNK: usize, SC: StarkGenericConfig> Chip<SC> for PersistentBoundaryChip<Val<SC>, CHUNK>
+impl<const CHUNK: usize, RA, SC> Chip<RA, CpuBackend<SC>> for PersistentBoundaryChip<Val<SC>, CHUNK>
 where
+    SC: StarkGenericConfig,
     Val<SC>: PrimeField32,
 {
-    fn air(&self) -> AirRef<SC> {
-        Arc::new(self.air.clone())
-    }
-
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
+    fn generate_proving_ctx(&self, _: RA) -> AirProvingContext<CpuBackend<SC>> {
         let trace = {
             let width = PersistentBoundaryCols::<Val<SC>, CHUNK>::width();
             // Boundary AIR should always present in order to fix the AIR ID of merkle AIR.
@@ -265,13 +266,13 @@ where
             }
             let mut rows = Val::<SC>::zero_vec(height * width);
 
-            let touched_labels = match self.touched_labels {
+            let touched_labels = match &self.touched_labels {
                 TouchedLabels::Final(touched_labels) => touched_labels,
                 _ => panic!("Cannot generate trace before finalization"),
             };
 
             rows.par_chunks_mut(2 * width)
-                .zip(touched_labels.into_par_iter())
+                .zip(touched_labels.par_iter())
                 .for_each(|(row, touched_label)| {
                     let (initial_row, final_row) = row.split_at_mut(width);
                     *initial_row.borrow_mut() = PersistentBoundaryCols {
@@ -292,9 +293,9 @@ where
                         timestamp: Val::<SC>::from_canonical_u32(touched_label.final_timestamp),
                     };
                 });
-            RowMajorMatrix::new(rows, width)
+            Arc::new(RowMajorMatrix::new(rows, width))
         };
-        AirProofInput::simple_no_pis(trace)
+        AirProvingContext::simple_no_pis(trace)
     }
 }
 
diff --git a/crates/vm/src/system/memory/tests.rs b/crates/vm/src/system/memory/tests.rs
index 9ebb9306aa..fcdc709c22 100644
--- a/crates/vm/src/system/memory/tests.rs
+++ b/crates/vm/src/system/memory/tests.rs
@@ -1,329 +1,122 @@
-use std::{
-    array,
-    borrow::{Borrow, BorrowMut},
-    sync::Arc,
-};
+use std::{array, fmt::Debug};
 
-use itertools::Itertools;
-use openvm_circuit_primitives::var_range::{
-    SharedVariableRangeCheckerChip, VariableRangeCheckerBus,
-};
-use openvm_circuit_primitives_derive::AlignedBorrow;
-use openvm_poseidon2_air::Poseidon2Config;
-use openvm_stark_backend::{
-    interaction::{BusIndex, InteractionBuilder, PermutationCheckBus},
-    p3_air::{Air, BaseAir},
-    p3_field::{FieldAlgebra, PrimeField32},
-    p3_matrix::{dense::RowMajorMatrix, Matrix},
-    prover::types::AirProofInput,
-    rap::{BaseAirWithPublicValues, PartitionedBaseAir},
-    Chip,
-};
-use openvm_stark_sdk::{
-    config::baby_bear_poseidon2::BabyBearPoseidon2Engine, engine::StarkFriEngine,
-    p3_baby_bear::BabyBear, utils::create_seeded_rng,
-};
-use rand::{
-    prelude::{SliceRandom, StdRng},
-    Rng,
+use openvm_instructions::{
+    riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS},
+    NATIVE_AS,
 };
+use openvm_stark_backend::p3_field::FieldAlgebra;
+use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
+use rand::{distributions::Standard, prelude::Distribution, thread_rng, Rng};
 
-use super::MemoryController;
 use crate::{
-    arch::{
-        testing::{memory::gen_pointer, MEMORY_BUS, MEMORY_MERKLE_BUS, POSEIDON2_DIRECT_BUS},
-        MemoryConfig,
-    },
-    system::{
-        memory::{
-            offline_checker::{MemoryBridge, MemoryBus, MemoryReadAuxCols, MemoryWriteAuxCols},
-            MemoryAddress, OfflineMemory, RecordId,
-        },
-        poseidon2::Poseidon2PeripheryChip,
-    },
+    arch::{testing::VmChipTestBuilder, MemoryConfig},
+    system::memory::{merkle::public_values::PUBLIC_VALUES_AS, online::TracingMemory},
 };
 
-const MAX: usize = 32;
-const RANGE_CHECKER_BUS: BusIndex = 3;
-
-#[repr(C)]
-#[derive(AlignedBorrow)]
-struct MemoryRequesterCols<T> {
-    address_space: T,
-    pointer: T,
-    data_1: [T; 1],
-    data_4: [T; 4],
-    data_max: [T; MAX],
-    timestamp: T,
-    write_1_aux: MemoryWriteAuxCols<T, 1>,
-    write_4_aux: MemoryWriteAuxCols<T, 4>,
-    read_1_aux: MemoryReadAuxCols<T>,
-    read_4_aux: MemoryReadAuxCols<T>,
-    read_max_aux: MemoryReadAuxCols<T>,
-    is_write_1: T,
-    is_write_4: T,
-    is_read_1: T,
-    is_read_4: T,
-    is_read_max: T,
-}
-
-struct MemoryRequesterAir {
-    memory_bridge: MemoryBridge,
-}
-
-impl<T> BaseAirWithPublicValues<T> for MemoryRequesterAir {}
-impl<T> PartitionedBaseAir<T> for MemoryRequesterAir {}
-impl<T> BaseAir<T> for MemoryRequesterAir {
-    fn width(&self) -> usize {
-        MemoryRequesterCols::<T>::width()
-    }
-}
+type F = BabyBear;
 
-impl<AB: InteractionBuilder> Air<AB> for MemoryRequesterAir {
-    fn eval(&self, builder: &mut AB) {
-        let main = builder.main();
-        let local = main.row_slice(0);
-        let local: &MemoryRequesterCols<AB::Var> = (*local).borrow();
-
-        let flags = [
-            local.is_read_1,
-            local.is_write_1,
-            local.is_read_4,
-            local.is_write_4,
-            local.is_read_max,
-        ];
+fn test_memory_write_by_tester(mut tester: VmChipTestBuilder<F>, its: usize) {
+    let mut rng = create_seeded_rng();
 
-        let mut sum = AB::Expr::ZERO;
-        for flag in flags {
-            builder.assert_bool(flag);
-            sum += flag.into();
+    // The point here is to have a lot of equal
+    // and intersecting/overlapping blocks,
+    // by limiting the space of valid pointers.
+    let max_ptr = 20;
+    let aligns = [4, 4, 4, 1];
+    let value_bounds = [256, 256, 256, (1 << 30)];
+    let max_log_block_size = 4;
+    for _ in 0..its {
+        let addr_sp = rng.gen_range(1..=aligns.len());
+        let align: usize = aligns[addr_sp - 1];
+        let value_bound: u32 = value_bounds[addr_sp - 1];
+        let ptr = rng.gen_range(0..max_ptr / align) * align;
+        let log_len = rng.gen_range(align.trailing_zeros()..=max_log_block_size);
+        match log_len {
+            0 => tester.write::<1>(
+                addr_sp,
+                ptr,
+                array::from_fn(|_| F::from_canonical_u32(rng.gen_range(0..value_bound))),
+            ),
+            1 => tester.write::<2>(
+                addr_sp,
+                ptr,
+                array::from_fn(|_| F::from_canonical_u32(rng.gen_range(0..value_bound))),
+            ),
+            2 => tester.write::<4>(
+                addr_sp,
+                ptr,
+                array::from_fn(|_| F::from_canonical_u32(rng.gen_range(0..value_bound))),
+            ),
+            3 => tester.write::<8>(
+                addr_sp,
+                ptr,
+                array::from_fn(|_| F::from_canonical_u32(rng.gen_range(0..value_bound))),
+            ),
+            4 => tester.write::<16>(
+                addr_sp,
+                ptr,
+                array::from_fn(|_| F::from_canonical_u32(rng.gen_range(0..value_bound))),
+            ),
+            _ => unreachable!(),
         }
-        builder.assert_one(sum);
-
-        self.memory_bridge
-            .read(
-                MemoryAddress::new(local.address_space, local.pointer),
-                local.data_1,
-                local.timestamp,
-                &local.read_1_aux,
-            )
-            .eval(builder, local.is_read_1);
-
-        self.memory_bridge
-            .read(
-                MemoryAddress::new(local.address_space, local.pointer),
-                local.data_4,
-                local.timestamp,
-                &local.read_4_aux,
-            )
-            .eval(builder, local.is_read_4);
-
-        self.memory_bridge
-            .write(
-                MemoryAddress::new(local.address_space, local.pointer),
-                local.data_1,
-                local.timestamp,
-                &local.write_1_aux,
-            )
-            .eval(builder, local.is_write_1);
-
-        self.memory_bridge
-            .write(
-                MemoryAddress::new(local.address_space, local.pointer),
-                local.data_4,
-                local.timestamp,
-                &local.write_4_aux,
-            )
-            .eval(builder, local.is_write_4);
-
-        self.memory_bridge
-            .read(
-                MemoryAddress::new(local.address_space, local.pointer),
-                local.data_max,
-                local.timestamp,
-                &local.read_max_aux,
-            )
-            .eval(builder, local.is_read_max);
     }
-}
-
-fn generate_trace<F: PrimeField32>(
-    records: Vec<RecordId>,
-    offline_memory: &OfflineMemory<F>,
-) -> RowMajorMatrix<F> {
-    let height = records.len().next_power_of_two();
-    let width = MemoryRequesterCols::<F>::width();
-    let mut values = F::zero_vec(height * width);
-
-    let aux_factory = offline_memory.aux_cols_factory();
 
-    for (row, record_id) in values.chunks_mut(width).zip(records) {
-        let record = offline_memory.record_by_id(record_id).clone();
-
-        let row: &mut MemoryRequesterCols<F> = row.borrow_mut();
-        row.address_space = record.address_space;
-        row.pointer = record.pointer;
-        row.timestamp = F::from_canonical_u32(record.timestamp);
-
-        match (record.data_slice().len(), &record.prev_data_slice()) {
-            (1, &None) => {
-                aux_factory.generate_read_aux(&record, &mut row.read_1_aux);
-                row.data_1 = record.data_slice().try_into().unwrap();
-                row.is_read_1 = F::ONE;
-            }
-            (1, &Some(_)) => {
-                aux_factory.generate_write_aux(&record, &mut row.write_1_aux);
-                row.data_1 = record.data_slice().try_into().unwrap();
-                row.is_write_1 = F::ONE;
-            }
-            (4, &None) => {
-                aux_factory.generate_read_aux(&record, &mut row.read_4_aux);
-                row.data_4 = record.data_slice().try_into().unwrap();
-                row.is_read_4 = F::ONE;
-            }
-            (4, &Some(_)) => {
-                aux_factory.generate_write_aux(&record, &mut row.write_4_aux);
-                row.data_4 = record.data_slice().try_into().unwrap();
-                row.is_write_4 = F::ONE;
-            }
-            (MAX, &None) => {
-                aux_factory.generate_read_aux(&record, &mut row.read_max_aux);
-                row.data_max = record.data_slice().try_into().unwrap();
-                row.is_read_max = F::ONE;
-            }
-            _ => panic!("unexpected pattern"),
-        }
-    }
-    RowMajorMatrix::new(values, width)
+    let tester = tester.build().finalize();
+    tester.simple_test().expect("Verification failed");
 }
 
-/// Simple integration test for memory chip.
-///
-/// Creates a bunch of random read/write records, used to generate a trace for [MemoryRequesterAir],
-/// which sends reads/writes over [MemoryBridge].
 #[test]
-fn test_memory_controller() {
-    let memory_bus = MemoryBus::new(MEMORY_BUS);
-    let memory_config = MemoryConfig::default();
-    let range_bus = VariableRangeCheckerBus::new(RANGE_CHECKER_BUS, memory_config.decomp);
-    let range_checker = SharedVariableRangeCheckerChip::new(range_bus);
-
-    let mut memory_controller =
-        MemoryController::with_volatile_memory(memory_bus, memory_config, range_checker.clone());
-
-    let mut rng = create_seeded_rng();
-    let records = make_random_accesses(&mut memory_controller, &mut rng);
-    let memory_requester_air = Arc::new(MemoryRequesterAir {
-        memory_bridge: memory_controller.memory_bridge(),
-    });
-
-    memory_controller.finalize(None::<&mut Poseidon2PeripheryChip<BabyBear>>);
-
-    let memory_requester_trace = {
-        let offline_memory = memory_controller.offline_memory();
-        let trace = generate_trace(records, &offline_memory.lock().unwrap());
-        trace
-    };
-
-    let mut airs = memory_controller.airs();
-    let mut air_proof_inputs = memory_controller.generate_air_proof_inputs();
-    airs.push(memory_requester_air);
-    air_proof_inputs.push(AirProofInput::simple_no_pis(memory_requester_trace));
-    airs.push(range_checker.air());
-    air_proof_inputs.push(range_checker.generate_air_proof_input());
-
-    BabyBearPoseidon2Engine::run_test_fast(airs, air_proof_inputs).expect("Verification failed");
+fn test_memory_write_volatile() {
+    test_memory_write_by_tester(
+        VmChipTestBuilder::<F>::volatile(MemoryConfig::default()),
+        1000,
+    );
+    test_memory_write_by_tester(VmChipTestBuilder::<F>::volatile(MemoryConfig::default()), 0);
 }
 
 #[test]
-fn test_memory_controller_persistent() {
-    let memory_bus = MemoryBus::new(MEMORY_BUS);
-    let merkle_bus = PermutationCheckBus::new(MEMORY_MERKLE_BUS);
-    let compression_bus = PermutationCheckBus::new(POSEIDON2_DIRECT_BUS);
-    let memory_config = MemoryConfig::default();
-    let range_bus = VariableRangeCheckerBus::new(RANGE_CHECKER_BUS, memory_config.decomp);
-    let range_checker = SharedVariableRangeCheckerChip::new(range_bus);
-
-    let mut memory_controller = MemoryController::with_persistent_memory(
-        memory_bus,
-        memory_config,
-        range_checker.clone(),
-        merkle_bus,
-        compression_bus,
+fn test_memory_write_persistent() {
+    test_memory_write_by_tester(
+        VmChipTestBuilder::<F>::persistent(MemoryConfig::default()),
+        1000,
     );
+    test_memory_write_by_tester(
+        VmChipTestBuilder::<F>::persistent(MemoryConfig::default()),
+        0,
+    );
+}
 
-    let mut rng = create_seeded_rng();
-    let records = make_random_accesses(&mut memory_controller, &mut rng);
-
-    let memory_requester_air = MemoryRequesterAir {
-        memory_bridge: memory_controller.memory_bridge(),
-    };
-
-    let mut poseidon_chip =
-        Poseidon2PeripheryChip::new(Poseidon2Config::default(), POSEIDON2_DIRECT_BUS, 3);
-
-    memory_controller.finalize(Some(&mut poseidon_chip));
-
-    let memory_requester_trace = {
-        let offline_memory = memory_controller.offline_memory();
-        let trace = generate_trace(records, &offline_memory.lock().unwrap());
-        trace
-    };
+fn test_no_adapter_records_for_singleton_accesses<T, const BLOCK_SIZE: usize>(address_space: u32)
+where
+    T: Copy + Debug,
+    Standard: Distribution<T>,
+{
+    let memory_config = MemoryConfig::default();
+    let mut memory = TracingMemory::new(&memory_config, BLOCK_SIZE, 0);
+    let max_ptr = (memory_config.addr_spaces[address_space as usize].num_cells / BLOCK_SIZE) as u32;
 
-    let mut airs = memory_controller.airs();
-    let mut air_proof_inputs = memory_controller.generate_air_proof_inputs();
-    airs.extend([
-        Arc::new(memory_requester_air),
-        poseidon_chip.air(),
-        range_checker.air(),
-    ]);
-    air_proof_inputs.extend([
-        AirProofInput::simple_no_pis(memory_requester_trace),
-        poseidon_chip.generate_air_proof_input(),
-        range_checker.generate_air_proof_input(),
-    ]);
+    let mut rng = thread_rng();
+    for _ in 0..1000 {
+        let pointer = rng.gen_range(0..max_ptr) * BLOCK_SIZE as u32;
 
-    BabyBearPoseidon2Engine::run_test_fast(airs, air_proof_inputs).expect("Verification failed");
+        if rng.gen_bool(0.5) {
+            let data: [T; BLOCK_SIZE] = array::from_fn(|_| rng.gen());
+            unsafe {
+                memory.write::<T, BLOCK_SIZE, BLOCK_SIZE>(address_space, pointer, data);
+            }
+        } else {
+            unsafe {
+                memory.read::<T, BLOCK_SIZE, BLOCK_SIZE>(address_space, pointer);
+            }
+        }
+    }
+    assert!(memory.access_adapter_records.allocated().is_empty());
 }
 
-fn make_random_accesses<F: PrimeField32>(
-    memory_controller: &mut MemoryController<F>,
-    mut rng: &mut StdRng,
-) -> Vec<RecordId> {
-    (0..1024)
-        .map(|_| {
-            let address_space = F::from_canonical_u32(*[1, 2].choose(&mut rng).unwrap());
-
-            match rng.gen_range(0..5) {
-                0 => {
-                    let pointer = F::from_canonical_usize(gen_pointer(rng, 1));
-                    let data = F::from_canonical_u32(rng.gen_range(0..1 << 30));
-                    let (record_id, _) = memory_controller.write(address_space, pointer, [data]);
-                    record_id
-                }
-                1 => {
-                    let pointer = F::from_canonical_usize(gen_pointer(rng, 1));
-                    let (record_id, _) = memory_controller.read::<1>(address_space, pointer);
-                    record_id
-                }
-                2 => {
-                    let pointer = F::from_canonical_usize(gen_pointer(rng, 4));
-                    let (record_id, _) = memory_controller.read::<4>(address_space, pointer);
-                    record_id
-                }
-                3 => {
-                    let pointer = F::from_canonical_usize(gen_pointer(rng, 4));
-                    let data = array::from_fn(|_| F::from_canonical_u32(rng.gen_range(0..1 << 30)));
-                    let (record_id, _) = memory_controller.write::<4>(address_space, pointer, data);
-                    record_id
-                }
-                4 => {
-                    let pointer = F::from_canonical_usize(gen_pointer(rng, MAX));
-                    let (record_id, _) = memory_controller.read::<MAX>(address_space, pointer);
-                    record_id
-                }
-                _ => unreachable!(),
-            }
-        })
-        .collect_vec()
+#[test]
+fn test_no_adapter_records() {
+    test_no_adapter_records_for_singleton_accesses::<u8, 4>(RV32_REGISTER_AS);
+    test_no_adapter_records_for_singleton_accesses::<u8, 4>(RV32_MEMORY_AS);
+    test_no_adapter_records_for_singleton_accesses::<u8, 4>(PUBLIC_VALUES_AS);
+    test_no_adapter_records_for_singleton_accesses::<F, 1>(NATIVE_AS);
 }
diff --git a/crates/vm/src/system/memory/tree/mod.rs b/crates/vm/src/system/memory/tree/mod.rs
deleted file mode 100644
index fcdb86d8ee..0000000000
--- a/crates/vm/src/system/memory/tree/mod.rs
+++ /dev/null
@@ -1,177 +0,0 @@
-pub mod public_values;
-
-use std::{ops::Range, sync::Arc};
-
-use openvm_stark_backend::{p3_field::PrimeField32, p3_maybe_rayon::prelude::*};
-use MemoryNode::*;
-
-use super::controller::dimensions::MemoryDimensions;
-use crate::{
-    arch::hasher::{Hasher, HasherChip},
-    system::memory::MemoryImage,
-};
-
-#[derive(Clone, Debug, PartialEq)]
-pub enum MemoryNode<const CHUNK: usize, F: PrimeField32> {
-    Leaf {
-        values: [F; CHUNK],
-    },
-    NonLeaf {
-        hash: [F; CHUNK],
-        left: Arc<MemoryNode<CHUNK, F>>,
-        right: Arc<MemoryNode<CHUNK, F>>,
-    },
-}
-
-impl<const CHUNK: usize, F: PrimeField32> MemoryNode<CHUNK, F> {
-    pub fn hash(&self) -> [F; CHUNK] {
-        match self {
-            Leaf { values: hash } => *hash,
-            NonLeaf { hash, .. } => *hash,
-        }
-    }
-
-    pub fn new_leaf(values: [F; CHUNK]) -> Self {
-        Leaf { values }
-    }
-
-    pub fn new_nonleaf(
-        left: Arc<MemoryNode<CHUNK, F>>,
-        right: Arc<MemoryNode<CHUNK, F>>,
-        hasher: &mut impl HasherChip<CHUNK, F>,
-    ) -> Self {
-        NonLeaf {
-            hash: hasher.compress_and_record(&left.hash(), &right.hash()),
-            left,
-            right,
-        }
-    }
-
-    /// Returns a tree of height `height` with all leaves set to `leaf_value`.
-    pub fn construct_uniform(
-        height: usize,
-        leaf_value: [F; CHUNK],
-        hasher: &impl Hasher<CHUNK, F>,
-    ) -> MemoryNode<CHUNK, F> {
-        if height == 0 {
-            Self::new_leaf(leaf_value)
-        } else {
-            let child = Arc::new(Self::construct_uniform(height - 1, leaf_value, hasher));
-            NonLeaf {
-                hash: hasher.compress(&child.hash(), &child.hash()),
-                left: child.clone(),
-                right: child,
-            }
-        }
-    }
-
-    fn from_memory(
-        memory: &[(u64, F)],
-        lookup_range: Range<usize>,
-        length: u64,
-        from: u64,
-        hasher: &(impl Hasher<CHUNK, F> + Sync),
-        zero_leaf: &MemoryNode<CHUNK, F>,
-    ) -> MemoryNode<CHUNK, F> {
-        if length == CHUNK as u64 {
-            if lookup_range.is_empty() {
-                zero_leaf.clone()
-            } else {
-                debug_assert_eq!(memory[lookup_range.start].0, from);
-                let mut values = [F::ZERO; CHUNK];
-                for (index, value) in memory[lookup_range].iter() {
-                    values[(index % CHUNK as u64) as usize] = *value;
-                }
-                MemoryNode::new_leaf(hasher.hash(&values))
-            }
-        } else if lookup_range.is_empty() {
-            let leaf_value = hasher.hash(&[F::ZERO; CHUNK]);
-            MemoryNode::construct_uniform(
-                (length / CHUNK as u64).trailing_zeros() as usize,
-                leaf_value,
-                hasher,
-            )
-        } else {
-            let midpoint = from + length / 2;
-            let mid = {
-                let mut left = lookup_range.start;
-                let mut right = lookup_range.end;
-                if memory[left].0 >= midpoint {
-                    left
-                } else {
-                    while left + 1 < right {
-                        let mid = left + (right - left) / 2;
-                        if memory[mid].0 < midpoint {
-                            left = mid;
-                        } else {
-                            right = mid;
-                        }
-                    }
-                    right
-                }
-            };
-            let (left, right) = join(
-                || {
-                    Self::from_memory(
-                        memory,
-                        lookup_range.start..mid,
-                        length >> 1,
-                        from,
-                        hasher,
-                        zero_leaf,
-                    )
-                },
-                || {
-                    Self::from_memory(
-                        memory,
-                        mid..lookup_range.end,
-                        length >> 1,
-                        midpoint,
-                        hasher,
-                        zero_leaf,
-                    )
-                },
-            );
-            NonLeaf {
-                hash: hasher.compress(&left.hash(), &right.hash()),
-                left: Arc::new(left),
-                right: Arc::new(right),
-            }
-        }
-    }
-
-    pub fn tree_from_memory(
-        memory_dimensions: MemoryDimensions,
-        memory: &MemoryImage<F>,
-        hasher: &(impl Hasher<CHUNK, F> + Sync),
-    ) -> MemoryNode<CHUNK, F> {
-        // Construct a Vec that includes the address space in the label calculation,
-        // representing the entire memory tree.
-        let memory_items = memory
-            .items()
-            .filter(|((_, ptr), _)| *ptr as usize / CHUNK < (1 << memory_dimensions.address_height))
-            .map(|((address_space, pointer), value)| {
-                (
-                    memory_dimensions.label_to_index((address_space, pointer / CHUNK as u32))
-                        * CHUNK as u64
-                        + (pointer % CHUNK as u32) as u64,
-                    value,
-                )
-            })
-            .collect::<Vec<_>>();
-        debug_assert!(memory_items.is_sorted_by_key(|(addr, _)| addr));
-        debug_assert!(
-            memory_items.last().map_or(0, |(addr, _)| *addr)
-                < ((CHUNK as u64) << memory_dimensions.overall_height())
-        );
-        let zero_leaf = MemoryNode::new_leaf(hasher.hash(&[F::ZERO; CHUNK]));
-        Self::from_memory(
-            &memory_items,
-            0..memory_items.len(),
-            (CHUNK as u64) << memory_dimensions.overall_height(),
-            0,
-            hasher,
-            &zero_leaf,
-        )
-    }
-}
diff --git a/crates/vm/src/system/memory/volatile/mod.rs b/crates/vm/src/system/memory/volatile/mod.rs
index e01162c789..9296c91247 100644
--- a/crates/vm/src/system/memory/volatile/mod.rs
+++ b/crates/vm/src/system/memory/volatile/mod.rs
@@ -21,11 +21,12 @@ use openvm_stark_backend::{
     p3_field::{Field, FieldAlgebra, PrimeField32},
     p3_matrix::{dense::RowMajorMatrix, Matrix},
     p3_maybe_rayon::prelude::*,
-    prover::types::AirProofInput,
+    prover::{cpu::CpuBackend, types::AirProvingContext},
     rap::{BaseAirWithPublicValues, PartitionedBaseAir},
-    AirRef, Chip, ChipUsageGetter,
+    Chip, ChipUsageGetter,
 };
 use static_assertions::const_assert;
+use tracing::instrument;
 
 use super::TimestampedEquipartition;
 use crate::system::memory::{
@@ -183,7 +184,7 @@ pub struct VolatileBoundaryChip<F> {
     pub air: VolatileBoundaryAir,
     range_checker: SharedVariableRangeCheckerChip,
     overridden_height: Option<usize>,
-    final_memory: Option<TimestampedEquipartition<F, 1>>,
+    pub final_memory: Option<TimestampedEquipartition<F, 1>>,
     addr_space_max_bits: usize,
     pointer_max_bits: usize,
 }
@@ -218,27 +219,26 @@ impl<F: PrimeField32> VolatileBoundaryChip<F> {
     }
     /// Volatile memory requires the starting and final memory to be in equipartition with block
     /// size `1`. When block size is `1`, then the `label` is the same as the address pointer.
+    #[instrument(name = "boundary_finalize", level = "debug", skip_all)]
     pub fn finalize(&mut self, final_memory: TimestampedEquipartition<F, 1>) {
         self.final_memory = Some(final_memory);
     }
 }
 
-impl<SC: StarkGenericConfig> Chip<SC> for VolatileBoundaryChip<Val<SC>>
+impl<RA, SC: StarkGenericConfig> Chip<RA, CpuBackend<SC>> for VolatileBoundaryChip<Val<SC>>
 where
     Val<SC>: PrimeField32,
 {
-    fn air(&self) -> AirRef<SC> {
-        Arc::new(self.air.clone())
-    }
-
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
+    fn generate_proving_ctx(&self, _: RA) -> AirProvingContext<CpuBackend<SC>> {
         // Volatile memory requires the starting and final memory to be in equipartition with block
         // size `1`. When block size is `1`, then the `label` is the same as the address
         // pointer.
         let width = self.trace_width();
-        let air = Arc::new(self.air);
+        let addr_lt_air = &self.air.addr_lt_air;
+        // TEMP[jpw]: clone
         let final_memory = self
             .final_memory
+            .clone()
             .expect("Trace generation should be called after finalize");
         let trace_height = if let Some(height) = self.overridden_height {
             assert!(
@@ -279,7 +279,7 @@ where
                 if i != memory_len - 1 {
                     let (next_addr_space, next_ptr) = sorted_final_memory[i + 1].0;
                     let mut out = Val::<SC>::ZERO;
-                    air.addr_lt_air.0.generate_subrow(
+                    addr_lt_air.0.generate_subrow(
                         (
                             self.range_checker.as_ref(),
                             &[
@@ -300,7 +300,7 @@ where
         if memory_len > 0 {
             let mut out = Val::<SC>::ZERO;
             let row: &mut VolatileBoundaryCols<_> = rows[width * (trace_height - 1)..].borrow_mut();
-            air.addr_lt_air.0.generate_subrow(
+            addr_lt_air.0.generate_subrow(
                 (
                     self.range_checker.as_ref(),
                     &[Val::<SC>::ZERO, Val::<SC>::ZERO],
@@ -310,8 +310,8 @@ where
             );
         }
 
-        let trace = RowMajorMatrix::new(rows, width);
-        AirProofInput::simple_no_pis(trace)
+        let trace = Arc::new(RowMajorMatrix::new(rows, width));
+        AirProvingContext::simple_no_pis(trace)
     }
 }
 
diff --git a/crates/vm/src/system/memory/volatile/tests.rs b/crates/vm/src/system/memory/volatile/tests.rs
index 29917d219d..a0c484793b 100644
--- a/crates/vm/src/system/memory/volatile/tests.rs
+++ b/crates/vm/src/system/memory/volatile/tests.rs
@@ -1,11 +1,12 @@
 use std::{collections::HashSet, iter, sync::Arc};
 
-use openvm_circuit_primitives::var_range::{
-    SharedVariableRangeCheckerChip, VariableRangeCheckerBus,
-};
+use openvm_circuit_primitives::var_range::{VariableRangeCheckerBus, VariableRangeCheckerChip};
 use openvm_stark_backend::{
-    interaction::BusIndex, p3_field::FieldAlgebra, p3_matrix::dense::RowMajorMatrix,
-    prover::types::AirProofInput, Chip,
+    interaction::BusIndex,
+    p3_field::FieldAlgebra,
+    p3_matrix::dense::RowMajorMatrix,
+    prover::{cpu::CpuBackend, types::AirProvingContext},
+    AirRef, Chip,
 };
 use openvm_stark_sdk::{
     config::baby_bear_poseidon2::{BabyBearPoseidon2Config, BabyBearPoseidon2Engine},
@@ -45,7 +46,7 @@ fn boundary_air_test() {
     }
 
     let range_bus = VariableRangeCheckerBus::new(RANGE_CHECKER_BUS, DECOMP);
-    let range_checker = SharedVariableRangeCheckerChip::new(range_bus);
+    let range_checker = Arc::new(VariableRangeCheckerChip::new(range_bus));
     let mut boundary_chip =
         VolatileBoundaryChip::new(memory_bus, 2, LIMB_BITS, range_checker.clone());
 
@@ -55,21 +56,22 @@ fn boundary_air_test() {
         let final_data = Val::from_canonical_u32(rng.gen_range(0..MAX_VAL));
         let final_clk = rng.gen_range(1..MAX_VAL) as u32;
 
-        final_memory.insert(
+        final_memory.push((
             (addr_space, pointer),
             TimestampedValues {
                 values: [final_data],
                 timestamp: final_clk,
             },
-        );
+        ));
     }
+    final_memory.sort_by_key(|(key, _)| *key);
 
     let diff_height = num_addresses.next_power_of_two() - num_addresses;
 
     let init_memory_dummy_air = DummyInteractionAir::new(4, false, MEMORY_BUS);
     let final_memory_dummy_air = DummyInteractionAir::new(4, true, MEMORY_BUS);
 
-    let init_memory_trace = RowMajorMatrix::new(
+    let init_memory_trace = Arc::new(RowMajorMatrix::new(
         distinct_addresses
             .iter()
             .flat_map(|(addr_space, pointer)| {
@@ -84,13 +86,16 @@ fn boundary_air_test() {
             .chain(iter::repeat_n(Val::ZERO, 5 * diff_height))
             .collect(),
         5,
-    );
+    ));
 
-    let final_memory_trace = RowMajorMatrix::new(
+    let final_memory_trace = Arc::new(RowMajorMatrix::new(
         distinct_addresses
             .iter()
             .flat_map(|(addr_space, pointer)| {
-                let timestamped_value = final_memory.get(&(*addr_space, *pointer)).unwrap();
+                let timestamped_value = final_memory[final_memory
+                    .binary_search_by(|(key, _)| key.cmp(&(*addr_space, *pointer)))
+                    .unwrap()]
+                .1;
 
                 vec![
                     Val::ONE,
@@ -103,24 +108,24 @@ fn boundary_air_test() {
             .chain(iter::repeat_n(Val::ZERO, 5 * diff_height))
             .collect(),
         5,
-    );
+    ));
 
     boundary_chip.finalize(final_memory.clone());
-    let boundary_air = boundary_chip.air();
-    let boundary_api: AirProofInput<BabyBearPoseidon2Config> =
-        boundary_chip.generate_air_proof_input();
+    let boundary_air = Arc::new(boundary_chip.air.clone()) as AirRef<_>;
+    let boundary_ctx: AirProvingContext<CpuBackend<BabyBearPoseidon2Config>> =
+        boundary_chip.generate_proving_ctx(());
     // test trace height override
     {
-        let overridden_height = boundary_api.main_trace_height() * 2;
-        let range_checker = SharedVariableRangeCheckerChip::new(range_bus);
+        let overridden_height = boundary_ctx.main_trace_height() * 2;
+        let range_checker = Arc::new(VariableRangeCheckerChip::new(range_bus));
         let mut boundary_chip =
             VolatileBoundaryChip::new(memory_bus, 2, LIMB_BITS, range_checker.clone());
         boundary_chip.set_overridden_height(overridden_height);
         boundary_chip.finalize(final_memory.clone());
-        let boundary_api: AirProofInput<BabyBearPoseidon2Config> =
-            boundary_chip.generate_air_proof_input();
+        let boundary_ctx: AirProvingContext<CpuBackend<BabyBearPoseidon2Config>> =
+            boundary_chip.generate_proving_ctx(());
         assert_eq!(
-            boundary_api.main_trace_height(),
+            boundary_ctx.main_trace_height(),
             overridden_height.next_power_of_two()
         );
     }
@@ -128,15 +133,15 @@ fn boundary_air_test() {
     BabyBearPoseidon2Engine::run_test_fast(
         vec![
             boundary_air,
-            range_checker.air(),
+            Arc::new(range_checker.air),
             Arc::new(init_memory_dummy_air),
             Arc::new(final_memory_dummy_air),
         ],
         vec![
-            boundary_api,
-            range_checker.generate_air_proof_input(),
-            AirProofInput::simple_no_pis(init_memory_trace),
-            AirProofInput::simple_no_pis(final_memory_trace),
+            boundary_ctx,
+            range_checker.generate_proving_ctx(()),
+            AirProvingContext::simple_no_pis(init_memory_trace),
+            AirProvingContext::simple_no_pis(final_memory_trace),
         ],
     )
     .expect("Verification failed");
diff --git a/crates/vm/src/system/mod.rs b/crates/vm/src/system/mod.rs
index a1038ac86a..6f198f4bbf 100644
--- a/crates/vm/src/system/mod.rs
+++ b/crates/vm/src/system/mod.rs
@@ -1,11 +1,623 @@
+use std::sync::Arc;
+
+use derive_more::derive::From;
+use openvm_circuit_derive::{AnyEnum, Executor, MeteredExecutor, PreflightExecutor};
+use openvm_circuit_primitives::var_range::{
+    SharedVariableRangeCheckerChip, VariableRangeCheckerAir, VariableRangeCheckerBus,
+    VariableRangeCheckerChip,
+};
+use openvm_instructions::{
+    LocalOpcode, PhantomDiscriminant, PublishOpcode, SysPhantom, SystemOpcode,
+};
+use openvm_stark_backend::{
+    config::{StarkGenericConfig, Val},
+    engine::StarkEngine,
+    interaction::{LookupBus, PermutationCheckBus},
+    p3_field::{Field, PrimeField32},
+    prover::{
+        cpu::{CpuBackend, CpuDevice},
+        hal::{MatrixDimensions, ProverBackend},
+        types::{AirProvingContext, CommittedTraceData},
+    },
+    AirRef, Chip,
+};
+use rustc_hash::FxHashMap;
+
+use self::{connector::VmConnectorAir, program::ProgramAir, public_values::PublicValuesAir};
+use crate::{
+    arch::{
+        vm_poseidon2_config, AirInventory, AirInventoryError, BusIndexManager, ChipInventory,
+        ChipInventoryError, DenseRecordArena, ExecutionBridge, ExecutionBus, ExecutionState,
+        ExecutorInventory, ExecutorInventoryError, MatrixRecordArena, PhantomSubExecutor,
+        RowMajorMatrixArena, SystemConfig, VmAirWrapper, VmBuilder, VmChipComplex, VmChipWrapper,
+        VmCircuitConfig, VmExecutionConfig, CONNECTOR_AIR_ID, PROGRAM_AIR_ID, PUBLIC_VALUES_AIR_ID,
+    },
+    system::{
+        connector::VmConnectorChip,
+        memory::{
+            interface::MemoryInterfaceAirs,
+            offline_checker::{MemoryBridge, MemoryBus},
+            online::GuestMemory,
+            MemoryAirInventory, MemoryController, TimestampedEquipartition, CHUNK,
+        },
+        native_adapter::{NativeAdapterAir, NativeAdapterExecutor},
+        phantom::{
+            CycleEndPhantomExecutor, CycleStartPhantomExecutor, NopPhantomExecutor, PhantomAir,
+            PhantomChip, PhantomExecutor, PhantomFiller,
+        },
+        poseidon2::{
+            air::Poseidon2PeripheryAir, new_poseidon2_periphery_air, Poseidon2PeripheryChip,
+        },
+        program::{ProgramBus, ProgramChip},
+        public_values::{
+            PublicValuesChip, PublicValuesCoreAir, PublicValuesExecutor, PublicValuesFiller,
+        },
+    },
+};
+
 pub mod connector;
 pub mod memory;
+// Necessary for the PublicValuesChip
 pub mod native_adapter;
-/// Chip to handle phantom instructions.
-/// The Air will always constrain a NOP which advances pc by DEFAULT_PC_STEP.
-/// The runtime executor will execute different phantom instructions that may
-/// affect trace generation based on the operand.
 pub mod phantom;
 pub mod poseidon2;
 pub mod program;
 pub mod public_values;
+
+/// **If** internal poseidon2 chip exists, then its insertion index is 1.
+const POSEIDON2_INSERTION_IDX: usize = 1;
+/// **If** public values chip exists, then its executor index is 0.
+pub(crate) const PV_EXECUTOR_IDX: usize = 0;
+
+/// Trait for trace generation of all system AIRs. The system chip complex is special because we may
+/// not exactly follow the matching between `Air` and `Chip`. Moreover, we may require more
+/// flexibility than what is provided through the trait object [`AnyChip`].
+///
+/// The [SystemChipComplex] is meant to be constructible once the VM configuration is known, and it
+/// can be loaded with arbitrary programs supported by the instruction set available to its
+/// configuration. The [SystemChipComplex] is meant to persist between instances of proof
+/// generation.
+pub trait SystemChipComplex<RA, PB: ProverBackend> {
+    /// Loads the program in the form of a cached trace with prover data.
+    fn load_program(&mut self, cached_program_trace: CommittedTraceData<PB>);
+
+    /// Transport the initial memory state to device. This may be called before preflight execution
+    /// begins and start async device processes in parallel to execution.
+    fn transport_init_memory_to_device(&mut self, memory: &GuestMemory);
+
+    /// The caller must guarantee that `record_arenas` has length equal to the number of system
+    /// AIRs, although some arenas may be empty if they are unused.
+    fn generate_proving_ctx(
+        &mut self,
+        system_records: SystemRecords<PB::Val>,
+        record_arenas: Vec<RA>,
+    ) -> Vec<AirProvingContext<PB>>;
+
+    /// This function is only used for metric collection purposes and custom implementations are
+    /// free to ignore it.
+    ///
+    /// Since system chips (primarily memory) will only have all information needed to compute the
+    /// true used trace heights after `generate_proving_ctx` is called, this method will be called
+    /// after `generate_proving_ctx` on the trace `heights` of all AIRs (including non-system AIRs)
+    /// in the AIR ID order.
+    ///
+    /// The default implementation does nothing.
+    #[cfg(feature = "metrics")]
+    fn finalize_trace_heights(&self, _heights: &mut [usize]) {}
+}
+
+/// Trait meant to be implemented on a SystemChipComplex.
+pub trait SystemWithFixedTraceHeights {
+    /// `heights` will have length equal to number of system AIRs, in AIR ID order. This function
+    /// must guarantee that the system trace matrices generated have the required heights.
+    fn override_trace_heights(&mut self, heights: &[u32]);
+}
+
+pub struct SystemRecords<F> {
+    pub from_state: ExecutionState<u32>,
+    pub to_state: ExecutionState<u32>,
+    pub exit_code: Option<u32>,
+    /// `i` -> frequency of instruction in `i`th row of trace matrix. This requires filtering
+    /// `program.instructions_and_debug_infos` to remove gaps.
+    pub filtered_exec_frequencies: Vec<u32>,
+    // We always use a [DenseRecordArena] here, regardless of the generic `RA` used for other
+    // execution records.
+    pub access_adapter_records: DenseRecordArena,
+    // Perf[jpw]: this should be computed on-device and changed to just touched blocks
+    pub touched_memory: TouchedMemory<F>,
+    /// The public values of the [PublicValuesChip]. These should only be non-empty if
+    /// continuations are disabled.
+    pub public_values: Vec<F>,
+}
+
+pub enum TouchedMemory<F> {
+    Persistent(TimestampedEquipartition<F, CHUNK>),
+    Volatile(TimestampedEquipartition<F, 1>),
+}
+
+#[derive(Clone, AnyEnum, Executor, MeteredExecutor, PreflightExecutor, From)]
+pub enum SystemExecutor<F: Field> {
+    PublicValues(PublicValuesExecutor<F>),
+    Phantom(PhantomExecutor<F>),
+}
+
+/// SystemPort combines system resources needed by most extensions
+#[derive(Clone, Copy)]
+pub struct SystemPort {
+    pub execution_bus: ExecutionBus,
+    pub program_bus: ProgramBus,
+    pub memory_bridge: MemoryBridge,
+}
+
+#[derive(Clone)]
+pub struct SystemAirInventory<SC: StarkGenericConfig> {
+    pub program: ProgramAir,
+    pub connector: VmConnectorAir,
+    pub memory: MemoryAirInventory<SC>,
+    /// Public values AIR exists if and only if continuations are disabled and `num_public_values`
+    /// is greater than 0.
+    pub public_values: Option<PublicValuesAir>,
+}
+
+impl<SC: StarkGenericConfig> SystemAirInventory<SC> {
+    /// Builds the system AIRs. Panics unless merkle buses are given iff `continuation_enabled`.
+    pub fn new(
+        config: &SystemConfig,
+        port: SystemPort,
+        merkle_compression_buses: Option<(PermutationCheckBus, PermutationCheckBus)>,
+    ) -> Self {
+        let SystemPort {
+            execution_bus,
+            program_bus,
+            memory_bridge,
+        } = port;
+        let range_bus = memory_bridge.range_bus();
+        let program = ProgramAir::new(program_bus);
+        let connector = VmConnectorAir::new(
+            execution_bus,
+            program_bus,
+            range_bus,
+            config.memory_config.timestamp_max_bits,
+        );
+        assert_eq!(
+            config.continuation_enabled,
+            merkle_compression_buses.is_some()
+        );
+        let memory = MemoryAirInventory::new(
+            memory_bridge,
+            &config.memory_config,
+            range_bus,
+            merkle_compression_buses,
+        );
+
+        let public_values = config.has_public_values_chip().then(|| {
+            // Checked, as in `SystemChipInventory::new`: `- 1` would wrap in release builds.
+            let max_degree = (config.max_constraint_degree as u32)
+                .checked_sub(1)
+                .expect("max_constraint_degree must be at least 1");
+            VmAirWrapper::new(
+                NativeAdapterAir::new(
+                    ExecutionBridge::new(execution_bus, program_bus),
+                    memory_bridge,
+                ),
+                PublicValuesCoreAir::new(config.num_public_values, max_degree),
+            )
+        });
+
+        Self {
+            program,
+            connector,
+            memory,
+            public_values,
+        }
+    }
+
+    /// Reassembles the [SystemPort] from the buses and bridge stored in the system AIRs.
+    pub fn port(&self) -> SystemPort {
+        SystemPort {
+            memory_bridge: self.memory.bridge,
+            program_bus: self.program.bus,
+            execution_bus: self.connector.execution_bus,
+        }
+    }
+
+    /// Returns the AIRs in order: program, connector, public values (if any), then memory.
+    pub fn into_airs(self) -> Vec<AirRef<SC>> {
+        let mut airs: Vec<AirRef<SC>> = Vec::new();
+        airs.push(Arc::new(self.program));
+        airs.push(Arc::new(self.connector));
+        if let Some(public_values) = self.public_values {
+            airs.push(Arc::new(public_values));
+        }
+        airs.extend(self.memory.into_airs());
+        airs
+    }
+}
+
+impl<F: PrimeField32> VmExecutionConfig<F> for SystemConfig {
+    type Executor = SystemExecutor<F>;
+
+    /// The only way to create an [ExecutorInventory] is from a [SystemConfig]. This will add a
+    /// [PublicValuesExecutor] only if `has_public_values_chip()` is true. It will always add a
+    /// [PhantomExecutor], which handles all phantom sub-executors.
+    fn create_executors(
+        &self,
+    ) -> Result<ExecutorInventory<Self::Executor>, ExecutorInventoryError> {
+        let mut inventory = ExecutorInventory::new(self.clone());
+        // PublicValuesChip is required when num_public_values > 0 in single segment mode.
+        if self.has_public_values_chip() {
+            assert_eq!(inventory.executors().len(), PV_EXECUTOR_IDX);
+
+            let public_values = PublicValuesExecutor::new(NativeAdapterExecutor::default());
+            inventory.add_executor(public_values, [PublishOpcode::PUBLISH.global_opcode()])?;
+        }
+        let phantom_opcode = SystemOpcode::PHANTOM.global_opcode();
+        let mut phantom_executors: FxHashMap<PhantomDiscriminant, Arc<dyn PhantomSubExecutor<F>>> =
+            FxHashMap::default();
+        // Use NopPhantomExecutor so the discriminant is set but `DebugPanic` is handled specially.
+        phantom_executors.insert(
+            PhantomDiscriminant(SysPhantom::DebugPanic as u16),
+            Arc::new(NopPhantomExecutor),
+        );
+        phantom_executors.insert(
+            PhantomDiscriminant(SysPhantom::Nop as u16),
+            Arc::new(NopPhantomExecutor),
+        );
+        phantom_executors.insert(
+            PhantomDiscriminant(SysPhantom::CtStart as u16),
+            Arc::new(CycleStartPhantomExecutor),
+        );
+        phantom_executors.insert(
+            PhantomDiscriminant(SysPhantom::CtEnd as u16),
+            Arc::new(CycleEndPhantomExecutor),
+        );
+        let phantom = PhantomExecutor::new(phantom_executors, phantom_opcode);
+        inventory.add_executor(phantom, [phantom_opcode])?;
+
+        Ok(inventory)
+    }
+}
+
+impl<SC: StarkGenericConfig> VmCircuitConfig<SC> for SystemConfig {
+    /// Every VM circuit within the OpenVM circuit architecture **must** be initialized from the
+    /// [SystemConfig].
+    fn create_airs(&self) -> Result<AirInventory<SC>, AirInventoryError> {
+        let mut bus_idx_mgr = BusIndexManager::new();
+        let execution_bus = ExecutionBus::new(bus_idx_mgr.new_bus_idx());
+        let memory_bus = MemoryBus::new(bus_idx_mgr.new_bus_idx());
+        let program_bus = ProgramBus::new(bus_idx_mgr.new_bus_idx());
+        let range_bus =
+            VariableRangeCheckerBus::new(bus_idx_mgr.new_bus_idx(), self.memory_config.decomp);
+        // Bus indices are allocated in order; merkle/compression buses only with continuations.
+        let merkle_compression_buses = if self.continuation_enabled {
+            let merkle_bus = PermutationCheckBus::new(bus_idx_mgr.new_bus_idx());
+            let compression_bus = PermutationCheckBus::new(bus_idx_mgr.new_bus_idx());
+            Some((merkle_bus, compression_bus))
+        } else {
+            None
+        };
+        let memory_bridge =
+            MemoryBridge::new(memory_bus, self.memory_config.timestamp_max_bits, range_bus);
+        let system_port = SystemPort {
+            execution_bus,
+            program_bus,
+            memory_bridge,
+        };
+        let system = SystemAirInventory::new(self, system_port, merkle_compression_buses);
+
+        let mut inventory = AirInventory::new(self.clone(), system, bus_idx_mgr);
+
+        let range_checker = VariableRangeCheckerAir::new(range_bus);
+        // Range checker is always the first AIR in the inventory
+        inventory.add_air(range_checker);
+
+        if self.continuation_enabled {
+            assert_eq!(inventory.ext_airs().len(), POSEIDON2_INSERTION_IDX);
+            // Add direct poseidon2 AIR for persistent memory; its lookup bus reuses the index of
+            // the compression bus. Currently we never use poseidon2 opcodes when continuations
+            // is enabled: we will need special handling when that happens
+            let (_, compression_bus) = merkle_compression_buses.unwrap();
+            let direct_bus_idx = compression_bus.index;
+            let air = new_poseidon2_periphery_air(
+                vm_poseidon2_config(),
+                LookupBus::new(direct_bus_idx),
+                self.max_constraint_degree,
+            );
+            inventory.add_air_ref(air);
+        }
+        let execution_bridge = ExecutionBridge::new(execution_bus, program_bus);
+        let phantom = PhantomAir {
+            execution_bridge,
+            phantom_opcode: SystemOpcode::PHANTOM.global_opcode(),
+        };
+        inventory.add_air(phantom);
+
+        Ok(inventory)
+    }
+}
+
+// =================== CPU Backend Specific System Chip Complex Constructor ==================
+
+/// Base system chips for CPU backend. These chips must exactly correspond to the AIRs in
+/// [SystemAirInventory].
+pub struct SystemChipInventory<SC: StarkGenericConfig> {
+    pub program_chip: ProgramChip<SC>, // created unloaded; filled in by `load_program`
+    pub connector_chip: VmConnectorChip<Val<SC>>,
+    /// Contains all memory chips (interface chips, access adapters, optional hasher chip)
+    pub memory_controller: MemoryController<Val<SC>>,
+    pub public_values_chip: Option<PublicValuesChip<Val<SC>>>,
+}
+
+// Note[jpw]: We could get rid of the `mem_inventory` input because `MemoryController` doesn't need
+// the buses for tracegen. We leave it to use old interfaces.
+impl<SC: StarkGenericConfig> SystemChipInventory<SC>
+where
+    Val<SC>: PrimeField32,
+{
+    /// Creates the CPU system chips matching `mem_inventory`. Panics if the memory interface
+    /// disagrees with `config.continuation_enabled` or `hasher_chip` is `None` when persistent.
+    pub fn new(
+        config: &SystemConfig,
+        mem_inventory: &MemoryAirInventory<SC>,
+        range_checker: SharedVariableRangeCheckerChip,
+        hasher_chip: Option<Arc<Poseidon2PeripheryChip<Val<SC>>>>,
+    ) -> Self {
+        // The program chip starts out empty: the program is loaded later (and may be swapped
+        // out), and the execution frequencies only become available after execution.
+        let program_chip = ProgramChip::unloaded();
+        let connector_chip = VmConnectorChip::<Val<SC>>::new(
+            range_checker.clone(),
+            config.memory_config.timestamp_max_bits,
+        );
+        let memory_bus = mem_inventory.bridge.memory_bus();
+        let memory_controller = match &mem_inventory.interface {
+            MemoryInterfaceAirs::Persistent { merkle, .. } => {
+                assert!(config.continuation_enabled);
+                MemoryController::<Val<SC>>::with_persistent_memory(
+                    memory_bus,
+                    config.memory_config.clone(),
+                    range_checker.clone(),
+                    merkle.merkle_bus,
+                    merkle.compression_bus,
+                    hasher_chip.unwrap(),
+                )
+            }
+            MemoryInterfaceAirs::Volatile { .. } => {
+                assert!(!config.continuation_enabled);
+                MemoryController::with_volatile_memory(
+                    memory_bus,
+                    config.memory_config.clone(),
+                    range_checker.clone(),
+                )
+            }
+        };
+
+        let public_values_chip = if config.has_public_values_chip() {
+            let max_degree = (config.max_constraint_degree as u32)
+                .checked_sub(1)
+                .unwrap();
+            let filler = PublicValuesFiller::new(
+                NativeAdapterExecutor::default(),
+                config.num_public_values,
+                max_degree,
+            );
+            Some(VmChipWrapper::new(filler, memory_controller.helper()))
+        } else {
+            None
+        };
+
+        Self {
+            program_chip,
+            connector_chip,
+            memory_controller,
+            public_values_chip,
+        }
+    }
+}
+
+impl<RA, SC> SystemChipComplex<RA, CpuBackend<SC>> for SystemChipInventory<SC>
+where
+    RA: RowMajorMatrixArena<Val<SC>>,
+    SC: StarkGenericConfig,
+    Val<SC>: PrimeField32,
+{
+    fn load_program(&mut self, cached_program_trace: CommittedTraceData<CpuBackend<SC>>) {
+        let _ = self.program_chip.cached.replace(cached_program_trace); // drops the old trace
+    }
+
+    fn transport_init_memory_to_device(&mut self, memory: &GuestMemory) {
+        self.memory_controller
+            .set_initial_memory(memory.memory.clone()); // on CPU this is a plain clone
+    }
+
+    fn generate_proving_ctx(
+        &mut self,
+        system_records: SystemRecords<Val<SC>>,
+        mut record_arenas: Vec<RA>,
+    ) -> Vec<AirProvingContext<CpuBackend<SC>>> {
+        let SystemRecords {
+            from_state,
+            to_state,
+            exit_code,
+            filtered_exec_frequencies,
+            access_adapter_records,
+            touched_memory,
+            public_values,
+        } = system_records;
+
+        if let Some(chip) = &mut self.public_values_chip {
+            chip.inner.set_public_values(public_values);
+        }
+        self.program_chip.filtered_exec_frequencies = filtered_exec_frequencies;
+        let program_ctx = self.program_chip.generate_proving_ctx(());
+        self.connector_chip.begin(from_state);
+        self.connector_chip.end(to_state, exit_code);
+        let connector_ctx = self.connector_chip.generate_proving_ctx(());
+        // The public values arena is taken out of `record_arenas` by its AIR index.
+        let pv_ctx = self.public_values_chip.as_ref().map(|chip| {
+            let arena = record_arenas.remove(PUBLIC_VALUES_AIR_ID);
+            chip.generate_proving_ctx(arena)
+        });
+
+        let memory_ctxs = self
+            .memory_controller
+            .generate_proving_ctx(access_adapter_records, touched_memory);
+
+        [program_ctx, connector_ctx]
+            .into_iter()
+            .chain(pv_ctx)
+            .chain(memory_ctxs)
+            .collect()
+    }
+
+    #[cfg(feature = "metrics")]
+    fn finalize_trace_heights(&self, heights: &mut [usize]) {
+        use openvm_stark_backend::ChipUsageGetter;
+
+        use crate::system::memory::interface::MemoryInterface;
+
+        let boundary_idx = PUBLIC_VALUES_AIR_ID + usize::from(self.public_values_chip.is_some());
+        let mut access_adapter_offset = boundary_idx + 1;
+        match &self.memory_controller.interface_chip {
+            MemoryInterface::Volatile { boundary_chip } => {
+                let boundary_height = boundary_chip
+                    .final_memory
+                    .as_ref()
+                    .map(|m| m.len())
+                    .unwrap_or(0);
+                heights[boundary_idx] = boundary_height;
+            }
+            MemoryInterface::Persistent {
+                boundary_chip,
+                merkle_chip,
+                ..
+            } => {
+                let boundary_height = 2 * boundary_chip.touched_labels.len();
+                heights[boundary_idx] = boundary_height;
+                heights[boundary_idx + 1] = merkle_chip.current_height;
+                access_adapter_offset += 1;
+
+                // Poseidon2Periphery height also varies based on memory, so set it now even though
+                // it's not a system chip:
+                let poseidon_chip = self.memory_controller.hasher_chip.as_ref().unwrap();
+                let poseidon_height = poseidon_chip.current_trace_height();
+                // We know the chip insertion index, which counts from *the end* of the AIR
+                // ordering
+                let poseidon_idx = heights.len() - 1 - POSEIDON2_INSERTION_IDX;
+                heights[poseidon_idx] = poseidon_height;
+            }
+        }
+        let access_heights = &self
+            .memory_controller
+            .access_adapter_inventory
+            .trace_heights;
+        heights[access_adapter_offset..access_adapter_offset + access_heights.len()]
+            .copy_from_slice(access_heights);
+    }
+}
+
+#[derive(Clone)]
+pub struct SystemCpuBuilder; // stateless `VmBuilder` for `SystemConfig` on the CPU backend
+
+impl<SC, E> VmBuilder<E> for SystemCpuBuilder
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    Val<SC>: PrimeField32,
+{
+    type VmConfig = SystemConfig;
+    type RecordArena = MatrixRecordArena<Val<SC>>;
+    type SystemChipInventory = SystemChipInventory<SC>;
+
+    fn create_chip_complex(
+        &self,
+        config: &SystemConfig,
+        airs: AirInventory<SC>,
+    ) -> Result<
+        VmChipComplex<SC, MatrixRecordArena<Val<SC>>, CpuBackend<SC>, SystemChipInventory<SC>>,
+        ChipInventoryError,
+    > {
+        let range_bus = airs.range_checker().bus;
+        let range_checker = Arc::new(VariableRangeCheckerChip::new(range_bus));
+
+        let mut inventory = ChipInventory::new(airs);
+        // PublicValuesChip is required when num_public_values > 0 in single segment mode.
+        if config.has_public_values_chip() {
+            assert_eq!(
+                inventory.executor_idx_to_insertion_idx.len(),
+                PV_EXECUTOR_IDX
+            );
+            // We set insertion_idx so that air_idx = num_airs - (insertion_idx + 1) =
+            // PUBLIC_VALUES_AIR_ID in `VmChipComplex::executor_idx_to_air_idx`. We need to do this
+            // because this chip is special and not part of the normal inventory.
+            let insertion_idx = inventory
+                .airs()
+                .num_airs()
+                .checked_sub(1 + PUBLIC_VALUES_AIR_ID)
+                .unwrap();
+            inventory.executor_idx_to_insertion_idx.push(insertion_idx);
+        }
+        inventory.next_air::<VariableRangeCheckerAir>()?;
+        inventory.add_periphery_chip(range_checker.clone());
+
+        let hasher_chip = if config.continuation_enabled {
+            assert_eq!(inventory.chips().len(), POSEIDON2_INSERTION_IDX);
+            // ATTENTION: The threshold 7 here must match the one in `new_poseidon2_periphery_air`
+            let direct_bus = if config.max_constraint_degree >= 7 {
+                inventory
+                    .next_air::<Poseidon2PeripheryAir<Val<SC>, 0>>()?
+                    .bus
+            } else {
+                inventory
+                    .next_air::<Poseidon2PeripheryAir<Val<SC>, 1>>()?
+                    .bus
+            };
+            let chip = Arc::new(Poseidon2PeripheryChip::new(
+                vm_poseidon2_config(),
+                direct_bus.index,
+                config.max_constraint_degree,
+            ));
+            inventory.add_periphery_chip(chip.clone());
+            Some(chip)
+        } else {
+            None
+        };
+        let system = SystemChipInventory::new(
+            config,
+            &inventory.airs().system().memory,
+            range_checker,
+            hasher_chip,
+        );
+        // The phantom chip is added last, matching the phantom AIR added last in `create_airs`.
+        let phantom_chip = PhantomChip::new(PhantomFiller, system.memory_controller.helper());
+        inventory.add_executor_chip(phantom_chip);
+
+        Ok(VmChipComplex { system, inventory })
+    }
+}
+
+impl<SC: StarkGenericConfig> SystemWithFixedTraceHeights for SystemChipInventory<SC>
+where
+    Val<SC>: PrimeField32,
+{
+    /// Checks the program and connector entries of `heights`, then hands the entries from the
+    /// first memory AIR onward to the memory controller as its trace height overrides.
+    ///
+    /// Warning: this does not set the override for the PublicValuesChip. The PublicValuesChip
+    /// override must be set via the RecordArena.
+    fn override_trace_heights(&mut self, heights: &[u32]) {
+        let program_height = heights[PROGRAM_AIR_ID] as usize;
+        let cached_program = self
+            .program_chip
+            .cached
+            .as_ref()
+            .expect("program not loaded");
+        assert_eq!(program_height, cached_program.trace.height());
+        assert_eq!(heights[CONNECTOR_AIR_ID], 2);
+        // Memory heights start right after the (optional) public values AIR.
+        let memory_start_idx =
+            PUBLIC_VALUES_AIR_ID + usize::from(self.public_values_chip.is_some());
+        self.memory_controller
+            .set_override_trace_heights(&heights[memory_start_idx..]);
+    }
+}
diff --git a/crates/vm/src/system/native_adapter/mod.rs b/crates/vm/src/system/native_adapter/mod.rs
index 95c2c7c4a4..7dcf8c1b2e 100644
--- a/crates/vm/src/system/native_adapter/mod.rs
+++ b/crates/vm/src/system/native_adapter/mod.rs
@@ -1,3 +1,5 @@
+pub mod util;
+
 use std::{
     borrow::{Borrow, BorrowMut},
     marker::PhantomData,
@@ -5,86 +7,31 @@ use std::{
 
 use openvm_circuit::{
     arch::{
-        AdapterAirContext, AdapterRuntimeContext, BasicAdapterInterface, ExecutionBridge,
-        ExecutionBus, ExecutionState, MinimalInstruction, Result, VmAdapterAir, VmAdapterChip,
-        VmAdapterInterface,
+        AdapterAirContext, BasicAdapterInterface, ExecutionBridge, ExecutionState,
+        MinimalInstruction, VmAdapterAir,
     },
-    system::{
-        memory::{
-            offline_checker::{MemoryBridge, MemoryReadOrImmediateAuxCols, MemoryWriteAuxCols},
-            MemoryAddress, MemoryController,
-        },
-        program::ProgramBus,
+    system::memory::{
+        offline_checker::{MemoryBridge, MemoryReadOrImmediateAuxCols, MemoryWriteAuxCols},
+        MemoryAddress,
     },
 };
+use openvm_circuit_primitives::AlignedBytesBorrow;
 use openvm_circuit_primitives_derive::AlignedBorrow;
-use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP};
+use openvm_instructions::{
+    instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_IMM_AS, NATIVE_AS,
+};
 use openvm_stark_backend::{
     interaction::InteractionBuilder,
     p3_air::BaseAir,
     p3_field::{Field, FieldAlgebra, PrimeField32},
 };
-use serde::{Deserialize, Serialize};
-use serde_big_array::BigArray;
-
-use crate::system::memory::{OfflineMemory, RecordId};
-
-/// R reads(R<=2), W writes(W<=1).
-/// Operands: b for the first read, c for the second read, a for the first write.
-/// If an operand is not used, its address space and pointer should be all 0.
-#[derive(Debug)]
-pub struct NativeAdapterChip<F, const R: usize, const W: usize> {
-    pub air: NativeAdapterAir<R, W>,
-    _phantom: PhantomData<F>,
-}
-
-impl<F: PrimeField32, const R: usize, const W: usize> NativeAdapterChip<F, R, W> {
-    pub fn new(
-        execution_bus: ExecutionBus,
-        program_bus: ProgramBus,
-        memory_bridge: MemoryBridge,
-    ) -> Self {
-        Self {
-            air: NativeAdapterAir {
-                execution_bridge: ExecutionBridge::new(execution_bus, program_bus),
-                memory_bridge,
-            },
-            _phantom: PhantomData,
-        }
-    }
-}
+use util::{tracing_read_or_imm_native, tracing_write_native};
 
-#[repr(C)]
-#[derive(Debug, Serialize, Deserialize)]
-pub struct NativeReadRecord<F: Field, const R: usize> {
-    #[serde(with = "BigArray")]
-    pub reads: [(RecordId, [F; 1]); R],
-}
-
-impl<F: Field, const R: usize> NativeReadRecord<F, R> {
-    pub fn b(&self) -> &[F; 1] {
-        &self.reads[0].1
-    }
-
-    pub fn c(&self) -> &[F; 1] {
-        &self.reads[1].1
-    }
-}
-
-#[repr(C)]
-#[derive(Debug, Serialize, Deserialize)]
-#[serde(bound = "F: Field")]
-pub struct NativeWriteRecord<F: Field, const W: usize> {
-    pub from_state: ExecutionState<u32>,
-    #[serde(with = "BigArray")]
-    pub writes: [(RecordId, [F; 1]); W],
-}
-
-impl<F: Field, const W: usize> NativeWriteRecord<F, W> {
-    pub fn a(&self) -> &[F; 1] {
-        &self.writes[0].1
-    }
-}
+use super::memory::{online::TracingMemory, MemoryAuxColsFactory};
+use crate::{
+    arch::{get_record_from_slice, AdapterTraceExecutor, AdapterTraceFiller},
+    system::memory::offline_checker::{MemoryReadAuxRecord, MemoryWriteAuxRecord},
+};
 
 #[repr(C)]
 #[derive(AlignedBorrow)]
@@ -205,101 +152,160 @@ impl<AB: InteractionBuilder, const R: usize, const W: usize> VmAdapterAir<AB>
     }
 }
 
-impl<F: PrimeField32, const R: usize, const W: usize> VmAdapterChip<F>
-    for NativeAdapterChip<F, R, W>
-{
-    type ReadRecord = NativeReadRecord<F, R>;
-    type WriteRecord = NativeWriteRecord<F, W>;
-    type Air = NativeAdapterAir<R, W>;
-    type Interface = BasicAdapterInterface<F, MinimalInstruction<F>, R, W, 1, 1>;
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct NativeAdapterRecord<F, const R: usize, const W: usize> {
+    pub from_pc: u32, // pc at which the instruction starts
+    pub from_timestamp: u32, // memory timestamp at which the instruction starts
 
-    fn preprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-    ) -> Result<(
-        <Self::Interface as VmAdapterInterface<F>>::Reads,
-        Self::ReadRecord,
-    )> {
-        assert!(R <= 2);
-        let Instruction { b, c, e, f, .. } = *instruction;
+    // Each entry is either a pointer into native memory or an immediate value
+    pub read_ptr_or_imm: [F; R],
+    // `prev_timestamp` is `u32::MAX` when the read was an immediate (`RV32_IMM_AS`)
+    pub reads_aux: [MemoryReadAuxRecord; R],
+    pub write_ptr: [F; W], // native pointer (operand `a`) of each write
+    pub writes_aux: [MemoryWriteAuxRecord<F, 1>; W],
+}
 
-        let mut reads = Vec::with_capacity(R);
-        if R >= 1 {
-            reads.push(memory.read::<1>(e, b));
-        }
-        if R >= 2 {
-            reads.push(memory.read::<1>(f, c));
+/// Adapter executor performing `R` reads (`R <= 2`) and `W` writes (`W <= 1`).
+/// Operands: `b` for the first read, `c` for the second read, `a` for the first write.
+/// If an operand is not used, its address space and pointer should be all 0.
+#[derive(Clone, Debug)]
+pub struct NativeAdapterExecutor<F, const R: usize, const W: usize> {
+    _phantom: PhantomData<F>,
+}
+
+impl<F, const R: usize, const W: usize> Default for NativeAdapterExecutor<F, R, W> {
+    fn default() -> Self { // hand-written: `derive(Default)` would require `F: Default`
+        Self {
+            _phantom: PhantomData,
         }
-        let i_reads: [_; R] = std::array::from_fn(|i| reads[i].1);
+    }
+}
 
-        Ok((
-            i_reads,
-            Self::ReadRecord {
-                reads: reads.try_into().unwrap(),
-            },
-        ))
+impl<F, const R: usize, const W: usize> AdapterTraceExecutor<F> for NativeAdapterExecutor<F, R, W>
+where
+    F: PrimeField32,
+{
+    const WIDTH: usize = size_of::<NativeAdapterCols<u8, R, W>>();
+    type ReadData = [[F; 1]; R];
+    type WriteData = [[F; 1]; W];
+    type RecordMut<'a> = &'a mut NativeAdapterRecord<F, R, W>;
+    /// Stores the starting pc and memory timestamp in the record.
+    #[inline(always)]
+    fn start(pc: u32, memory: &TracingMemory, record: &mut Self::RecordMut<'_>) {
+        record.from_pc = pc;
+        record.from_timestamp = memory.timestamp;
     }
 
-    fn postprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
+    #[inline(always)]
+    fn read(
+        &self,
+        memory: &mut TracingMemory,
         instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-        output: AdapterRuntimeContext<F, Self::Interface>,
-        _read_record: &Self::ReadRecord,
-    ) -> Result<(ExecutionState<u32>, Self::WriteRecord)> {
-        assert!(W <= 1);
-        let Instruction { a, d, .. } = *instruction;
-        let mut writes = Vec::with_capacity(W);
-        if W >= 1 {
-            let (record_id, _) = memory.write(d, a, output.writes[0]);
-            writes.push((record_id, output.writes[0]));
-        }
+        record: &mut Self::RecordMut<'_>,
+    ) -> Self::ReadData {
+        debug_assert!(R <= 2);
+        let &Instruction { b, c, e, f, .. } = instruction;
 
-        Ok((
-            ExecutionState {
-                pc: output.to_pc.unwrap_or(from_state.pc + DEFAULT_PC_STEP),
-                timestamp: memory.timestamp(),
-            },
-            Self::WriteRecord {
-                from_state,
-                writes: writes.try_into().unwrap(),
-            },
-        ))
+        let mut reads = [[F::ZERO; 1]; R];
+        record
+            .read_ptr_or_imm
+            .iter_mut()
+            .enumerate()
+            .zip(record.reads_aux.iter_mut())
+            .for_each(|((i, ptr_or_imm), read_aux)| {
+                *ptr_or_imm = if i == 0 { b } else { c }; // read 0 uses `b`, read 1 uses `c`
+                let addr_space = if i == 0 { e } else { f }; // address spaces: `e`, then `f`
+                reads[i][0] = tracing_read_or_imm_native(
+                    memory,
+                    addr_space,
+                    *ptr_or_imm,
+                    &mut read_aux.prev_timestamp,
+                );
+            });
+        reads
     }
 
-    fn generate_trace_row(
+    #[inline(always)]
+    fn write(
         &self,
-        row_slice: &mut [F],
-        read_record: Self::ReadRecord,
-        write_record: Self::WriteRecord,
-        memory: &OfflineMemory<F>,
+        memory: &mut TracingMemory,
+        instruction: &Instruction<F>,
+        data: Self::WriteData,
+        record: &mut Self::RecordMut<'_>,
     ) {
-        let row_slice: &mut NativeAdapterCols<_, R, W> = row_slice.borrow_mut();
-        let aux_cols_factory = memory.aux_cols_factory();
-
-        row_slice.from_state = write_record.from_state.map(F::from_canonical_u32);
+        let &Instruction { a, d, .. } = instruction;
+        debug_assert!(W <= 1);
+        debug_assert_eq!(d.as_canonical_u32(), NATIVE_AS); // writes always target native memory
 
-        for (i, read) in read_record.reads.iter().enumerate() {
-            let (id, _) = read;
-            let record = memory.record_by_id(*id);
-            aux_cols_factory
-                .generate_read_or_immediate_aux(record, &mut row_slice.reads_aux[i].read_aux);
-            row_slice.reads_aux[i].address =
-                MemoryAddress::new(record.address_space, record.pointer);
+        if W >= 1 {
+            record.write_ptr[0] = a;
+            tracing_write_native(
+                memory,
+                a.as_canonical_u32(),
+                data[0],
+                &mut record.writes_aux[0].prev_timestamp,
+                &mut record.writes_aux[0].prev_data,
+            );
         }
+    }
+}
+
+impl<F: PrimeField32, const R: usize, const W: usize> AdapterTraceFiller<F>
+    for NativeAdapterExecutor<F, R, W>
+{
+    const WIDTH: usize = size_of::<NativeAdapterCols<u8, R, W>>();
 
-        for (i, write) in write_record.writes.iter().enumerate() {
-            let (id, _) = write;
-            let record = memory.record_by_id(*id);
-            aux_cols_factory.generate_write_aux(record, &mut row_slice.writes_aux[i].write_aux);
-            row_slice.writes_aux[i].address =
-                MemoryAddress::new(record.address_space, record.pointer);
+    #[inline(always)]
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, mut adapter_row: &mut [F]) {
+        let record: &NativeAdapterRecord<F, R, W> =
+            unsafe { get_record_from_slice(&mut adapter_row, ()) };
+        let adapter_row: &mut NativeAdapterCols<_, R, W> = adapter_row.borrow_mut();
+        // Writing in reverse order to avoid overwriting the `record`, which lives in this same row
+        if W >= 1 {
+            adapter_row.writes_aux[0]
+                .write_aux
+                .set_prev_data(record.writes_aux[0].prev_data);
+            mem_helper.fill(
+                record.writes_aux[0].prev_timestamp,
+                record.from_timestamp + R as u32, // the write is timestamped after the `R` reads
+                adapter_row.writes_aux[0].write_aux.as_mut(),
+            );
+            adapter_row.writes_aux[0].address.pointer = record.write_ptr[0];
+            adapter_row.writes_aux[0].address.address_space = F::from_canonical_u32(NATIVE_AS);
         }
-    }
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        adapter_row
+            .reads_aux
+            .iter_mut()
+            .enumerate()
+            .zip(record.reads_aux.iter().zip(record.read_ptr_or_imm.iter()))
+            .rev()
+            .for_each(|((i, read_cols), (read_record, ptr_or_imm))| {
+                if read_record.prev_timestamp == u32::MAX { // sentinel: immediate read
+                    read_cols.read_aux.is_zero_aux = F::ZERO;
+                    read_cols.read_aux.is_immediate = F::ONE;
+                    mem_helper.fill(
+                        0,
+                        record.from_timestamp + i as u32,
+                        read_cols.read_aux.as_mut(),
+                    );
+                    read_cols.address.pointer = *ptr_or_imm;
+                    read_cols.address.address_space = F::from_canonical_u32(RV32_IMM_AS);
+                } else {
+                    read_cols.read_aux.is_zero_aux = F::from_canonical_u32(NATIVE_AS).inverse();
+                    read_cols.read_aux.is_immediate = F::ZERO;
+                    mem_helper.fill(
+                        read_record.prev_timestamp,
+                        record.from_timestamp + i as u32,
+                        read_cols.read_aux.as_mut(),
+                    );
+                    read_cols.address.pointer = *ptr_or_imm;
+                    read_cols.address.address_space = F::from_canonical_u32(NATIVE_AS);
+                }
+            });
+        // `from_state` is filled last; presumably the first columns of the row (TODO confirm).
+        adapter_row.from_state.timestamp = F::from_canonical_u32(record.from_timestamp);
+        adapter_row.from_state.pc = F::from_canonical_u32(record.from_pc);
     }
 }
diff --git a/crates/vm/src/system/native_adapter/util.rs b/crates/vm/src/system/native_adapter/util.rs
new file mode 100644
index 0000000000..ad430c8adb
--- /dev/null
+++ b/crates/vm/src/system/native_adapter/util.rs
@@ -0,0 +1,198 @@
+use openvm_circuit::system::memory::online::TracingMemory;
+use openvm_instructions::{riscv::RV32_IMM_AS, NATIVE_AS};
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use crate::{
+    arch::{execution_mode::ExecutionCtxTrait, VmStateMut},
+    system::memory::{offline_checker::MemoryWriteAuxCols, online::GuestMemory},
+};
+
+#[inline(always)]
+pub fn memory_read_native<F, const N: usize>(memory: &GuestMemory, ptr: u32) -> [F; N]
+where
+    F: PrimeField32,
+{
+    // SAFETY:
+    // - address space `NATIVE_AS` will always have cell type `F` and minimum alignment of `1`
+    unsafe { memory.read::<F, N>(NATIVE_AS, ptr) }
+}
+
+#[inline(always)]
+pub fn memory_read_or_imm_native<F>(memory: &GuestMemory, addr_space: u32, ptr_or_imm: F) -> F
+where
+    F: PrimeField32,
+{
+    debug_assert!(addr_space == RV32_IMM_AS || addr_space == NATIVE_AS);
+
+    if addr_space == NATIVE_AS {
+        let [result]: [F; 1] = memory_read_native(memory, ptr_or_imm.as_canonical_u32());
+        result
+    } else {
+        ptr_or_imm
+    }
+}
+
+#[inline(always)]
+pub fn memory_write_native<F, const N: usize>(memory: &mut GuestMemory, ptr: u32, data: [F; N])
+where
+    F: PrimeField32,
+{
+    // SAFETY:
+    // - address space `NATIVE_AS` will always have cell type `F` and minimum alignment of `1`
+    unsafe { memory.write::<F, N>(NATIVE_AS, ptr, data) }
+}
+
+#[inline(always)]
+pub fn memory_read_native_from_state<Ctx, F, const N: usize>(
+    state: &mut VmStateMut<F, GuestMemory, Ctx>,
+    ptr: u32,
+) -> [F; N]
+where
+    F: PrimeField32,
+    Ctx: ExecutionCtxTrait,
+{
+    state.ctx.on_memory_operation(NATIVE_AS, ptr, N as u32);
+
+    memory_read_native(state.memory, ptr)
+}
+
+#[inline(always)]
+pub fn memory_read_or_imm_native_from_state<Ctx, F>(
+    state: &mut VmStateMut<F, GuestMemory, Ctx>,
+    addr_space: u32,
+    ptr_or_imm: F,
+) -> F
+where
+    F: PrimeField32,
+    Ctx: ExecutionCtxTrait,
+{
+    debug_assert!(addr_space == RV32_IMM_AS || addr_space == NATIVE_AS);
+
+    if addr_space == NATIVE_AS {
+        let [result]: [F; 1] = memory_read_native_from_state(state, ptr_or_imm.as_canonical_u32());
+        result
+    } else {
+        ptr_or_imm
+    }
+}
+
+#[inline(always)]
+pub fn memory_write_native_from_state<Ctx, F, const N: usize>(
+    state: &mut VmStateMut<F, GuestMemory, Ctx>,
+    ptr: u32,
+    data: [F; N],
+) where
+    F: PrimeField32,
+    Ctx: ExecutionCtxTrait,
+{
+    state.ctx.on_memory_operation(NATIVE_AS, ptr, N as u32);
+
+    memory_write_native(state.memory, ptr, data)
+}
+
+/// Atomic read operation which increments the timestamp by 1.
+/// Returns `(t_prev, [ptr:BLOCK_SIZE]_4)` where `t_prev` is the timestamp of the last memory
+/// access.
+#[inline(always)]
+pub fn timed_read_native<F, const BLOCK_SIZE: usize>(
+    memory: &mut TracingMemory,
+    ptr: u32,
+) -> (u32, [F; BLOCK_SIZE])
+where
+    F: PrimeField32,
+{
+    // SAFETY:
+    // - address space `NATIVE_AS` will always have cell type `F` and minimum alignment of `1`
+    unsafe { memory.read::<F, BLOCK_SIZE, 1>(NATIVE_AS, ptr) }
+}
+
+#[inline(always)]
+pub fn timed_write_native<F, const BLOCK_SIZE: usize>(
+    memory: &mut TracingMemory,
+    ptr: u32,
+    vals: [F; BLOCK_SIZE],
+) -> (u32, [F; BLOCK_SIZE])
+where
+    F: PrimeField32,
+{
+    // SAFETY:
+    // - address space `NATIVE_AS` will always have cell type `F` and minimum alignment of `1`
+    unsafe { memory.write::<F, BLOCK_SIZE, 1>(NATIVE_AS, ptr, vals) }
+}
+
+/// Reads the value at `ptr` from memory and stores the previous timestamp in `prev_timestamp`.
+/// Reads are only done from address space [NATIVE_AS].
+#[inline(always)]
+pub fn tracing_read_native<F, const BLOCK_SIZE: usize>(
+    memory: &mut TracingMemory,
+    ptr: u32,
+    prev_timestamp: &mut u32,
+) -> [F; BLOCK_SIZE]
+where
+    F: PrimeField32,
+{
+    let (t_prev, data) = timed_read_native(memory, ptr);
+    *prev_timestamp = t_prev;
+    data
+}
+
+/// Writes `ptr, vals` into memory and records the previous timestamp and data.
+/// Writes are only done to address space [NATIVE_AS].
+#[inline(always)]
+pub fn tracing_write_native<F, const BLOCK_SIZE: usize>(
+    memory: &mut TracingMemory,
+    ptr: u32,
+    vals: [F; BLOCK_SIZE],
+    prev_timestamp: &mut u32,
+    prev_data: &mut [F; BLOCK_SIZE],
+) where
+    F: PrimeField32,
+{
+    let (t_prev, data_prev) = timed_write_native(memory, ptr, vals);
+    *prev_timestamp = t_prev;
+    *prev_data = data_prev;
+}
+
+/// Writes `vals` at `ptr` and stores the previous timestamp and data directly into `cols`.
+#[inline(always)]
+pub fn tracing_write_native_inplace<F, const BLOCK_SIZE: usize>(
+    memory: &mut TracingMemory,
+    ptr: u32,
+    vals: [F; BLOCK_SIZE],
+    cols: &mut MemoryWriteAuxCols<F, BLOCK_SIZE>,
+) where
+    F: PrimeField32,
+{
+    let (t_prev, data_prev) = timed_write_native(memory, ptr, vals);
+    cols.base.set_prev(F::from_canonical_u32(t_prev));
+    cols.prev_data = data_prev;
+}
+
+/// Reads the value at `ptr_or_imm` from memory and records the previous timestamp.
+/// If the read is an immediate, the previous timestamp will be set to `u32::MAX`.
+#[inline(always)]
+pub fn tracing_read_or_imm_native<F>(
+    memory: &mut TracingMemory,
+    addr_space: F,
+    ptr_or_imm: F,
+    prev_timestamp: &mut u32,
+) -> F
+where
+    F: PrimeField32,
+{
+    debug_assert!(
+        addr_space == F::ZERO || addr_space == F::from_canonical_u32(NATIVE_AS),
+        "addr_space={} is not valid",
+        addr_space
+    );
+
+    if addr_space == F::ZERO {
+        *prev_timestamp = u32::MAX;
+        memory.increment_timestamp();
+        ptr_or_imm
+    } else {
+        let data: [F; 1] =
+            tracing_read_native(memory, ptr_or_imm.as_canonical_u32(), prev_timestamp);
+        data[0]
+    }
+}
diff --git a/crates/vm/src/system/phantom/execution.rs b/crates/vm/src/system/phantom/execution.rs
new file mode 100644
index 0000000000..e7e1775052
--- /dev/null
+++ b/crates/vm/src/system/phantom/execution.rs
@@ -0,0 +1,192 @@
+use std::borrow::{Borrow, BorrowMut};
+
+use openvm_circuit_primitives_derive::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction, program::DEFAULT_PC_STEP, PhantomDiscriminant, SysPhantom,
+};
+use openvm_stark_backend::p3_field::PrimeField32;
+use rand::rngs::StdRng;
+
+use crate::{
+    arch::{
+        execution_mode::{ExecutionCtxTrait, MeteredExecutionCtxTrait},
+        E2PreCompute, ExecuteFunc, ExecutionError, Executor, MeteredExecutor, PhantomSubExecutor,
+        StaticProgramError, Streams, VmExecState,
+    },
+    system::{memory::online::GuestMemory, phantom::PhantomExecutor},
+};
+
+#[derive(Clone, AlignedBytesBorrow)]
+#[repr(C)]
+pub(super) struct PhantomOperands {
+    pub(super) a: u32,
+    pub(super) b: u32,
+    pub(super) c: u32,
+}
+
+#[derive(Clone, AlignedBytesBorrow)]
+#[repr(C)]
+struct PhantomPreCompute<F> {
+    operands: PhantomOperands,
+    sub_executor: *const dyn PhantomSubExecutor<F>,
+}
+
+impl<F> Executor<F> for PhantomExecutor<F>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize {
+        size_of::<PhantomPreCompute<F>>()
+    }
+    #[inline(always)]
+    fn pre_compute<Ctx>(
+        &self,
+        _pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: ExecutionCtxTrait,
+    {
+        let data: &mut PhantomPreCompute<F> = data.borrow_mut();
+        self.pre_compute_impl(inst, data);
+        Ok(execute_e1_impl)
+    }
+}
+
+pub(super) struct PhantomStateMut<'a, F> {
+    pub(super) pc: &'a mut u32,
+    pub(super) memory: &'a mut GuestMemory,
+    pub(super) streams: &'a mut Streams<F>,
+    pub(super) rng: &'a mut StdRng,
+}
+
+#[inline(always)]
+unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait>(
+    pre_compute: &PhantomPreCompute<F>,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let sub_executor = &*pre_compute.sub_executor;
+    if let Err(e) = execute_impl(
+        PhantomStateMut {
+            pc: &mut vm_state.vm_state.pc,
+            memory: &mut vm_state.vm_state.memory,
+            streams: &mut vm_state.vm_state.streams,
+            rng: &mut vm_state.vm_state.rng,
+        },
+        &pre_compute.operands,
+        sub_executor,
+    ) {
+        vm_state.exit_code = Err(e);
+        return;
+    }
+    vm_state.pc += DEFAULT_PC_STEP;
+    vm_state.instret += 1;
+}
+
+#[inline(always)]
+unsafe fn execute_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &PhantomPreCompute<F> = pre_compute.borrow();
+    execute_e12_impl(pre_compute, vm_state);
+}
+
+#[inline(always)]
+unsafe fn execute_e2_impl<F: PrimeField32, CTX: MeteredExecutionCtxTrait>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &E2PreCompute<PhantomPreCompute<F>> = pre_compute.borrow();
+    vm_state
+        .ctx
+        .on_height_change(pre_compute.chip_idx as usize, 1);
+    execute_e12_impl(&pre_compute.data, vm_state);
+}
+
+#[inline(always)]
+fn execute_impl<F>(
+    state: PhantomStateMut<F>,
+    operands: &PhantomOperands,
+    sub_executor: &dyn PhantomSubExecutor<F>,
+) -> Result<(), ExecutionError> {
+    let &PhantomOperands { a, b, c } = operands;
+
+    let discriminant = PhantomDiscriminant(c as u16);
+    // SysPhantom::{CtStart, CtEnd} are only handled in Preflight Execution, so the only SysPhantom
+    // to handle here is DebugPanic.
+    if let Some(discr) = SysPhantom::from_repr(discriminant.0) {
+        if discr == SysPhantom::DebugPanic {
+            return Err(ExecutionError::Fail {
+                pc: *state.pc,
+                msg: "DebugPanic",
+            });
+        }
+    }
+    sub_executor
+        .phantom_execute(
+            state.memory,
+            state.streams,
+            state.rng,
+            discriminant,
+            a,
+            b,
+            (c >> 16) as u16,
+        )
+        .map_err(|e| ExecutionError::Phantom {
+            pc: *state.pc,
+            discriminant,
+            inner: e,
+        })?;
+
+    Ok(())
+}
+
+impl<F> PhantomExecutor<F>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn pre_compute_impl(&self, inst: &Instruction<F>, data: &mut PhantomPreCompute<F>) {
+        let c = inst.c.as_canonical_u32();
+        *data = PhantomPreCompute {
+            operands: PhantomOperands {
+                a: inst.a.as_canonical_u32(),
+                b: inst.b.as_canonical_u32(),
+                c,
+            },
+            sub_executor: self
+                .phantom_executors
+                .get(&PhantomDiscriminant(c as u16))
+                .unwrap_or_else(|| panic!("Phantom executor not found for insn {inst:?}"))
+                .as_ref(),
+        };
+    }
+}
+
+impl<F> MeteredExecutor<F> for PhantomExecutor<F>
+where
+    F: PrimeField32,
+{
+    fn metered_pre_compute_size(&self) -> usize {
+        size_of::<E2PreCompute<PhantomPreCompute<F>>>()
+    }
+
+    fn metered_pre_compute<Ctx>(
+        &self,
+        chip_idx: usize,
+        _pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        let e2_data: &mut E2PreCompute<PhantomPreCompute<F>> = data.borrow_mut();
+        e2_data.chip_idx = chip_idx as u32;
+        self.pre_compute_impl(inst, &mut e2_data.data);
+        Ok(execute_e2_impl)
+    }
+}
diff --git a/crates/vm/src/system/phantom/mod.rs b/crates/vm/src/system/phantom/mod.rs
index 28977fe2cd..19df72812b 100644
--- a/crates/vm/src/system/phantom/mod.rs
+++ b/crates/vm/src/system/phantom/mod.rs
@@ -1,37 +1,41 @@
+//! Chip to handle phantom instructions.
+//! The Air will always constrain a NOP which advances pc by DEFAULT_PC_STEP.
+//! The runtime executor will execute different phantom instructions that may
+//! affect trace generation based on the operand.
 use std::{
     borrow::{Borrow, BorrowMut},
-    sync::{Arc, Mutex, OnceLock},
+    sync::Arc,
 };
 
+use openvm_circuit_primitives::AlignedBytesBorrow;
 use openvm_circuit_primitives_derive::AlignedBorrow;
 use openvm_instructions::{
-    instruction::Instruction, program::DEFAULT_PC_STEP, LocalOpcode, PhantomDiscriminant,
-    SysPhantom, SystemOpcode, VmOpcode,
+    instruction::Instruction, program::DEFAULT_PC_STEP, PhantomDiscriminant, SysPhantom,
+    SystemOpcode, VmOpcode,
 };
 use openvm_stark_backend::{
-    config::{StarkGenericConfig, Val},
     interaction::InteractionBuilder,
     p3_air::{Air, AirBuilder, BaseAir},
     p3_field::{Field, FieldAlgebra, PrimeField32},
-    p3_matrix::{dense::RowMajorMatrix, Matrix},
-    p3_maybe_rayon::prelude::*,
-    prover::types::AirProofInput,
-    rap::{get_air_name, BaseAirWithPublicValues, PartitionedBaseAir},
-    AirRef, Chip, ChipUsageGetter,
+    p3_matrix::Matrix,
+    rap::{BaseAirWithPublicValues, PartitionedBaseAir},
 };
+use rand::rngs::StdRng;
 use rustc_hash::FxHashMap;
 use serde::{Deserialize, Serialize};
 use serde_big_array::BigArray;
 
-use super::memory::MemoryController;
+use super::memory::online::{GuestMemory, TracingMemory};
 use crate::{
     arch::{
-        ExecutionBridge, ExecutionBus, ExecutionError, ExecutionState, InstructionExecutor,
-        PcIncOrSet, PhantomSubExecutor, Streams,
+        get_record_from_slice, EmptyMultiRowLayout, ExecutionBridge, ExecutionError,
+        ExecutionState, PcIncOrSet, PhantomSubExecutor, PreflightExecutor, RecordArena, Streams,
+        TraceFiller, VmChipWrapper, VmStateMut,
     },
-    system::program::ProgramBus,
+    system::memory::MemoryAuxColsFactory,
 };
 
+mod execution;
 #[cfg(test)]
 mod tests;
 
@@ -88,95 +92,105 @@ impl<AB: AirBuilder + InteractionBuilder> Air<AB> for PhantomAir {
     }
 }
 
-pub struct PhantomChip<F> {
-    pub air: PhantomAir,
-    pub rows: Vec<PhantomCols<F>>,
-    streams: OnceLock<Arc<Mutex<Streams<F>>>>,
-    phantom_executors: FxHashMap<PhantomDiscriminant, Box<dyn PhantomSubExecutor<F>>>,
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug, Clone)]
+pub struct PhantomRecord {
+    pub pc: u32,
+    pub operands: [u32; NUM_PHANTOM_OPERANDS],
+    pub timestamp: u32,
 }
 
-impl<F> PhantomChip<F> {
-    pub fn new(execution_bus: ExecutionBus, program_bus: ProgramBus, offset: usize) -> Self {
-        Self {
-            air: PhantomAir {
-                execution_bridge: ExecutionBridge::new(execution_bus, program_bus),
-                phantom_opcode: VmOpcode::from_usize(offset + SystemOpcode::PHANTOM.local_usize()),
-            },
-            rows: vec![],
-            streams: OnceLock::new(),
-            phantom_executors: FxHashMap::default(),
-        }
-    }
-
-    pub fn set_streams(&mut self, streams: Arc<Mutex<Streams<F>>>) {
-        if self.streams.set(streams).is_err() {
-            panic!("Streams should only be set once");
-        }
-    }
-
-    pub(crate) fn add_sub_executor<P: PhantomSubExecutor<F> + 'static>(
-        &mut self,
-        sub_executor: P,
-        discriminant: PhantomDiscriminant,
-    ) -> Option<Box<dyn PhantomSubExecutor<F>>> {
-        self.phantom_executors
-            .insert(discriminant, Box::new(sub_executor))
-    }
+/// `PhantomExecutor` is a special executor because it is stateful and stores all the phantom
+/// sub-executors.
+#[derive(Clone, derive_new::new)]
+pub struct PhantomExecutor<F> {
+    pub(crate) phantom_executors: FxHashMap<PhantomDiscriminant, Arc<dyn PhantomSubExecutor<F>>>,
+    phantom_opcode: VmOpcode,
 }
 
-impl<F: PrimeField32> InstructionExecutor<F> for PhantomChip<F> {
+pub struct PhantomFiller;
+pub type PhantomChip<F> = VmChipWrapper<F, PhantomFiller>;
+
+impl<F, RA> PreflightExecutor<F, RA> for PhantomExecutor<F>
+where
+    F: PrimeField32,
+    for<'buf> RA: RecordArena<'buf, EmptyMultiRowLayout, &'buf mut PhantomRecord>,
+{
     fn execute(
-        &mut self,
-        memory: &mut MemoryController<F>,
+        &self,
+        state: VmStateMut<F, TracingMemory, RA>,
         instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-    ) -> Result<ExecutionState<u32>, ExecutionError> {
-        let &Instruction {
-            opcode, a, b, c, ..
-        } = instruction;
-        assert_eq!(opcode, self.air.phantom_opcode);
-
-        let c_u32 = c.as_canonical_u32();
-        let discriminant = PhantomDiscriminant(c_u32 as u16);
-        // If not a system phantom sub-instruction (which is handled in
-        // ExecutionSegment), look for a phantom sub-executor to handle it.
-        if SysPhantom::from_repr(discriminant.0).is_none() {
-            let sub_executor = self
-                .phantom_executors
-                .get_mut(&discriminant)
-                .ok_or_else(|| ExecutionError::PhantomNotFound {
-                    pc: from_state.pc,
-                    discriminant,
-                })?;
-            let mut streams = self.streams.get().unwrap().lock().unwrap();
+    ) -> Result<(), ExecutionError> {
+        let record: &mut PhantomRecord = state.ctx.alloc(EmptyMultiRowLayout::default());
+        let pc = *state.pc;
+        record.pc = pc;
+        record.timestamp = state.memory.timestamp;
+        let [a, b, c] = [instruction.a, instruction.b, instruction.c].map(|x| x.as_canonical_u32());
+        record.operands = [a, b, c];
+
+        debug_assert_eq!(instruction.opcode, self.phantom_opcode);
+        let discriminant = PhantomDiscriminant(c as u16);
+        if let Some(sys) = SysPhantom::from_repr(discriminant.0) {
+            tracing::trace!("pc: {pc:#x} | system phantom: {sys:?}");
+            match sys {
+                SysPhantom::DebugPanic => {
+                    #[cfg(all(
+                        feature = "metrics",
+                        any(debug_assertions, feature = "perf-metrics")
+                    ))]
+                    {
+                        let metrics = state.metrics;
+                        metrics.update_backtrace(pc);
+                        if let Some(mut backtrace) = metrics.prev_backtrace.take() {
+                            backtrace.resolve();
+                            eprintln!("openvm program failure; backtrace:\n{:?}", backtrace);
+                        } else {
+                            eprintln!("openvm program failure; no backtrace");
+                        }
+                    }
+                    return Err(ExecutionError::Fail {
+                        pc,
+                        msg: "DebugPanic",
+                    });
+                }
+                #[cfg(feature = "perf-metrics")]
+                SysPhantom::CtStart => {
+                    let metrics = state.metrics;
+                    if let Some(info) = metrics.debug_infos.get(pc) {
+                        metrics.cycle_tracker.start(info.dsl_instruction.clone());
+                    }
+                }
+                #[cfg(feature = "perf-metrics")]
+                SysPhantom::CtEnd => {
+                    let metrics = state.metrics;
+                    if let Some(info) = metrics.debug_infos.get(pc) {
+                        metrics.cycle_tracker.end(info.dsl_instruction.clone());
+                    }
+                }
+                _ => {}
+            }
+        } else {
+            let sub_executor = self.phantom_executors.get(&discriminant).unwrap();
             sub_executor
-                .as_mut()
                 .phantom_execute(
-                    memory,
-                    &mut streams,
+                    &state.memory.data,
+                    state.streams,
+                    state.rng,
                     discriminant,
                     a,
                     b,
-                    (c_u32 >> 16) as u16,
+                    (c >> 16) as u16,
                 )
-                .map_err(|e| ExecutionError::Phantom {
-                    pc: from_state.pc,
+                .map_err(|err| ExecutionError::Phantom {
+                    pc,
                     discriminant,
-                    inner: e,
+                    inner: err,
                 })?;
         }
+        *state.pc += DEFAULT_PC_STEP;
+        state.memory.increment_timestamp();
 
-        self.rows.push(PhantomCols {
-            pc: F::from_canonical_u32(from_state.pc),
-            operands: [a, b, c],
-            timestamp: F::from_canonical_u32(from_state.timestamp),
-            is_valid: F::ONE,
-        });
-        memory.increment_timestamp();
-        Ok(ExecutionState::new(
-            from_state.pc + DEFAULT_PC_STEP,
-            from_state.timestamp + 1,
-        ))
+        Ok(())
     }
 
     fn get_opcode_name(&self, _: usize) -> String {
@@ -184,41 +198,72 @@ impl<F: PrimeField32> InstructionExecutor<F> for PhantomChip<F> {
     }
 }
 
-impl<F: PrimeField32> ChipUsageGetter for PhantomChip<F> {
-    fn air_name(&self) -> String {
-        get_air_name(&self.air)
-    }
-    fn current_trace_height(&self) -> usize {
-        self.rows.len()
+impl<F: Field> TraceFiller<F> for PhantomFiller {
+    fn fill_trace_row(&self, _mem_helper: &MemoryAuxColsFactory<F>, mut row_slice: &mut [F]) {
+        // SAFETY: assume that row has size PhantomCols::<F>::width()
+        let record: &PhantomRecord = unsafe { get_record_from_slice(&mut row_slice, ()) };
+        let row: &mut PhantomCols<F> = row_slice.borrow_mut();
+        // SAFETY: must assign in reverse order of column struct to prevent overwriting
+        // borrowed data
+        row.is_valid = F::ONE;
+        row.timestamp = F::from_canonical_u32(record.timestamp);
+        row.operands[2] = F::from_canonical_u32(record.operands[2]);
+        row.operands[1] = F::from_canonical_u32(record.operands[1]);
+        row.operands[0] = F::from_canonical_u32(record.operands[0]);
+        row.pc = F::from_canonical_u32(record.pc)
     }
-    fn trace_width(&self) -> usize {
-        PhantomCols::<F>::width()
-    }
-    fn current_trace_cells(&self) -> usize {
-        self.trace_width() * self.current_trace_height()
+}
+
+pub struct NopPhantomExecutor;
+pub struct CycleStartPhantomExecutor;
+pub struct CycleEndPhantomExecutor;
+
+impl<F> PhantomSubExecutor<F> for NopPhantomExecutor {
+    #[inline(always)]
+    fn phantom_execute(
+        &self,
+        _memory: &GuestMemory,
+        _streams: &mut Streams<F>,
+        _rng: &mut StdRng,
+        _discriminant: PhantomDiscriminant,
+        _a: u32,
+        _b: u32,
+        _c_upper: u16,
+    ) -> eyre::Result<()> {
+        Ok(())
     }
 }
 
-impl<SC: StarkGenericConfig> Chip<SC> for PhantomChip<Val<SC>>
-where
-    Val<SC>: PrimeField32,
-{
-    fn air(&self) -> AirRef<SC> {
-        Arc::new(self.air.clone())
+impl<F> PhantomSubExecutor<F> for CycleStartPhantomExecutor {
+    #[inline(always)]
+    fn phantom_execute(
+        &self,
+        _memory: &GuestMemory,
+        _streams: &mut Streams<F>,
+        _rng: &mut StdRng,
+        _discriminant: PhantomDiscriminant,
+        _a: u32,
+        _b: u32,
+        _c_upper: u16,
+    ) -> eyre::Result<()> {
+        // Cycle tracking is implemented separately only in Preflight Execution
+        Ok(())
     }
+}
 
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
-        let correct_height = self.rows.len().next_power_of_two();
-        let width = PhantomCols::<Val<SC>>::width();
-        let mut rows = Val::<SC>::zero_vec(width * correct_height);
-        rows.par_chunks_mut(width)
-            .zip(&self.rows)
-            .for_each(|(row, row_record)| {
-                let row: &mut PhantomCols<_> = row.borrow_mut();
-                *row = *row_record;
-            });
-        let trace = RowMajorMatrix::new(rows, width);
-
-        AirProofInput::simple(trace, vec![])
+impl<F> PhantomSubExecutor<F> for CycleEndPhantomExecutor {
+    #[inline(always)]
+    fn phantom_execute(
+        &self,
+        _memory: &GuestMemory,
+        _streams: &mut Streams<F>,
+        _rng: &mut StdRng,
+        _discriminant: PhantomDiscriminant,
+        _a: u32,
+        _b: u32,
+        _c_upper: u16,
+    ) -> eyre::Result<()> {
+        // Cycle tracking is implemented separately only in Preflight Execution
+        Ok(())
     }
 }
diff --git a/crates/vm/src/system/phantom/tests.rs b/crates/vm/src/system/phantom/tests.rs
index 7a0b068d36..14eec85e3c 100644
--- a/crates/vm/src/system/phantom/tests.rs
+++ b/crates/vm/src/system/phantom/tests.rs
@@ -1,34 +1,41 @@
-use std::sync::{Arc, Mutex};
-
 use openvm_instructions::{instruction::Instruction, SystemOpcode};
 use openvm_stark_backend::p3_field::{FieldAlgebra, PrimeField32};
 use openvm_stark_sdk::p3_baby_bear::BabyBear;
 
-use super::PhantomChip;
-use crate::arch::{instructions::LocalOpcode, testing::VmChipTestBuilder, ExecutionState};
+use super::PhantomExecutor;
+use crate::{
+    arch::{
+        instructions::LocalOpcode,
+        testing::{TestChipHarness, VmChipTestBuilder},
+        ExecutionState, VmChipWrapper,
+    },
+    system::phantom::{PhantomAir, PhantomFiller},
+};
 type F = BabyBear;
 
 #[test]
 fn test_nops_and_terminate() {
     let mut tester = VmChipTestBuilder::default();
-    let mut chip = PhantomChip::<F>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        SystemOpcode::CLASS_OFFSET,
-    );
-    chip.set_streams(Arc::new(Mutex::new(Default::default())));
+    let phantom_opcode = SystemOpcode::PHANTOM.global_opcode();
+    let executor = PhantomExecutor::<F>::new(Default::default(), phantom_opcode);
+    let air = PhantomAir {
+        execution_bridge: tester.execution_bridge(),
+        phantom_opcode,
+    };
+    let chip = VmChipWrapper::new(PhantomFiller, tester.memory_helper());
+    let num_nops = 5;
+    let mut harness = TestChipHarness::with_capacity(executor, air, chip, num_nops);
 
-    let nop = Instruction::from_isize(SystemOpcode::PHANTOM.global_opcode(), 0, 0, 0, 0, 0);
+    let nop = Instruction::from_isize(phantom_opcode, 0, 0, 0, 0, 0);
     let mut state: ExecutionState<F> = ExecutionState::new(F::ZERO, F::ONE);
-    let num_nops = 5;
     for _ in 0..num_nops {
-        tester.execute_with_pc(&mut chip, &nop, state.pc.as_canonical_u32());
+        tester.execute_with_pc(&mut harness, &nop, state.pc.as_canonical_u32());
         let new_state = tester.execution.records.last().unwrap().final_state;
         assert_eq!(state.pc + F::from_canonical_usize(4), new_state.pc);
         assert_eq!(state.timestamp + F::ONE, new_state.timestamp);
         state = new_state;
     }
 
-    let tester = tester.build().load(chip).finalize();
+    let tester = tester.build().load(harness).finalize();
     tester.simple_test().expect("Verification failed");
 }
diff --git a/crates/vm/src/system/poseidon2/air.rs b/crates/vm/src/system/poseidon2/air.rs
index 99769d253d..81b99148e5 100644
--- a/crates/vm/src/system/poseidon2/air.rs
+++ b/crates/vm/src/system/poseidon2/air.rs
@@ -22,7 +22,7 @@ use super::columns::Poseidon2PeripheryCols;
 #[derive(Clone, new, Debug)]
 pub struct Poseidon2PeripheryAir<F: Field, const SBOX_REGISTERS: usize> {
     pub(super) subair: Arc<Poseidon2SubAir<F, SBOX_REGISTERS>>,
-    pub(super) bus: LookupBus,
+    pub bus: LookupBus,
 }
 
 impl<F: Field, const SBOX_REGISTERS: usize> BaseAirWithPublicValues<F>
diff --git a/crates/vm/src/system/poseidon2/chip.rs b/crates/vm/src/system/poseidon2/chip.rs
index e0059f1ce1..f7053edcb5 100644
--- a/crates/vm/src/system/poseidon2/chip.rs
+++ b/crates/vm/src/system/poseidon2/chip.rs
@@ -1,14 +1,18 @@
 use std::{
     array,
-    sync::{atomic::AtomicU32, Arc},
+    sync::{
+        atomic::{AtomicBool, AtomicU32},
+        Arc,
+    },
 };
 
+use dashmap::DashMap;
 use openvm_poseidon2_air::{Poseidon2Config, Poseidon2SubChip};
 use openvm_stark_backend::{
     interaction::{BusIndex, LookupBus},
-    p3_field::PrimeField32,
+    p3_field::{Field, PrimeField32},
 };
-use rustc_hash::FxHashMap;
+use rustc_hash::FxBuildHasher;
 
 use super::{
     air::Poseidon2PeripheryAir, PERIPHERY_POSEIDON2_CHUNK_SIZE, PERIPHERY_POSEIDON2_WIDTH,
@@ -16,10 +20,11 @@ use super::{
 use crate::arch::hasher::{Hasher, HasherChip};
 
 #[derive(Debug)]
-pub struct Poseidon2PeripheryBaseChip<F: PrimeField32, const SBOX_REGISTERS: usize> {
+pub struct Poseidon2PeripheryBaseChip<F: Field, const SBOX_REGISTERS: usize> {
     pub air: Arc<Poseidon2PeripheryAir<F, SBOX_REGISTERS>>,
     pub subchip: Poseidon2SubChip<F, SBOX_REGISTERS>,
-    pub records: FxHashMap<[F; PERIPHERY_POSEIDON2_WIDTH], AtomicU32>,
+    pub records: DashMap<[F; PERIPHERY_POSEIDON2_WIDTH], AtomicU32, FxBuildHasher>,
+    pub nonempty: AtomicBool,
 }
 
 impl<F: PrimeField32, const SBOX_REGISTERS: usize> Poseidon2PeripheryBaseChip<F, SBOX_REGISTERS> {
@@ -31,7 +36,8 @@ impl<F: PrimeField32, const SBOX_REGISTERS: usize> Poseidon2PeripheryBaseChip<F,
                 LookupBus::new(bus_idx),
             )),
             subchip,
-            records: FxHashMap::default(),
+            records: DashMap::default(),
+            nonempty: AtomicBool::new(false),
         }
     }
 }
@@ -63,7 +69,7 @@ impl<F: PrimeField32, const SBOX_REGISTERS: usize> HasherChip<PERIPHERY_POSEIDON
     ///
     /// No interactions with other chips.
     fn compress_and_record(
-        &mut self,
+        &self,
         lhs: &[F; PERIPHERY_POSEIDON2_CHUNK_SIZE],
         rhs: &[F; PERIPHERY_POSEIDON2_CHUNK_SIZE],
     ) -> [F; PERIPHERY_POSEIDON2_CHUNK_SIZE] {
@@ -73,6 +79,8 @@ impl<F: PrimeField32, const SBOX_REGISTERS: usize> HasherChip<PERIPHERY_POSEIDON
 
         let count = self.records.entry(input).or_insert(AtomicU32::new(0));
         count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
+        self.nonempty
+            .store(true, std::sync::atomic::Ordering::Relaxed);
 
         let output = self.subchip.permute(input);
         array::from_fn(|i| output[i])
diff --git a/crates/vm/src/system/poseidon2/mod.rs b/crates/vm/src/system/poseidon2/mod.rs
index ac4632c9cb..608e5e5d02 100644
--- a/crates/vm/src/system/poseidon2/mod.rs
+++ b/crates/vm/src/system/poseidon2/mod.rs
@@ -8,13 +8,15 @@
 //! internal leaves of a Merkle tree but **not** as the leaf hash because `compress` does not
 //! add any padding.
 
-use openvm_poseidon2_air::Poseidon2Config;
+use std::sync::Arc;
+
+use openvm_circuit_primitives::Chip;
+use openvm_poseidon2_air::{Poseidon2Config, Poseidon2SubAir};
 use openvm_stark_backend::{
     config::{StarkGenericConfig, Val},
-    interaction::BusIndex,
-    p3_field::PrimeField32,
-    prover::types::AirProofInput,
-    AirRef, Chip, ChipUsageGetter,
+    interaction::{BusIndex, LookupBus},
+    p3_field::{Field, PrimeField32},
+    AirRef, ChipUsageGetter,
 };
 
 #[cfg(test)]
@@ -24,14 +26,19 @@ pub mod air;
 mod chip;
 pub use chip::*;
 
-use crate::arch::hasher::{Hasher, HasherChip};
+use crate::{
+    arch::hasher::{Hasher, HasherChip},
+    system::poseidon2::air::Poseidon2PeripheryAir,
+};
 pub mod columns;
 pub mod trace;
 
 pub const PERIPHERY_POSEIDON2_WIDTH: usize = 16;
 pub const PERIPHERY_POSEIDON2_CHUNK_SIZE: usize = 8;
 
-pub enum Poseidon2PeripheryChip<F: PrimeField32> {
+#[derive(Chip)]
+#[chip(where = "F: Field")]
+pub enum Poseidon2PeripheryChip<F: Field> {
     Register0(Poseidon2PeripheryBaseChip<F, 0>),
     Register1(Poseidon2PeripheryBaseChip<F, 1>),
 }
@@ -49,22 +56,21 @@ impl<F: PrimeField32> Poseidon2PeripheryChip<F> {
     }
 }
 
-impl<SC: StarkGenericConfig> Chip<SC> for Poseidon2PeripheryChip<Val<SC>>
-where
-    Val<SC>: PrimeField32,
-{
-    fn air(&self) -> AirRef<SC> {
-        match self {
-            Poseidon2PeripheryChip::Register0(chip) => chip.air(),
-            Poseidon2PeripheryChip::Register1(chip) => chip.air(),
-        }
-    }
-
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
-        match self {
-            Poseidon2PeripheryChip::Register0(chip) => chip.generate_air_proof_input(),
-            Poseidon2PeripheryChip::Register1(chip) => chip.generate_air_proof_input(),
-        }
+pub fn new_poseidon2_periphery_air<SC: StarkGenericConfig>(
+    poseidon2_config: Poseidon2Config<Val<SC>>,
+    direct_bus: LookupBus,
+    max_constraint_degree: usize,
+) -> AirRef<SC> {
+    if max_constraint_degree >= 7 {
+        Arc::new(Poseidon2PeripheryAir::<Val<SC>, 0>::new(
+            Arc::new(Poseidon2SubAir::new(poseidon2_config.constants.into())),
+            direct_bus,
+        ))
+    } else {
+        Arc::new(Poseidon2PeripheryAir::<Val<SC>, 1>::new(
+            Arc::new(Poseidon2SubAir::new(poseidon2_config.constants.into())),
+            direct_bus,
+        ))
     }
 }
 
@@ -106,7 +112,7 @@ impl<F: PrimeField32> Hasher<PERIPHERY_POSEIDON2_CHUNK_SIZE, F> for Poseidon2Per
 
 impl<F: PrimeField32> HasherChip<PERIPHERY_POSEIDON2_CHUNK_SIZE, F> for Poseidon2PeripheryChip<F> {
     fn compress_and_record(
-        &mut self,
+        &self,
         lhs: &[F; PERIPHERY_POSEIDON2_CHUNK_SIZE],
         rhs: &[F; PERIPHERY_POSEIDON2_CHUNK_SIZE],
     ) -> [F; PERIPHERY_POSEIDON2_CHUNK_SIZE] {
diff --git a/crates/vm/src/system/poseidon2/tests.rs b/crates/vm/src/system/poseidon2/tests.rs
index 095c8acba4..31cbe9ff47 100644
--- a/crates/vm/src/system/poseidon2/tests.rs
+++ b/crates/vm/src/system/poseidon2/tests.rs
@@ -1,5 +1,9 @@
 use openvm_poseidon2_air::Poseidon2Config;
-use openvm_stark_backend::p3_field::{FieldAlgebra, PrimeField32};
+use openvm_stark_backend::{
+    interaction::LookupBus,
+    p3_field::{FieldAlgebra, PrimeField32},
+    AirRef,
+};
 use openvm_stark_sdk::{
     dummy_airs::interaction::dummy_interaction_air::{DummyInteractionChip, DummyInteractionData},
     p3_baby_bear::BabyBear,
@@ -10,13 +14,28 @@ use rand::RngCore;
 use crate::{
     arch::{
         hasher::{Hasher, HasherChip},
-        testing::{VmChipTestBuilder, POSEIDON2_DIRECT_BUS},
+        testing::{TestSC, VmChipTestBuilder, POSEIDON2_DIRECT_BUS},
     },
     system::poseidon2::{
-        Poseidon2PeripheryChip, PERIPHERY_POSEIDON2_CHUNK_SIZE, PERIPHERY_POSEIDON2_WIDTH,
+        new_poseidon2_periphery_air, Poseidon2PeripheryChip, PERIPHERY_POSEIDON2_CHUNK_SIZE,
+        PERIPHERY_POSEIDON2_WIDTH,
     },
 };
 
+fn create_test_chip() -> (AirRef<TestSC>, Poseidon2PeripheryChip<BabyBear>) {
+    let chip = Poseidon2PeripheryChip::<BabyBear>::new(
+        Poseidon2Config::default(),
+        POSEIDON2_DIRECT_BUS,
+        3,
+    );
+    let air = new_poseidon2_periphery_air::<TestSC>(
+        Poseidon2Config::default(),
+        LookupBus::new(POSEIDON2_DIRECT_BUS),
+        3,
+    );
+    (air, chip)
+}
+
 /// Test that the direct bus interactions work.
 #[test]
 fn poseidon2_periphery_direct_test() {
@@ -31,12 +50,7 @@ fn poseidon2_periphery_direct_test() {
             std::array::from_fn(|_| BabyBear::from_canonical_u32(rng.next_u32() % (1 << 30))),
         )
     });
-
-    let mut chip = Poseidon2PeripheryChip::<BabyBear>::new(
-        Poseidon2Config::default(),
-        POSEIDON2_DIRECT_BUS,
-        3,
-    );
+    let (air, chip) = create_test_chip();
 
     let outs: [[BabyBear; PERIPHERY_POSEIDON2_CHUNK_SIZE]; NUM_OPS] =
         std::array::from_fn(|i| chip.compress_and_record(&hashes[i].0, &hashes[i].1));
@@ -65,8 +79,8 @@ fn poseidon2_periphery_direct_test() {
     let tester = VmChipTestBuilder::default();
     let tester = tester
         .build()
-        .load(dummy_interaction_chip)
-        .load(chip)
+        .load_periphery((dummy_interaction_chip.air, dummy_interaction_chip))
+        .load_periphery_ref((air, chip))
         .finalize();
     tester.simple_test().expect("Verification failed");
 }
@@ -86,11 +100,7 @@ fn poseidon2_periphery_duplicate_hashes_test() {
     });
     let counts: [u32; NUM_OPS] = std::array::from_fn(|_| rng.next_u32() % 20);
 
-    let mut chip = Poseidon2PeripheryChip::<BabyBear>::new(
-        Poseidon2Config::default(),
-        POSEIDON2_DIRECT_BUS,
-        3,
-    );
+    let (air, chip) = create_test_chip();
 
     let outs: [[BabyBear; PERIPHERY_POSEIDON2_CHUNK_SIZE]; NUM_OPS] = std::array::from_fn(|i| {
         for _ in 0..counts[i] {
@@ -123,7 +133,7 @@ fn poseidon2_periphery_duplicate_hashes_test() {
     let tester = VmChipTestBuilder::default();
     tester
         .build()
-        .load(chip)
-        .load(dummy_interaction_chip)
+        .load_periphery_ref((air, chip))
+        .load_periphery((dummy_interaction_chip.air, dummy_interaction_chip))
         .finalize();
 }
diff --git a/crates/vm/src/system/poseidon2/trace.rs b/crates/vm/src/system/poseidon2/trace.rs
index 2b6f3e6b0b..26eb198ea2 100644
--- a/crates/vm/src/system/poseidon2/trace.rs
+++ b/crates/vm/src/system/poseidon2/trace.rs
@@ -1,4 +1,4 @@
-use std::borrow::BorrowMut;
+use std::{borrow::BorrowMut, sync::Arc};
 
 use openvm_circuit_primitives::utils::next_power_of_two_or_zero;
 use openvm_stark_backend::{
@@ -7,32 +7,33 @@ use openvm_stark_backend::{
     p3_field::{FieldAlgebra, PrimeField32},
     p3_matrix::dense::RowMajorMatrix,
     p3_maybe_rayon::prelude::*,
-    prover::types::AirProofInput,
-    rap::get_air_name,
-    AirRef, Chip, ChipUsageGetter,
+    prover::{cpu::CpuBackend, types::AirProvingContext},
+    Chip, ChipUsageGetter,
 };
 
 use super::{columns::*, Poseidon2PeripheryBaseChip, PERIPHERY_POSEIDON2_WIDTH};
 
-impl<SC: StarkGenericConfig, const SBOX_REGISTERS: usize> Chip<SC>
+impl<RA, SC: StarkGenericConfig, const SBOX_REGISTERS: usize> Chip<RA, CpuBackend<SC>>
     for Poseidon2PeripheryBaseChip<Val<SC>, SBOX_REGISTERS>
 where
     Val<SC>: PrimeField32,
 {
-    fn air(&self) -> AirRef<SC> {
-        self.air.clone()
-    }
-
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
+    /// Generates the trace from the recorded inputs and multiplicities, then clears `records`.
+    fn generate_proving_ctx(&self, _: RA) -> AirProvingContext<CpuBackend<SC>> {
         let height = next_power_of_two_or_zero(self.current_trace_height());
         let width = self.trace_width();
 
         let mut inputs = Vec::with_capacity(height);
         let mut multiplicities = Vec::with_capacity(height);
-        let (actual_inputs, actual_multiplicities): (Vec<_>, Vec<_>) = self
-            .records
-            .into_par_iter()
-            .map(|(input, mult)| (input, mult.load(std::sync::atomic::Ordering::Relaxed)))
+        #[cfg(feature = "parallel")]
+        let records_iter = self.records.par_iter();
+        #[cfg(not(feature = "parallel"))]
+        let records_iter = self.records.iter();
+        let (actual_inputs, actual_multiplicities): (Vec<_>, Vec<_>) = records_iter
+            .map(|r| {
+                let (input, mult) = r.pair();
+                (*input, mult.load(std::sync::atomic::Ordering::Relaxed))
+            })
             .unzip();
         inputs.extend(actual_inputs);
         multiplicities.extend(actual_multiplicities);
@@ -54,8 +55,9 @@ where
                 let cols: &mut Poseidon2PeripheryCols<Val<SC>, SBOX_REGISTERS> = row.borrow_mut();
                 cols.mult = Val::<SC>::from_canonical_u32(mult);
             });
+        self.records.clear();
 
-        AirProofInput::simple_no_pis(RowMajorMatrix::new(values, width))
+        AirProvingContext::simple_no_pis(Arc::new(RowMajorMatrix::new(values, width)))
     }
 }
 
@@ -63,11 +65,16 @@ impl<F: PrimeField32, const SBOX_REGISTERS: usize> ChipUsageGetter
     for Poseidon2PeripheryBaseChip<F, SBOX_REGISTERS>
 {
     fn air_name(&self) -> String {
-        get_air_name(&self.air)
+        format!("Poseidon2PeripheryAir<F, {}>", SBOX_REGISTERS)
     }
 
     fn current_trace_height(&self) -> usize {
-        self.records.len()
+        if self.nonempty.load(std::sync::atomic::Ordering::Relaxed) {
+            // Avoid calling `DashMap::len` until a record has been inserted.
+            self.records.len()
+        } else {
+            0
+        }
     }
 
     fn trace_width(&self) -> usize {
diff --git a/crates/vm/src/system/program/air.rs b/crates/vm/src/system/program/air.rs
index 6336d7ad59..7d085877f8 100644
--- a/crates/vm/src/system/program/air.rs
+++ b/crates/vm/src/system/program/air.rs
@@ -32,7 +32,7 @@ pub struct ProgramExecutionCols<T> {
     pub g: T,
 }
 
-#[derive(Clone, Copy, Debug)]
+#[derive(Clone, Copy, Debug, derive_new::new)]
 pub struct ProgramAir {
     pub bus: ProgramBus,
 }
diff --git a/crates/vm/src/system/program/mod.rs b/crates/vm/src/system/program/mod.rs
index 3d68632d0f..c0cbb23d97 100644
--- a/crates/vm/src/system/program/mod.rs
+++ b/crates/vm/src/system/program/mod.rs
@@ -1,10 +1,8 @@
-use openvm_instructions::{
-    instruction::{DebugInfo, Instruction},
-    program::Program,
+use openvm_instructions::instruction::Instruction;
+use openvm_stark_backend::{
+    config::StarkGenericConfig,
+    prover::{cpu::CpuBackend, types::CommittedTraceData},
 };
-use openvm_stark_backend::{p3_field::PrimeField64, ChipUsageGetter};
-
-use crate::{arch::ExecutionError, system::program::trace::padding_instruction};
 
 #[cfg(test)]
 pub mod tests;
@@ -18,88 +16,19 @@ pub use bus::*;
 
 const EXIT_CODE_FAIL: usize = 1;
 
-#[derive(Debug)]
-pub struct ProgramChip<F> {
-    pub air: ProgramAir,
-    pub program: Program<F>,
-    pub true_program_length: usize,
-    pub execution_frequencies: Vec<usize>,
+// For CPU backend only
+pub struct ProgramChip<SC: StarkGenericConfig> {
+    /// Entry `i` is the execution frequency of the instruction in the `i`th row of the trace
+    /// matrix, after filtering the `None` gaps out of `program.instructions_and_debug_infos`.
+    pub(super) filtered_exec_frequencies: Vec<u32>,
+    pub(super) cached: Option<CommittedTraceData<CpuBackend<SC>>>,
 }
 
-impl<F: PrimeField64> ProgramChip<F> {
-    pub fn new(bus: ProgramBus) -> Self {
+impl<SC: StarkGenericConfig> ProgramChip<SC> {
+    pub(super) fn unloaded() -> Self {
         Self {
-            execution_frequencies: vec![],
-            program: Program::default(),
-            true_program_length: 0,
-            air: ProgramAir { bus },
-        }
-    }
-
-    pub fn new_with_program(program: Program<F>, bus: ProgramBus) -> Self {
-        let mut ret = Self::new(bus);
-        ret.set_program(program);
-        ret
-    }
-
-    pub fn set_program(&mut self, mut program: Program<F>) {
-        let true_program_length = program.len();
-        let mut number_actual_instructions = program.num_defined_instructions();
-        while !number_actual_instructions.is_power_of_two() {
-            program.push_instruction(padding_instruction());
-            number_actual_instructions += 1;
+            filtered_exec_frequencies: Vec::new(),
+            cached: None,
         }
-        self.true_program_length = true_program_length;
-        self.execution_frequencies = vec![0; program.len()];
-        self.program = program;
-    }
-
-    fn get_pc_index(&self, pc: u32) -> Result<usize, ExecutionError> {
-        let step = self.program.step;
-        let pc_base = self.program.pc_base;
-        let pc_index = ((pc - pc_base) / step) as usize;
-        if !(0..self.true_program_length).contains(&pc_index) {
-            return Err(ExecutionError::PcOutOfBounds {
-                pc,
-                step,
-                pc_base,
-                program_len: self.true_program_length,
-            });
-        }
-        Ok(pc_index)
-    }
-
-    pub fn get_instruction(
-        &mut self,
-        pc: u32,
-    ) -> Result<&(Instruction<F>, Option<DebugInfo>), ExecutionError> {
-        let pc_index = self.get_pc_index(pc)?;
-        self.execution_frequencies[pc_index] += 1;
-        self.program
-            .get_instruction_and_debug_info(pc_index)
-            .ok_or(ExecutionError::PcNotFound {
-                pc,
-                step: self.program.step,
-                pc_base: self.program.pc_base,
-                program_len: self.program.len(),
-            })
-    }
-}
-
-impl<F: PrimeField64> ChipUsageGetter for ProgramChip<F> {
-    fn air_name(&self) -> String {
-        "ProgramChip".to_string()
-    }
-
-    fn constant_trace_height(&self) -> Option<usize> {
-        Some(self.true_program_length.next_power_of_two())
-    }
-
-    fn current_trace_height(&self) -> usize {
-        self.true_program_length
-    }
-
-    fn trace_width(&self) -> usize {
-        1
     }
 }
diff --git a/crates/vm/src/system/program/tests/mod.rs b/crates/vm/src/system/program/tests/mod.rs
index 4a0293b348..5ea43eb51d 100644
--- a/crates/vm/src/system/program/tests/mod.rs
+++ b/crates/vm/src/system/program/tests/mod.rs
@@ -1,6 +1,7 @@
-use std::iter;
+use std::{iter, sync::Arc};
 
 use openvm_instructions::{
+    exe::VmExe,
     instruction::Instruction,
     program::{Program, DEFAULT_PC_STEP},
     LocalOpcode,
@@ -10,15 +11,19 @@ use openvm_native_compiler::{
 };
 use openvm_rv32im_transpiler::BranchEqualOpcode::*;
 use openvm_stark_backend::{
+    config::StarkGenericConfig,
+    engine::StarkEngine,
     p3_field::FieldAlgebra,
     p3_matrix::{dense::RowMajorMatrix, Matrix},
-    prover::types::AirProofInput,
+    prover::types::AirProvingContext,
+    Chip,
 };
 use openvm_stark_sdk::{
     any_rap_arc_vec,
     config::{
         baby_bear_poseidon2::{BabyBearPoseidon2Config, BabyBearPoseidon2Engine},
         baby_bear_poseidon2_root::BabyBearPoseidon2RootConfig,
+        FriParameters,
     },
     dummy_airs::interaction::dummy_interaction_air::DummyInteractionAir,
     engine::StarkFriEngine,
@@ -29,30 +34,48 @@ use static_assertions::assert_impl_all;
 
 use crate::{
     arch::{instructions::SystemOpcode::*, testing::READ_INSTRUCTION_BUS},
-    system::program::{trace::VmCommittedExe, ProgramBus, ProgramChip},
+    system::program::{trace::VmCommittedExe, ProgramAir, ProgramBus, ProgramChip},
 };
 
 assert_impl_all!(VmCommittedExe<BabyBearPoseidon2Config>: Serialize, DeserializeOwned);
 assert_impl_all!(VmCommittedExe<BabyBearPoseidon2RootConfig>: Serialize, DeserializeOwned);
 
 fn interaction_test(program: Program<BabyBear>, execution: Vec<u32>) {
-    let bus = ProgramBus::new(READ_INSTRUCTION_BUS);
-    let mut chip = ProgramChip::new_with_program(program.clone(), bus);
     let mut execution_frequencies = vec![0; program.len()];
     for pc_idx in execution {
         execution_frequencies[pc_idx as usize] += 1;
-        chip.get_instruction(pc_idx * DEFAULT_PC_STEP).unwrap();
     }
-    let program_air = chip.air;
-    let program_proof_input = chip.generate_air_proof_input(None);
+    let filtered_exec_frequencies: Vec<_> = program
+        .instructions_and_debug_infos
+        .iter()
+        .enumerate()
+        .filter(|(_, entry)| entry.is_some())
+        .map(|(i, _)| execution_frequencies[i])
+        .collect();
+    let original_height = filtered_exec_frequencies.len();
+
+    let bus = ProgramBus::new(READ_INSTRUCTION_BUS);
+    let program_air = ProgramAir::new(bus);
+
+    let engine = BabyBearPoseidon2Engine::new(FriParameters::new_for_testing(1));
+    let exe = VmExe::new(program);
+    let committed_exe =
+        VmCommittedExe::<BabyBearPoseidon2Config>::commit(exe, engine.config().pcs());
+    let cached = committed_exe.get_committed_trace();
+    let chip = ProgramChip {
+        filtered_exec_frequencies,
+        cached: Some(cached),
+    };
+    let ctx = chip.generate_proving_ctx(());
 
     let counter_air = DummyInteractionAir::new(9, true, bus.inner.index);
     let mut program_cells = vec![];
+    let program = &committed_exe.exe.program;
     for (index, frequency) in execution_frequencies.into_iter().enumerate() {
         let option = program.get_instruction_and_debug_info(index);
         if let Some((instruction, _)) = option {
             program_cells.extend([
-                BabyBear::from_canonical_usize(frequency), // hacky: we should switch execution_frequencies into hashmap
+                BabyBear::from_canonical_u32(frequency),
                 BabyBear::from_canonical_usize(index * (DEFAULT_PC_STEP as usize)),
                 instruction.opcode.to_field(),
                 instruction.a,
@@ -68,23 +91,20 @@ fn interaction_test(program: Program<BabyBear>, execution: Vec<u32>) {
 
     // Pad program cells with zeroes to make height a power of two.
     let width = 10;
-    let original_height = program.num_defined_instructions();
     let desired_height = original_height.next_power_of_two();
     let cells_to_add = (desired_height - original_height) * width;
     program_cells.extend(iter::repeat_n(BabyBear::ZERO, cells_to_add));
 
-    let counter_trace = RowMajorMatrix::new(program_cells, 10);
+    let counter_trace = Arc::new(RowMajorMatrix::new(program_cells, 10));
     println!("trace height = {}", original_height);
     println!("counter trace height = {}", counter_trace.height());
 
-    BabyBearPoseidon2Engine::run_test_fast(
-        any_rap_arc_vec!(program_air, counter_air),
-        vec![
-            program_proof_input,
-            AirProofInput::simple_no_pis(counter_trace),
-        ],
-    )
-    .expect("Verification failed");
+    engine
+        .run_test(
+            any_rap_arc_vec!(program_air, counter_air),
+            vec![ctx, AirProvingContext::simple_no_pis(counter_trace)],
+        )
+        .expect("Verification failed");
 }
 
 #[test]
@@ -178,21 +198,25 @@ fn test_program_negative() {
     ];
     let bus = ProgramBus::new(READ_INSTRUCTION_BUS);
     let program = Program::from_instructions(&instructions);
+    let program_air = ProgramAir::new(bus);
 
-    let mut chip = ProgramChip::new_with_program(program, bus);
     let execution_frequencies = vec![1; instructions.len()];
-    for pc_idx in 0..instructions.len() {
-        chip.get_instruction(pc_idx as u32 * DEFAULT_PC_STEP)
-            .unwrap();
-    }
-    let program_air = chip.air;
-    let program_proof_input = chip.generate_air_proof_input(None);
+    let engine = BabyBearPoseidon2Engine::new(FriParameters::new_for_testing(1));
+    let exe = VmExe::new(program);
+    let committed_exe =
+        VmCommittedExe::<BabyBearPoseidon2Config>::commit(exe, engine.config().pcs());
+    let cached = committed_exe.get_committed_trace();
+    let chip = ProgramChip {
+        filtered_exec_frequencies: execution_frequencies.clone(),
+        cached: Some(cached),
+    };
+    let ctx = chip.generate_proving_ctx(());
 
     let counter_air = DummyInteractionAir::new(7, true, bus.inner.index);
     let mut program_rows = vec![];
     for (pc_idx, instruction) in instructions.iter().enumerate() {
         program_rows.extend(vec![
-            BabyBear::from_canonical_usize(execution_frequencies[pc_idx]),
+            BabyBear::from_canonical_u32(execution_frequencies[pc_idx]),
             BabyBear::from_canonical_usize(pc_idx * DEFAULT_PC_STEP as usize),
             instruction.opcode.to_field(),
             instruction.a,
@@ -204,15 +228,14 @@ fn test_program_negative() {
     }
     let mut counter_trace = RowMajorMatrix::new(program_rows, 8);
     counter_trace.row_mut(1)[1] = BabyBear::ZERO;
+    let counter_trace = Arc::new(counter_trace);
 
-    BabyBearPoseidon2Engine::run_test_fast(
-        any_rap_arc_vec!(program_air, counter_air),
-        vec![
-            program_proof_input,
-            AirProofInput::simple_no_pis(counter_trace),
-        ],
-    )
-    .expect("Verification failed");
+    engine
+        .run_test(
+            any_rap_arc_vec!(program_air, counter_air),
+            vec![ctx, AirProvingContext::simple_no_pis(counter_trace)],
+        )
+        .expect("Verification failed");
 }
 
 #[test]
@@ -265,7 +288,7 @@ fn test_program_with_undefined_instructions() {
         )),
     ];
 
-    let program = Program::new_without_debug_infos_with_option(&instructions, DEFAULT_PC_STEP, 0);
+    let program = Program::new_without_debug_infos_with_option(&instructions, 0);
 
     interaction_test(program, vec![0, 2, 5]);
 }
diff --git a/crates/vm/src/system/program/trace.rs b/crates/vm/src/system/program/trace.rs
index d9e2abd956..d22a5ba136 100644
--- a/crates/vm/src/system/program/trace.rs
+++ b/crates/vm/src/system/program/trace.rs
@@ -3,63 +3,82 @@ use std::{borrow::BorrowMut, sync::Arc};
 use derivative::Derivative;
 use itertools::Itertools;
 use openvm_circuit::arch::hasher::poseidon2::Poseidon2Hasher;
-use openvm_instructions::{exe::VmExe, program::Program, LocalOpcode, SystemOpcode};
+use openvm_instructions::{
+    exe::VmExe,
+    program::{Program, DEFAULT_PC_STEP},
+    LocalOpcode, SystemOpcode,
+};
 use openvm_stark_backend::{
-    config::{Com, Domain, StarkGenericConfig, Val},
-    p3_commit::{Pcs, PolynomialSpace},
-    p3_field::{Field, FieldAlgebra, PrimeField32, PrimeField64},
+    config::{Com, PcsProverData, StarkGenericConfig, Val},
+    p3_commit::Pcs,
+    p3_field::{Field, FieldAlgebra, PrimeField32},
     p3_matrix::{dense::RowMajorMatrix, Matrix},
     p3_maybe_rayon::prelude::*,
+    p3_util::log2_strict_usize,
     prover::{
-        helper::AirProofInputTestHelper,
-        types::{AirProofInput, AirProofRawInput, CommittedTraceData},
+        cpu::{self, CpuBackend},
+        types::{AirProvingContext, CommittedTraceData},
     },
+    Chip,
 };
 use serde::{Deserialize, Serialize};
 
-use super::{Instruction, ProgramChip, ProgramExecutionCols, EXIT_CODE_FAIL};
+use super::{Instruction, ProgramExecutionCols, EXIT_CODE_FAIL};
 use crate::{
     arch::{
         hasher::{poseidon2::vm_poseidon2_hasher, Hasher},
         MemoryConfig,
     },
-    system::memory::{tree::MemoryNode, AddressMap, CHUNK},
+    system::{
+        memory::{merkle::MerkleTree, AddressMap, CHUNK},
+        program::ProgramChip,
+    },
 };
 
+/// **Note**: this struct stores the program ROM twice: once in [VmExe] and once as a cached trace
+/// matrix `trace`.
 #[derive(Serialize, Deserialize, Derivative)]
 #[serde(bound(
-    serialize = "VmExe<Val<SC>>: Serialize, CommittedTraceData<SC>: Serialize",
-    deserialize = "VmExe<Val<SC>>: Deserialize<'de>, CommittedTraceData<SC>: Deserialize<'de>"
+    serialize = "VmExe<Val<SC>>: Serialize, Com<SC>: Serialize, PcsProverData<SC>: Serialize",
+    deserialize = "VmExe<Val<SC>>: Deserialize<'de>, Com<SC>: Deserialize<'de>, PcsProverData<SC>: Deserialize<'de>"
 ))]
 #[derivative(Clone(bound = "Com<SC>: Clone"))]
 pub struct VmCommittedExe<SC: StarkGenericConfig> {
     /// Raw executable.
-    pub exe: VmExe<Val<SC>>,
-    /// Committed program trace.
-    pub committed_program: CommittedTraceData<SC>,
+    pub exe: Arc<VmExe<Val<SC>>>,
+    program_commitment: Com<SC>,
+    /// Program ROM as cached trace matrix.
+    pub trace: Arc<RowMajorMatrix<Val<SC>>>,
+    pub prover_data: Arc<PcsProverData<SC>>,
 }
 
-impl<SC: StarkGenericConfig> VmCommittedExe<SC>
-where
-    Val<SC>: PrimeField32,
-{
+impl<SC: StarkGenericConfig> VmCommittedExe<SC> {
     /// Creates [VmCommittedExe] from [VmExe] by using `pcs` to commit to the
     /// program code as a _cached trace_ matrix.
     pub fn commit(exe: VmExe<Val<SC>>, pcs: &SC::Pcs) -> Self {
-        let cached_trace = generate_cached_trace(&exe.program);
-        let domain = pcs.natural_domain_for_degree(cached_trace.height());
-        let (commitment, pcs_data) = pcs.commit(vec![(domain, cached_trace.clone())]);
+        let trace = generate_cached_trace(&exe.program);
+        let domain = pcs.natural_domain_for_degree(trace.height());
+
+        let (program_commitment, data) = pcs.commit(vec![(domain, trace.clone())]);
         Self {
-            committed_program: CommittedTraceData {
-                trace: Arc::new(cached_trace),
-                commitment,
-                pcs_data: Arc::new(pcs_data),
-            },
-            exe,
+            exe: Arc::new(exe),
+            program_commitment,
+            trace: Arc::new(trace),
+            prover_data: Arc::new(data),
         }
     }
     pub fn get_program_commit(&self) -> Com<SC> {
-        self.committed_program.commitment.clone()
+        self.program_commitment.clone()
+    }
+
+    pub fn get_committed_trace(&self) -> CommittedTraceData<CpuBackend<SC>> {
+        let log_trace_height: u8 = log2_strict_usize(self.trace.height()).try_into().unwrap();
+        let data = cpu::PcsData::new(self.prover_data.clone(), vec![log_trace_height]);
+        CommittedTraceData {
+            commitment: self.program_commitment.clone(),
+            trace: self.trace.clone(),
+            data,
+        }
     }
 
     /// Computes a commitment to [VmCommittedExe]. This is a Merklelized hash of:
@@ -74,65 +93,51 @@ where
     /// and a cryptographic compression function (for internal nodes).
     ///
     /// **Note**: This function recomputes the Merkle tree for the initial memory image.
-    pub fn compute_exe_commit(&self, memory_config: &MemoryConfig) -> Com<SC>
+    pub fn compute_exe_commit(
+        program_commitment: &Com<SC>,
+        exe: &VmExe<Val<SC>>,
+        memory_config: &MemoryConfig,
+    ) -> Com<SC>
     where
         Com<SC>: AsRef<[Val<SC>; CHUNK]> + From<[Val<SC>; CHUNK]>,
+        Val<SC>: PrimeField32,
     {
         let hasher = vm_poseidon2_hasher();
         let memory_dimensions = memory_config.memory_dimensions();
-        let app_program_commit: &[Val<SC>; CHUNK] = self.committed_program.commitment.as_ref();
+        let app_program_commit: &[Val<SC>; CHUNK] = program_commitment.as_ref();
         let mem_config = memory_config;
-        let init_memory_commit = MemoryNode::tree_from_memory(
-            memory_dimensions,
-            &AddressMap::from_iter(
-                mem_config.as_offset,
-                1 << mem_config.as_height,
-                1 << mem_config.pointer_max_bits,
-                self.exe.init_memory.clone(),
-            ),
-            &hasher,
-        )
-        .hash();
+        let mut memory_image = AddressMap::new(mem_config.addr_spaces.clone());
+        memory_image.set_from_sparse(&exe.init_memory);
+        let init_memory_commit =
+            MerkleTree::from_memory(&memory_image, &memory_dimensions, &hasher).root();
         Com::<SC>::from(compute_exe_commit(
             &hasher,
             app_program_commit,
             &init_memory_commit,
-            Val::<SC>::from_canonical_u32(self.exe.pc_start),
+            Val::<SC>::from_canonical_u32(exe.pc_start),
         ))
     }
 }
 
-impl<F: PrimeField64> ProgramChip<F> {
-    pub fn generate_air_proof_input<SC: StarkGenericConfig>(
-        self,
-        cached: Option<CommittedTraceData<SC>>,
-    ) -> AirProofInput<SC>
-    where
-        Domain<SC>: PolynomialSpace<Val = F>,
-    {
-        let common_trace = RowMajorMatrix::new_col(
-            self.execution_frequencies
-                .into_iter()
-                .zip_eq(self.program.instructions_and_debug_infos.iter())
-                .filter_map(|(frequency, option)| {
-                    option.as_ref().map(|_| F::from_canonical_usize(frequency))
-                })
-                .collect::<Vec<F>>(),
-        );
-        if let Some(cached) = cached {
-            AirProofInput {
-                cached_mains_pdata: vec![(cached.commitment, cached.pcs_data)],
-                raw: AirProofRawInput {
-                    cached_mains: vec![cached.trace],
-                    common_main: Some(common_trace),
-                    public_values: vec![],
-                },
-            }
-        } else {
-            AirProofInput::cached_traces_no_pis(
-                vec![generate_cached_trace(&self.program)],
-                common_trace,
-            )
+impl<RA, SC: StarkGenericConfig> Chip<RA, CpuBackend<SC>> for ProgramChip<SC> {
+    /// The cached program trace is cloned and left for future use. The clone is cheap because the
+    /// cached trace is behind smart pointers. The execution frequencies are left unchanged.
+    fn generate_proving_ctx(&self, _: RA) -> AirProvingContext<CpuBackend<SC>> {
+        let cached = self
+            .cached
+            .clone()
+            .expect("cached program trace must be loaded");
+        assert!(self.filtered_exec_frequencies.len() <= cached.trace.height());
+        let mut freqs = Val::<SC>::zero_vec(cached.trace.height());
+        freqs
+            .par_iter_mut()
+            .zip(self.filtered_exec_frequencies.par_iter())
+            .for_each(|(f, x)| *f = Val::<SC>::from_canonical_u32(*x));
+        let common_trace = RowMajorMatrix::new_col(freqs);
+        AirProvingContext {
+            cached_mains: vec![cached],
+            common_main: Some(Arc::new(common_trace)),
+            public_values: vec![],
         }
     }
 }
@@ -158,7 +163,7 @@ pub fn compute_exe_commit<F: PrimeField32>(
     hasher.compress(&hasher.compress(&program_hash, &memory_hash), &pc_hash)
 }
 
-pub(crate) fn generate_cached_trace<F: PrimeField64>(program: &Program<F>) -> RowMajorMatrix<F> {
+pub(crate) fn generate_cached_trace<F: Field>(program: &Program<F>) -> RowMajorMatrix<F> {
     let width = ProgramExecutionCols::<F>::width();
     let mut instructions = program
         .enumerate_by_pc()
@@ -169,7 +174,7 @@ pub(crate) fn generate_cached_trace<F: PrimeField64>(program: &Program<F>) -> Ro
     let padding = padding_instruction();
     while !instructions.len().is_power_of_two() {
         instructions.push((
-            program.pc_base + instructions.len() as u32 * program.step,
+            program.pc_base + instructions.len() as u32 * DEFAULT_PC_STEP,
             padding.clone(),
         ));
     }
diff --git a/crates/vm/src/system/public_values/core.rs b/crates/vm/src/system/public_values/core.rs
index de189f101b..a4120ef628 100644
--- a/crates/vm/src/system/public_values/core.rs
+++ b/crates/vm/src/system/public_values/core.rs
@@ -1,8 +1,12 @@
-use std::sync::Mutex;
+use std::marker::PhantomData;
 
-use openvm_circuit_primitives::{encoder::Encoder, SubAir};
+use getset::Setters;
+use openvm_circuit_primitives::{encoder::Encoder, AlignedBytesBorrow, SubAir};
 use openvm_instructions::{
-    instruction::Instruction, LocalOpcode, PublishOpcode, PublishOpcode::PUBLISH,
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    LocalOpcode,
+    PublishOpcode::{self, PUBLISH},
 };
 use openvm_stark_backend::{
     interaction::InteractionBuilder,
@@ -10,17 +14,21 @@ use openvm_stark_backend::{
     p3_field::{Field, FieldAlgebra, PrimeField32},
     rap::BaseAirWithPublicValues,
 };
-use serde::{Deserialize, Serialize};
 
 use crate::{
     arch::{
-        AdapterAirContext, AdapterRuntimeContext, BasicAdapterInterface, MinimalInstruction,
-        Result, VmAdapterInterface, VmCoreAir, VmCoreChip,
+        get_record_from_slice, AdapterAirContext, AdapterTraceExecutor, AdapterTraceFiller,
+        BasicAdapterInterface, EmptyAdapterCoreLayout, ExecutionError, MinimalInstruction,
+        PreflightExecutor, RecordArena, TraceFiller, VmCoreAir, VmStateMut,
+    },
+    system::{
+        memory::{online::TracingMemory, MemoryAuxColsFactory},
+        native_adapter::NativeAdapterExecutor,
+        public_values::columns::PublicValuesCoreColsView,
     },
-    system::public_values::columns::PublicValuesCoreColsView,
 };
+
 pub(crate) type AdapterInterface<F> = BasicAdapterInterface<F, MinimalInstruction<F>, 2, 0, 1, 1>;
-pub(crate) type AdapterInterfaceReads<F> = <AdapterInterface<F> as VmAdapterInterface<F>>::Reads;
 
 #[derive(Clone, Debug)]
 pub struct PublicValuesCoreAir {
@@ -99,95 +107,139 @@ impl<AB: InteractionBuilder + AirBuilderWithPublicValues> VmCoreAir<AB, AdapterI
 }
 
 #[repr(C)]
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(AlignedBytesBorrow, Debug)]
 pub struct PublicValuesRecord<F> {
-    value: F,
-    index: F,
+    pub value: F,
+    pub index: F,
 }
 
 /// ATTENTION: If a specific public value is not provided, a default 0 will be used when generating
 /// the proof but in the perspective of constraints, it could be any value.
-pub struct PublicValuesCoreChip<F> {
-    air: PublicValuesCoreAir,
-    // Mutex is to make the struct Sync. But it actually won't be accessed by multiple threads.
-    custom_pvs: Mutex<Vec<Option<F>>>,
+#[derive(Clone)]
+pub struct PublicValuesExecutor<F, A = NativeAdapterExecutor<F, 2, 0>> {
+    adapter: A,
+    phantom: PhantomData<F>,
+}
+
+#[derive(Clone, Setters)]
+pub struct PublicValuesFiller<F, A = NativeAdapterExecutor<F, 2, 0>> {
+    adapter: A, // fills the adapter columns of each trace row
+    encoder: Encoder, // maps a public-value index to its flag point (`get_flag_pt`)
+    num_custom_pvs: usize, // required length of `public_values`
+    public_values: Vec<F>, // empty until `set_public_values` is called
+}
+
+impl<F: Clone, A> PublicValuesExecutor<F, A> {
+    pub fn new(adapter: A) -> Self {
+        Self {
+            adapter,
+            phantom: PhantomData,
+        }
+    }
 }
 
-impl<F: PrimeField32> PublicValuesCoreChip<F> {
+impl<F: Clone, A> PublicValuesFiller<F, A> {
     /// **Note:** `max_degree` is the maximum degree of the constraint polynomials to represent the
     /// flags. If you want the overall AIR's constraint degree to be `<= max_constraint_degree`,
     /// then typically you should set `max_degree` to `max_constraint_degree - 1`.
-    pub fn new(num_custom_pvs: usize, max_degree: u32) -> Self {
+    pub fn new(adapter: A, num_custom_pvs: usize, max_degree: u32) -> Self {
         Self {
-            air: PublicValuesCoreAir::new(num_custom_pvs, max_degree),
-            custom_pvs: Mutex::new(vec![None; num_custom_pvs]),
+            adapter,
+            encoder: Encoder::new(num_custom_pvs, max_degree, true),
+            num_custom_pvs,
+            public_values: Vec::new(),
         }
     }
-    pub fn get_custom_public_values(&self) -> Vec<Option<F>> {
-        self.custom_pvs.lock().unwrap().clone()
+
+    pub fn set_public_values(&mut self, public_values: Vec<F>)
+    where
+        F: Field,
+    {
+        assert_eq!(public_values.len(), self.num_custom_pvs);
+        self.public_values = public_values;
     }
 }
 
-impl<F: PrimeField32> VmCoreChip<F, AdapterInterface<F>> for PublicValuesCoreChip<F> {
-    type Record = PublicValuesRecord<F>;
-    type Air = PublicValuesCoreAir;
+impl<F, A, RA> PreflightExecutor<F, RA> for PublicValuesExecutor<F, A>
+where
+    F: PrimeField32,
+    A: 'static + Clone + AdapterTraceExecutor<F, ReadData = [[F; 1]; 2], WriteData = [[F; 1]; 0]>,
+    for<'buf> RA: RecordArena<
+        'buf,
+        EmptyAdapterCoreLayout<F, A>,
+        (A::RecordMut<'buf>, &'buf mut PublicValuesRecord<F>),
+    >,
+{
+    fn get_opcode_name(&self, opcode: usize) -> String {
+        format!(
+            "{:?}",
+            PublishOpcode::from_usize(opcode - PublishOpcode::CLASS_OFFSET)
+        )
+    }
 
-    #[allow(clippy::type_complexity)]
-    fn execute_instruction(
+    fn execute(
         &self,
-        _instruction: &Instruction<F>,
-        _from_pc: u32,
-        reads: AdapterInterfaceReads<F>,
-    ) -> Result<(AdapterRuntimeContext<F, AdapterInterface<F>>, Self::Record)> {
-        let [[value], [index]] = reads;
+        state: VmStateMut<F, TracingMemory, RA>,
+        instruction: &Instruction<F>,
+    ) -> Result<(), ExecutionError> {
+        let (mut adapter_record, core_record) = state.ctx.alloc(EmptyAdapterCoreLayout::new());
+
+        A::start(*state.pc, state.memory, &mut adapter_record);
+
+        [[core_record.value], [core_record.index]] =
+            self.adapter
+                .read(state.memory, instruction, &mut adapter_record);
         {
-            let idx: usize = index.as_canonical_u32() as usize;
-            let mut custom_pvs = self.custom_pvs.lock().unwrap();
+            let idx: usize = core_record.index.as_canonical_u32() as usize;
+            let custom_pvs = state.custom_pvs;
 
             if custom_pvs[idx].is_none() {
-                custom_pvs[idx] = Some(value);
+                custom_pvs[idx] = Some(core_record.value);
             } else {
                 // Not a hard constraint violation when publishing the same value twice but the
                 // program should avoid that.
                 panic!("Custom public value {} already set", idx);
             }
         }
-        let output = AdapterRuntimeContext {
-            to_pc: None,
-            writes: [],
-        };
-        let record = Self::Record { value, index };
-        Ok((output, record))
-    }
 
-    fn get_opcode_name(&self, opcode: usize) -> String {
-        format!(
-            "{:?}",
-            PublishOpcode::from_usize(opcode - PublishOpcode::CLASS_OFFSET)
-        )
+        *state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP);
+
+        Ok(())
     }
+}
+
+impl<F, A> TraceFiller<F> for PublicValuesFiller<F, A>
+where
+    F: PrimeField32,
+    A: 'static + AdapterTraceFiller<F>,
+{
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, row_slice: &mut [F]) {
+        let (adapter_row, mut core_row) = unsafe { row_slice.split_at_mut_unchecked(A::WIDTH) };
+        self.adapter.fill_trace_row(mem_helper, adapter_row);
+        let record: &PublicValuesRecord<F> = unsafe { get_record_from_slice(&mut core_row, ()) };
+        let cols = PublicValuesCoreColsView::<_, &mut F>::borrow_mut(core_row);
 
-    fn generate_trace_row(&self, row_slice: &mut [F], record: Self::Record) {
-        let mut cols = PublicValuesCoreColsView::<_, &mut F>::borrow_mut(row_slice);
-        debug_assert_eq!(cols.width(), BaseAir::<F>::width(&self.air));
-        *cols.is_valid = F::ONE;
-        *cols.value = record.value;
-        *cols.index = record.index;
         let idx: usize = record.index.as_canonical_u32() as usize;
-        let pt = self.air.encoder.get_flag_pt(idx);
-        for (i, var) in cols.custom_pv_vars.iter_mut().enumerate() {
-            **var = F::from_canonical_u32(pt[i]);
-        }
-    }
+        let pt = self.encoder.get_flag_pt(idx);
 
-    fn generate_public_values(&self) -> Vec<F> {
-        self.get_custom_public_values()
+        cols.custom_pv_vars
             .into_iter()
-            .map(|x| x.unwrap_or(F::ZERO))
-            .collect()
+            .zip(pt.iter())
+            .for_each(|(var, &val)| {
+                *var = F::from_canonical_u32(val);
+            });
+        // NOTE(review): `record` seems to alias `core_row`, so index/value go last - confirm.
+        *cols.index = record.index;
+        *cols.value = record.value;
+        *cols.is_valid = F::ONE;
     }
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+    fn generate_public_values(&self) -> Vec<F> {
+        assert_eq!(
+            self.public_values.len(),
+            self.num_custom_pvs,
+            "Did not set public values"
+        );
+        self.public_values.clone()
     }
 }
diff --git a/crates/vm/src/system/public_values/execution.rs b/crates/vm/src/system/public_values/execution.rs
new file mode 100644
index 0000000000..34c1f22ff0
--- /dev/null
+++ b/crates/vm/src/system/public_values/execution.rs
@@ -0,0 +1,179 @@
+use std::borrow::{Borrow, BorrowMut};
+
+use openvm_circuit_primitives::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_IMM_AS, NATIVE_AS,
+};
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::PublicValuesExecutor;
+use crate::{
+    arch::{
+        execution_mode::{ExecutionCtxTrait, MeteredExecutionCtxTrait},
+        E2PreCompute, ExecuteFunc, Executor, MeteredExecutor, StaticProgramError, VmExecState,
+    },
+    system::memory::online::GuestMemory,
+    utils::{transmute_field_to_u32, transmute_u32_to_field},
+};
+
+#[derive(AlignedBytesBorrow)]
+#[repr(C)]
+struct PublicValuesPreCompute {
+    b_or_imm: u32, // operand `b`: raw field bits if immediate, else its canonical `u32` value
+    c_or_imm: u32, // operand `c`: raw field bits if immediate, else its canonical `u32` value
+}
+
+impl<F, A> PublicValuesExecutor<F, A>
+where
+    F: PrimeField32,
+{
+    /// Decodes operands `b` and `c` of `inst` into `data`.
+    ///
+    /// An operand counts as an immediate when its address-space field (`e` for `b`, `f`
+    /// for `c`) equals `RV32_IMM_AS`. Immediates are stored as the raw bits of the field
+    /// element; every other operand is stored as its canonical `u32` value.
+    ///
+    /// Returns `(b_is_imm, c_is_imm)`.
+    fn pre_compute_impl(
+        &self,
+        inst: &Instruction<F>,
+        data: &mut PublicValuesPreCompute,
+    ) -> (bool, bool) {
+        // Stores an operand in the form the handlers expect for its kind.
+        let encode = |operand: F, is_imm: bool| match is_imm {
+            true => transmute_field_to_u32(&operand),
+            false => operand.as_canonical_u32(),
+        };
+
+        let b_is_imm = inst.e.as_canonical_u32() == RV32_IMM_AS;
+        let c_is_imm = inst.f.as_canonical_u32() == RV32_IMM_AS;
+
+        *data = PublicValuesPreCompute {
+            b_or_imm: encode(inst.b, b_is_imm),
+            c_or_imm: encode(inst.c, c_is_imm),
+        };
+
+        (b_is_imm, c_is_imm)
+    }
+}
+
+impl<F, A> Executor<F> for PublicValuesExecutor<F, A>
+where
+    F: PrimeField32,
+{
+    /// Number of bytes of pre-compute storage one instruction needs.
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize {
+        size_of::<PublicValuesPreCompute>()
+    }
+
+    /// Fills `data` for `inst` and returns the handler monomorphized for its operand kinds.
+    #[inline(always)]
+    fn pre_compute<Ctx>(
+        &self,
+        _pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: ExecutionCtxTrait,
+    {
+        let pre_compute: &mut PublicValuesPreCompute = data.borrow_mut();
+        let handler: ExecuteFunc<F, Ctx> = match self.pre_compute_impl(inst, pre_compute) {
+            (true, true) => execute_e1_impl::<_, _, true, true>,
+            (true, false) => execute_e1_impl::<_, _, true, false>,
+            (false, true) => execute_e1_impl::<_, _, false, true>,
+            (false, false) => execute_e1_impl::<_, _, false, false>,
+        };
+        Ok(handler)
+    }
+}
+
+impl<F, A> MeteredExecutor<F> for PublicValuesExecutor<F, A>
+where
+    F: PrimeField32,
+{
+    /// Number of bytes of pre-compute storage one metered instruction needs.
+    fn metered_pre_compute_size(&self) -> usize {
+        size_of::<E2PreCompute<PublicValuesPreCompute>>()
+    }
+
+    /// Records `chip_idx`, fills the operand data for `inst` and returns the metered handler.
+    fn metered_pre_compute<Ctx>(
+        &self,
+        chip_idx: usize,
+        _pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        let metered: &mut E2PreCompute<PublicValuesPreCompute> = data.borrow_mut();
+        metered.chip_idx = chip_idx as u32;
+        let handler: ExecuteFunc<F, Ctx> = match self.pre_compute_impl(inst, &mut metered.data) {
+            (true, true) => execute_e2_impl::<_, _, true, true>,
+            (true, false) => execute_e2_impl::<_, _, true, false>,
+            (false, true) => execute_e2_impl::<_, _, false, true>,
+            (false, false) => execute_e2_impl::<_, _, false, false>,
+        };
+        Ok(handler)
+    }
+}
+
+/// Shared body of the E1/E2 handlers: publishes operand `b` at the index given by operand `c`.
+///
+/// Panics if that index has already been published.
+#[inline(always)]
+unsafe fn execute_e12_impl<F: PrimeField32, CTX, const B_IS_IMM: bool, const C_IS_IMM: bool>(
+    pre_compute: &PublicValuesPreCompute,
+    state: &mut VmExecState<F, GuestMemory, CTX>,
+) where
+    CTX: ExecutionCtxTrait,
+{
+    let value: F = match B_IS_IMM {
+        true => transmute_u32_to_field(&pre_compute.b_or_imm),
+        false => state.vm_read::<F, 1>(NATIVE_AS, pre_compute.b_or_imm)[0],
+    };
+    let index: F = match C_IS_IMM {
+        true => transmute_u32_to_field(&pre_compute.c_or_imm),
+        false => state.vm_read::<F, 1>(NATIVE_AS, pre_compute.c_or_imm)[0],
+    };
+    let idx = index.as_canonical_u32() as usize;
+
+    // Each index may be published at most once.
+    let slot = &mut state.vm_state.custom_pvs[idx];
+    if slot.is_some() {
+        // Not a hard constraint violation when publishing the same value twice but the
+        // program should avoid that.
+        panic!("Custom public value {} already set", idx);
+    }
+    *slot = Some(value);
+
+    // Move on to the next instruction and count this one as executed.
+    state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP);
+    state.instret += 1;
+}
+
+#[inline(always)]
+unsafe fn execute_e1_impl<F: PrimeField32, CTX, const B_IS_IMM: bool, const C_IS_IMM: bool>(
+    pre_compute: &[u8],
+    state: &mut VmExecState<F, GuestMemory, CTX>,
+) where
+    CTX: ExecutionCtxTrait,
+{
+    let decoded = Borrow::<PublicValuesPreCompute>::borrow(pre_compute); // typed view of bytes
+    execute_e12_impl::<F, CTX, B_IS_IMM, C_IS_IMM>(decoded, state);
+}
+
+#[inline(always)]
+unsafe fn execute_e2_impl<F: PrimeField32, CTX, const B_IS_IMM: bool, const C_IS_IMM: bool>(
+    pre_compute: &[u8],
+    state: &mut VmExecState<F, GuestMemory, CTX>,
+) where
+    CTX: MeteredExecutionCtxTrait,
+{
+    let metered = Borrow::<E2PreCompute<PublicValuesPreCompute>>::borrow(pre_compute);
+    state.ctx.on_height_change(metered.chip_idx as usize, 1); // height change of 1 for this chip
+    execute_e12_impl::<F, CTX, B_IS_IMM, C_IS_IMM>(&metered.data, state);
+}
diff --git a/crates/vm/src/system/public_values/mod.rs b/crates/vm/src/system/public_values/mod.rs
index 918606497b..84fe686005 100644
--- a/crates/vm/src/system/public_values/mod.rs
+++ b/crates/vm/src/system/public_values/mod.rs
@@ -1,18 +1,16 @@
 use crate::{
     arch::{VmAirWrapper, VmChipWrapper},
-    system::{
-        native_adapter::{NativeAdapterAir, NativeAdapterChip},
-        public_values::core::{PublicValuesCoreAir, PublicValuesCoreChip},
-    },
+    system::native_adapter::NativeAdapterAir,
 };
 
 mod columns;
 /// Chip to publish custom public values from VM programs.
-pub mod core;
+mod core;
+mod execution;
+pub use core::*;
 
 #[cfg(test)]
 mod tests;
 
 pub type PublicValuesAir = VmAirWrapper<NativeAdapterAir<2, 0>, PublicValuesCoreAir>;
-pub type PublicValuesChip<F> =
-    VmChipWrapper<F, NativeAdapterChip<F, 2, 0>, PublicValuesCoreChip<F>>;
+pub type PublicValuesChip<F> = VmChipWrapper<F, PublicValuesFiller<F>>;
diff --git a/crates/vm/src/system/public_values/tests.rs b/crates/vm/src/system/public_values/tests.rs
index dbf9dc217d..4798e84340 100644
--- a/crates/vm/src/system/public_values/tests.rs
+++ b/crates/vm/src/system/public_values/tests.rs
@@ -1,30 +1,44 @@
 use std::sync::Arc;
 
+use openvm_instructions::{
+    instruction::Instruction, riscv::RV32_IMM_AS, LocalOpcode, PublishOpcode, NATIVE_AS,
+};
 use openvm_stark_backend::{
     interaction::InteractionBuilder,
     p3_air::{Air, AirBuilderWithPublicValues},
-    p3_field::{Field, FieldAlgebra},
+    p3_field::{Field, FieldAlgebra, PrimeField32},
     p3_matrix::{dense::RowMajorMatrix, Matrix},
-    prover::types::AirProofInput,
+    prover::types::AirProvingContext,
     rap::PartitionedBaseAir,
     utils::disable_debug_builder,
     verifier::VerificationError,
     AirRef,
 };
 use openvm_stark_sdk::{
-    config::baby_bear_poseidon2::BabyBearPoseidon2Engine, engine::StarkFriEngine,
-    p3_baby_bear::BabyBear, utils::to_field_vec,
+    config::baby_bear_poseidon2::BabyBearPoseidon2Engine,
+    engine::StarkFriEngine,
+    p3_baby_bear::BabyBear,
+    utils::{create_seeded_rng, to_field_vec},
 };
+use rand::{rngs::StdRng, Rng};
 
 use crate::{
-    arch::VmCoreAir,
-    system::public_values::{
-        columns::PublicValuesCoreColsView,
-        core::{AdapterInterface, PublicValuesCoreAir},
+    arch::{
+        testing::{memory::gen_pointer, TestChipHarness, VmChipTestBuilder},
+        MemoryConfig, SystemConfig, VmCoreAir,
+    },
+    system::{
+        native_adapter::{NativeAdapterAir, NativeAdapterExecutor},
+        public_values::{
+            columns::PublicValuesCoreColsView,
+            core::{AdapterInterface, PublicValuesCoreAir},
+            PublicValuesAir, PublicValuesChip, PublicValuesExecutor, PublicValuesFiller,
+        },
     },
 };
 
 type F = BabyBear;
+type Harness = TestChipHarness<F, PublicValuesExecutor<F>, PublicValuesAir, PublicValuesChip<F>>;
 
 impl<F: Field> PartitionedBaseAir<F> for PublicValuesCoreAir {}
 
@@ -38,6 +52,110 @@ impl<AB: InteractionBuilder + AirBuilderWithPublicValues> Air<AB> for PublicValu
     }
 }
 
+/// Builds the test harness for the public values chip.
+///
+/// The number of custom public values and the flag degree (`max_constraint_degree - 1`) are
+/// both read from `system_config`.
+fn create_test_chips(tester: &VmChipTestBuilder<F>, system_config: &SystemConfig) -> Harness {
+    let capacity = system_config.num_public_values;
+    let flag_degree = system_config.max_constraint_degree as u32 - 1;
+
+    // The core AIR and the filler are built from the same count and degree.
+    let adapter_air = NativeAdapterAir::new(tester.execution_bridge(), tester.memory_bridge());
+    let core_air = PublicValuesCoreAir::new(capacity, flag_degree);
+    let air = PublicValuesAir::new(adapter_air, core_air);
+
+    // The executor and the trace filler each own a separate default adapter.
+    let executor = PublicValuesExecutor::new(NativeAdapterExecutor::<F, 2, 0>::default());
+
+    let filler_adapter = NativeAdapterExecutor::<F, 2, 0>::default();
+    let filler = PublicValuesFiller::new(filler_adapter, capacity, flag_degree);
+    let cpu_chip = PublicValuesChip::new(filler, tester.memory_helper());
+
+    Harness::with_capacity(executor, air, cpu_chip, capacity)
+}
+
+/// Executes one `PUBLISH` of a random value at index `idx` and appends it to `public_values`.
+/// Each operand is randomly passed as an immediate or as a pointer into `NATIVE_AS`.
+fn set_and_execute(
+    tester: &mut VmChipTestBuilder<F>,
+    harness: &mut Harness,
+    rng: &mut StdRng,
+    public_values: &mut Vec<F>,
+    idx: u32,
+) {
+    let to_f = F::from_canonical_u32;
+
+    // Operand `b` (address space `e`): the value being published.
+    let (b, e) = match rng.gen_bool(0.5) {
+        true => {
+            let value = to_f(rng.gen_range(0..F::ORDER_U32));
+            public_values.push(value);
+            (value, to_f(RV32_IMM_AS))
+        }
+        false => {
+            let ptr = gen_pointer(rng, 4);
+            let value = to_f(rng.gen_range(0..F::ORDER_U32));
+            public_values.push(value);
+            tester.write(NATIVE_AS as usize, ptr, [value]);
+            (to_f(ptr as u32), to_f(NATIVE_AS))
+        }
+    };
+
+    // Operand `c` (address space `f`): the public value index.
+    let (c, f) = match rng.gen_bool(0.5) {
+        true => (to_f(idx), to_f(RV32_IMM_AS)),
+        false => {
+            let ptr = gen_pointer(rng, 4);
+            tester.write(NATIVE_AS as usize, ptr, [to_f(idx)]);
+            (to_f(ptr as u32), to_f(NATIVE_AS))
+        }
+    };
+
+    let instruction = Instruction {
+        opcode: PublishOpcode::PUBLISH.global_opcode(),
+        a: F::ZERO,
+        b,
+        c,
+        d: F::ZERO,
+        e,
+        f,
+        g: F::ZERO,
+    };
+
+    tester.execute(harness, &instruction);
+}
+
+// Publishes a random value at each index exactly once, then proves and verifies.
+#[test]
+fn public_values_rand_test() {
+    let mut rng = create_seeded_rng();
+    let system_config = SystemConfig::default();
+    let num_pvs = system_config.num_public_values;
+    let mut tester = VmChipTestBuilder::volatile(MemoryConfig::default());
+    tester.set_num_public_values(num_pvs);
+    let mut harness = create_test_chips(&tester, &system_config);
+
+    let mut published = Vec::new();
+    (0..num_pvs).for_each(|idx| {
+        set_and_execute(
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            &mut published,
+            idx as u32,
+        )
+    });
+    harness.chip.inner.set_public_values(published);
+
+    tester
+        .build()
+        .load(harness)
+        .finalize()
+        .simple_test()
+        .expect("Verification failed");
+}
+
 #[test]
 fn public_values_happy_path_1() {
     let cols = PublicValuesCoreColsView::<F, F> {
@@ -51,8 +169,11 @@ fn public_values_happy_path_1() {
     let trace = RowMajorMatrix::new_row(cols.flatten());
     let pvs = to_field_vec(vec![0, 0, 12]);
 
-    BabyBearPoseidon2Engine::run_test_fast(vec![air], vec![AirProofInput::simple(trace, pvs)])
-        .expect("Verification failed");
+    BabyBearPoseidon2Engine::run_test_fast(
+        vec![air],
+        vec![AirProvingContext::simple(Arc::new(trace), pvs)],
+    )
+    .expect("Verification failed");
 }
 
 #[test]
@@ -70,8 +191,11 @@ fn public_values_neg_pv_not_match() {
 
     disable_debug_builder();
     assert_eq!(
-        BabyBearPoseidon2Engine::run_test_fast(vec![air], vec![AirProofInput::simple(trace, pvs)])
-            .err(),
+        BabyBearPoseidon2Engine::run_test_fast(
+            vec![air],
+            vec![AirProvingContext::simple(Arc::new(trace), pvs)]
+        )
+        .err(),
         Some(VerificationError::OodEvaluationMismatch)
     );
 }
@@ -91,8 +215,11 @@ fn public_values_neg_index_out_of_bound() {
 
     disable_debug_builder();
     assert_eq!(
-        BabyBearPoseidon2Engine::run_test_fast(vec![air], vec![AirProofInput::simple(trace, pvs)])
-            .err(),
+        BabyBearPoseidon2Engine::run_test_fast(
+            vec![air],
+            vec![AirProvingContext::simple(Arc::new(trace), pvs)]
+        )
+        .err(),
         Some(VerificationError::OodEvaluationMismatch)
     );
 }
@@ -129,8 +256,11 @@ fn public_values_neg_double_publish_impl(actual_pv: u32) {
 
     disable_debug_builder();
     assert_eq!(
-        BabyBearPoseidon2Engine::run_test_fast(vec![air], vec![AirProofInput::simple(trace, pvs)])
-            .err(),
+        BabyBearPoseidon2Engine::run_test_fast(
+            vec![air],
+            vec![AirProvingContext::simple(Arc::new(trace), pvs)]
+        )
+        .err(),
         Some(VerificationError::OodEvaluationMismatch)
     );
 }
diff --git a/crates/vm/src/utils/mod.rs b/crates/vm/src/utils/mod.rs
index 7b4823c53a..0d86c280d9 100644
--- a/crates/vm/src/utils/mod.rs
+++ b/crates/vm/src/utils/mod.rs
@@ -1,10 +1,59 @@
 #[cfg(any(test, feature = "test-utils"))]
 mod stark_utils;
 #[cfg(any(test, feature = "test-utils"))]
-mod test_utils;
+pub mod test_utils;
+
+use std::mem::size_of_val;
 
 pub use openvm_circuit_primitives::utils::next_power_of_two_or_zero;
+use openvm_stark_backend::p3_field::PrimeField32;
 #[cfg(any(test, feature = "test-utils"))]
 pub use stark_utils::*;
 #[cfg(any(test, feature = "test-utils"))]
 pub use test_utils::*;
+
+/// Reinterprets the in-memory representation of `field` as a `u32` (no canonical reduction).
+#[inline(always)]
+pub fn transmute_field_to_u32<F: PrimeField32>(field: &F) -> u32 {
+    assert_eq!(
+        std::mem::size_of::<F>(),
+        std::mem::size_of::<u32>(),
+        "Field type F must have the same size as u32"
+    );
+    assert_eq!(
+        std::mem::align_of::<F>(),
+        std::mem::align_of::<u32>(),
+        "Field type F must have the same alignment as u32"
+    );
+    // SAFETY: the assertions above hold in every build profile, so the read below covers
+    // exactly the bytes of `field`; `F` is assumed to be a plain wrapper around one `u32`.
+    unsafe { *(field as *const F as *const u32) }
+}
+
+/// Reinterprets the bits of `value` as a field element `F` (inverse of the raw-bits read).
+#[inline(always)]
+pub fn transmute_u32_to_field<F: PrimeField32>(value: &u32) -> F {
+    assert_eq!(
+        std::mem::size_of::<F>(),
+        std::mem::size_of::<u32>(),
+        "Field type F must have the same size as u32"
+    );
+    assert_eq!(
+        std::mem::align_of::<F>(),
+        std::mem::align_of::<u32>(),
+        "Field type F must have the same alignment as u32"
+    );
+    // SAFETY: the assertions above hold in every build profile, so the read below stays inside
+    // `value`; `F` is assumed to be a plain wrapper around one `u32` valid for these bits.
+    unsafe { *(value as *const u32 as *const F) }
+}
+
+/// # Safety
+/// `T` must be plain old data, so that viewing its bytes cannot interfere with any [Drop]
+/// behavior of `T`.
+#[inline(always)]
+pub unsafe fn slice_as_bytes<T>(slice: &[T]) -> &[u8] {
+    let (base, byte_len) = (slice.as_ptr().cast::<u8>(), size_of_val(slice));
+    // SAFETY: `base` and `byte_len` describe exactly the memory occupied by `slice`.
+    unsafe { std::slice::from_raw_parts(base, byte_len) }
+}
diff --git a/crates/vm/src/utils/stark_utils.rs b/crates/vm/src/utils/stark_utils.rs
index d940be5c75..de1a834d30 100644
--- a/crates/vm/src/utils/stark_utils.rs
+++ b/crates/vm/src/utils/stark_utils.rs
@@ -1,170 +1,173 @@
-use itertools::multiunzip;
-use openvm_instructions::{exe::VmExe, program::Program};
+use openvm_instructions::exe::VmExe;
 use openvm_stark_backend::{
-    config::{StarkGenericConfig, Val},
+    config::{Com, Val},
+    engine::VerificationData,
     p3_field::PrimeField32,
-    verifier::VerificationError,
-    Chip,
 };
 use openvm_stark_sdk::{
     config::{
         baby_bear_poseidon2::{BabyBearPoseidon2Config, BabyBearPoseidon2Engine},
         setup_tracing, FriParameters,
     },
-    engine::{StarkEngine, StarkFriEngine, VerificationDataWithFriParams},
+    engine::{StarkFriEngine, VerificationDataWithFriParams},
     p3_baby_bear::BabyBear,
-    utils::ProofInputForTest,
 };
 
-use crate::arch::{
-    vm::{VirtualMachine, VmExecutor},
-    Streams, VmConfig, VmMemoryState,
+use crate::{
+    arch::{
+        debug_proving_ctx, execution_mode::Segment, vm::VirtualMachine, Executor, ExitCode,
+        MatrixRecordArena, MeteredExecutor, PreflightExecutionOutput, PreflightExecutor, Streams,
+        VmBuilder, VmCircuitConfig, VmConfig, VmExecutionConfig,
+    },
+    system::memory::{MemoryImage, CHUNK},
 };
 
-pub fn air_test<VC>(config: VC, exe: impl Into<VmExe<BabyBear>>)
+// NOTE on trait bounds: the compiler cannot figure out `Val<SC> = BabyBear` without the
+// `VmExecutionConfig` and `VmCircuitConfig` bounds, even though `VmBuilder` already includes them.
+// The compiler also seems to need the extra `VC` parameter even though `VC = VB::VmConfig`.
+pub fn air_test<VB, VC>(builder: VB, config: VC, exe: impl Into<VmExe<BabyBear>>)
 where
-    VC: VmConfig<BabyBear>,
-    VC::Executor: Chip<BabyBearPoseidon2Config>,
-    VC::Periphery: Chip<BabyBearPoseidon2Config>,
+    VB: VmBuilder<
+        BabyBearPoseidon2Engine,
+        VmConfig = VC,
+        RecordArena = MatrixRecordArena<BabyBear>,
+    >,
+    VC: VmExecutionConfig<BabyBear>
+        + VmCircuitConfig<BabyBearPoseidon2Config>
+        + VmConfig<BabyBearPoseidon2Config>,
+    <VC as VmExecutionConfig<BabyBear>>::Executor: Executor<BabyBear>
+        + MeteredExecutor<BabyBear>
+        + PreflightExecutor<BabyBear, MatrixRecordArena<BabyBear>>,
 {
-    air_test_with_min_segments(config, exe, Streams::default(), 1);
+    air_test_with_min_segments(builder, config, exe, Streams::default(), 1);
 }
 
 /// Executes and proves the VM and returns the final memory state.
-pub fn air_test_with_min_segments<VC>(
+pub fn air_test_with_min_segments<VB, VC>(
+    builder: VB,
     config: VC,
     exe: impl Into<VmExe<BabyBear>>,
     input: impl Into<Streams<BabyBear>>,
     min_segments: usize,
-) -> Option<VmMemoryState<BabyBear>>
+) -> Option<MemoryImage>
 where
-    VC: VmConfig<BabyBear>,
-    VC::Executor: Chip<BabyBearPoseidon2Config>,
-    VC::Periphery: Chip<BabyBearPoseidon2Config>,
+    VB: VmBuilder<
+        BabyBearPoseidon2Engine,
+        VmConfig = VC,
+        RecordArena = MatrixRecordArena<BabyBear>,
+    >,
+    VC: VmExecutionConfig<BabyBear>
+        + VmCircuitConfig<BabyBearPoseidon2Config>
+        + VmConfig<BabyBearPoseidon2Config>,
+    <VC as VmExecutionConfig<BabyBear>>::Executor: Executor<BabyBear>
+        + MeteredExecutor<BabyBear>
+        + PreflightExecutor<BabyBear, MatrixRecordArena<BabyBear>>,
 {
-    air_test_impl(config, exe, input, min_segments, true)
+    let mut log_blowup = 1;
+    while config.as_ref().max_constraint_degree > (1 << log_blowup) + 1 {
+        log_blowup += 1;
+    }
+    let fri_params = FriParameters::new_for_testing(log_blowup);
+    let (final_memory, _) = air_test_impl::<BabyBearPoseidon2Engine, VB>(
+        fri_params,
+        builder,
+        config,
+        exe,
+        input,
+        min_segments,
+        true,
+    )
+    .unwrap();
+    final_memory
 }
 
 /// Executes and proves the VM and returns the final memory state.
 /// If `debug` is true, runs the debug prover.
-pub fn air_test_impl<VC>(
-    config: VC,
-    exe: impl Into<VmExe<BabyBear>>,
-    input: impl Into<Streams<BabyBear>>,
+//
+// Same implementation as VmLocalProver, but we need to do something special to run the debug prover
+#[allow(clippy::type_complexity)]
+pub fn air_test_impl<E, VB>(
+    fri_params: FriParameters,
+    builder: VB,
+    config: VB::VmConfig,
+    exe: impl Into<VmExe<Val<E::SC>>>,
+    input: impl Into<Streams<Val<E::SC>>>,
     min_segments: usize,
     debug: bool,
-) -> Option<VmMemoryState<BabyBear>>
+) -> eyre::Result<(
+    Option<MemoryImage>,
+    Vec<VerificationDataWithFriParams<E::SC>>,
+)>
 where
-    VC: VmConfig<BabyBear>,
-    VC::Executor: Chip<BabyBearPoseidon2Config>,
-    VC::Periphery: Chip<BabyBearPoseidon2Config>,
+    E: StarkFriEngine,
+    Val<E::SC>: PrimeField32,
+    VB: VmBuilder<E>,
+    <VB::VmConfig as VmExecutionConfig<Val<E::SC>>>::Executor: Executor<Val<E::SC>>
+        + MeteredExecutor<Val<E::SC>>
+        + PreflightExecutor<Val<E::SC>, VB::RecordArena>,
+    Com<E::SC>: AsRef<[Val<E::SC>; CHUNK]> + From<[Val<E::SC>; CHUNK]>,
 {
     setup_tracing();
-    let mut log_blowup = 1;
-    while config.system().max_constraint_degree > (1 << log_blowup) + 1 {
-        log_blowup += 1;
-    }
-    let engine = BabyBearPoseidon2Engine::new(FriParameters::new_for_testing(log_blowup));
-    let vm = VirtualMachine::new(engine, config);
-    let pk = vm.keygen();
-    let mut result = vm.execute_and_generate(exe, input).unwrap();
-    let final_memory = Option::take(&mut result.final_memory);
-    let global_airs = vm.config().create_chip_complex().unwrap().airs();
-    if debug {
-        for proof_input in &result.per_segment {
-            let (airs, pks, air_proof_inputs): (Vec<_>, Vec<_>, Vec<_>) =
-                multiunzip(proof_input.per_air.iter().map(|(air_id, air_proof_input)| {
-                    (
-                        global_airs[*air_id].clone(),
-                        pk.per_air[*air_id].clone(),
-                        air_proof_input.clone(),
-                    )
-                }));
-            vm.engine.debug(&airs, &pks, &air_proof_inputs);
-        }
-    }
-    let proofs = vm.prove(&pk, result);
+    let engine = E::new(fri_params);
+    let (mut vm, pk) = VirtualMachine::<E, VB>::new_with_keygen(engine, builder, config)?;
+    let vk = pk.get_vk();
+    let exe = exe.into();
+    let input = input.into();
+    let metered_ctx = vm.build_metered_ctx();
+    let (segments, _) = vm
+        .metered_interpreter(&exe)?
+        .execute_metered(input.clone(), metered_ctx)?;
+    let cached_program_trace = vm.commit_program_on_device(&exe.program);
+    vm.load_program(cached_program_trace);
+    let mut preflight_interpreter = vm.preflight_interpreter(&exe)?;
 
-    assert!(proofs.len() >= min_segments);
-    vm.verify(&pk.get_vk(), proofs)
-        .expect("segment proofs should verify");
-    final_memory
-}
+    let mut state = Some(vm.create_initial_state(&exe, input));
+    let mut proofs = Vec::new();
+    let mut exit_code = None;
+    for segment in segments {
+        let Segment {
+            instret_start,
+            num_insns,
+            trace_heights,
+        } = segment;
+        assert_eq!(state.as_ref().unwrap().instret, instret_start);
+        let from_state = Option::take(&mut state).unwrap();
+        vm.transport_init_memory_to_device(&from_state.memory);
+        let PreflightExecutionOutput {
+            system_records,
+            record_arenas,
+            to_state,
+        } = vm.execute_preflight(
+            &mut preflight_interpreter,
+            from_state,
+            Some(num_insns),
+            &trace_heights,
+        )?;
+        state = Some(to_state);
+        exit_code = system_records.exit_code;
 
-/// Generates the VM STARK circuit, in the form of AIRs and traces, but does not
-/// do any proving. Output is the payload of everything the prover needs.
-///
-/// The output AIRs and traces are sorted by height in descending order.
-pub fn gen_vm_program_test_proof_input<SC: StarkGenericConfig, VC>(
-    program: Program<Val<SC>>,
-    input_stream: impl Into<Streams<Val<SC>>> + Clone,
-    #[allow(unused_mut)] mut config: VC,
-) -> ProofInputForTest<SC>
-where
-    Val<SC>: PrimeField32,
-    VC: VmConfig<Val<SC>> + Clone,
-    VC::Executor: Chip<SC>,
-    VC::Periphery: Chip<SC>,
-{
-    cfg_if::cfg_if! {
-        if #[cfg(feature = "bench-metrics")] {
-            // Run once with metrics collection enabled, which can improve runtime performance
-            config.system_mut().profiling = true;
-            {
-                let executor = VmExecutor::<Val<SC>, VC>::new(config.clone());
-                executor.execute(program.clone(), input_stream.clone()).unwrap();
-            }
-            // Run again with metrics collection disabled and measure trace generation time
-            config.system_mut().profiling = false;
-            let start = std::time::Instant::now();
+        let ctx = vm.generate_proving_ctx(system_records, record_arenas)?;
+        if debug {
+            debug_proving_ctx(&vm, &pk, &ctx);
         }
+        let proof = vm.engine.prove(vm.pk(), ctx);
+        proofs.push(proof);
     }
-
-    let airs = config.create_chip_complex().unwrap().airs();
-    let executor = VmExecutor::<Val<SC>, VC>::new(config);
-
-    let mut result = executor
-        .execute_and_generate(program, input_stream)
-        .unwrap();
-    assert_eq!(
-        result.per_segment.len(),
-        1,
-        "only proving one segment for now"
-    );
-
-    let result = result.per_segment.pop().unwrap();
-    #[cfg(feature = "bench-metrics")]
-    metrics::gauge!("execute_and_trace_gen_time_ms").set(start.elapsed().as_millis() as f64);
-    // Filter out unused AIRS (where trace is empty)
-    let (used_airs, per_air) = result
-        .per_air
+    assert!(proofs.len() >= min_segments);
+    vm.verify(&vk, &proofs)
+        .expect("segment proofs should verify");
+    let state = state.unwrap();
+    let final_memory = (exit_code == Some(ExitCode::Success as u32)).then_some(state.memory.memory);
+    let vdata = proofs
         .into_iter()
-        .map(|(air_id, x)| (airs[air_id].clone(), x))
-        .unzip();
-    ProofInputForTest {
-        airs: used_airs,
-        per_air,
-    }
-}
-
-type ExecuteAndProveResult<SC> = Result<VerificationDataWithFriParams<SC>, VerificationError>;
+        .map(|proof| VerificationDataWithFriParams {
+            data: VerificationData {
+                vk: vk.clone(),
+                proof,
+            },
+            fri_params: vm.engine.fri_params(),
+        })
+        .collect();
 
-/// Executes program and runs simple STARK prover test (keygen, prove, verify).
-pub fn execute_and_prove_program<SC: StarkGenericConfig, E: StarkFriEngine<SC>, VC>(
-    program: Program<Val<SC>>,
-    input_stream: impl Into<Streams<Val<SC>>> + Clone,
-    config: VC,
-    engine: &E,
-) -> ExecuteAndProveResult<SC>
-where
-    Val<SC>: PrimeField32,
-    VC: VmConfig<Val<SC>> + Clone,
-    VC::Executor: Chip<SC>,
-    VC::Periphery: Chip<SC>,
-{
-    let span = tracing::info_span!("execute_and_prove_program").entered();
-    let test_proof_input = gen_vm_program_test_proof_input(program, input_stream, config);
-    let vparams = test_proof_input.run_test(engine)?;
-    span.exit();
-    Ok(vparams)
+    Ok((final_memory, vdata))
 }
diff --git a/crates/vm/src/utils/test_utils.rs b/crates/vm/src/utils/test_utils.rs
index 9449aff5b8..c933d8b63e 100644
--- a/crates/vm/src/utils/test_utils.rs
+++ b/crates/vm/src/utils/test_utils.rs
@@ -1,8 +1,15 @@
 use std::array;
 
+use openvm_circuit::arch::{MemoryConfig, SystemConfig};
+use openvm_instructions::{
+    riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS},
+    NATIVE_AS,
+};
 use openvm_stark_backend::p3_field::PrimeField32;
 use rand::{rngs::StdRng, Rng};
 
+use crate::system::memory::{merkle::public_values::PUBLIC_VALUES_AS, online::PAGE_SIZE};
+
 pub fn i32_to_f<F: PrimeField32>(val: i32) -> F {
     if val.signum() == -1 {
         -F::from_canonical_u32(val.unsigned_abs())
@@ -31,3 +38,27 @@ pub fn u32_sign_extend<const IMM_BITS: usize>(num: u32) -> u32 {
         num
     }
 }
+
+pub fn test_system_config_without_continuations() -> SystemConfig {
+    let mut addr_spaces = MemoryConfig::empty_address_space_configs(5); // indexed by AS id below
+    addr_spaces[RV32_REGISTER_AS as usize].num_cells = PAGE_SIZE;
+    addr_spaces[RV32_MEMORY_AS as usize].num_cells = 1 << 22;
+    addr_spaces[PUBLIC_VALUES_AS as usize].num_cells = PAGE_SIZE;
+    addr_spaces[NATIVE_AS as usize].num_cells = 1 << 25;
+    SystemConfig::new(3, MemoryConfig::new(2, addr_spaces, 29, 29, 17, 32), 32)
+        .without_continuations()
+}
+
+/// Test config with continuations enabled; the native address space is emptied (0 cells).
+pub fn test_system_config() -> SystemConfig {
+    let mut config = test_system_config_without_continuations();
+    config.memory_config.addr_spaces[NATIVE_AS as usize].num_cells = 0;
+    config.with_continuations()
+}
+
+/// Returns a `Vec<u8>` of exactly `len` bytes filled with random data drawn from `rng`.
+pub fn get_random_message(rng: &mut StdRng, len: usize) -> Vec<u8> {
+    let mut random_message: Vec<u8> = vec![0u8; len];
+    rng.fill(&mut random_message[..]); // overwrite the zeroed buffer in place
+    random_message
+}
diff --git a/crates/vm/tests/integration_test.rs b/crates/vm/tests/integration_test.rs
deleted file mode 100644
index 168d756111..0000000000
--- a/crates/vm/tests/integration_test.rs
+++ /dev/null
@@ -1,776 +0,0 @@
-use std::{
-    collections::{BTreeMap, VecDeque},
-    iter::zip,
-    sync::Arc,
-};
-
-use openvm_circuit::{
-    arch::{
-        hasher::{poseidon2::vm_poseidon2_hasher, Hasher},
-        ChipId, ExecutionSegment, MemoryConfig, SingleSegmentVmExecutor, SystemConfig,
-        SystemTraceHeights, VirtualMachine, VmComplexTraceHeights, VmConfig,
-        VmInventoryTraceHeights,
-    },
-    system::{
-        memory::{MemoryTraceHeights, VolatileMemoryTraceHeights, CHUNK},
-        program::trace::VmCommittedExe,
-    },
-    utils::{air_test, air_test_with_min_segments},
-};
-use openvm_instructions::{
-    exe::VmExe,
-    instruction::Instruction,
-    program::{Program, DEFAULT_PC_STEP},
-    LocalOpcode, PhantomDiscriminant,
-    PublishOpcode::PUBLISH,
-    SysPhantom,
-    SystemOpcode::*,
-};
-use openvm_native_circuit::NativeConfig;
-use openvm_native_compiler::{
-    FieldArithmeticOpcode::*, FieldExtensionOpcode::*, NativeBranchEqualOpcode, NativeJalOpcode::*,
-    NativeLoadStoreOpcode::*, NativePhantom,
-};
-use openvm_rv32im_transpiler::BranchEqualOpcode::*;
-use openvm_stark_backend::{
-    config::StarkGenericConfig, engine::StarkEngine, p3_field::FieldAlgebra,
-};
-use openvm_stark_sdk::{
-    config::{
-        baby_bear_poseidon2::{BabyBearPoseidon2Config, BabyBearPoseidon2Engine},
-        fri_params::standard_fri_params_with_100_bits_conjectured_security,
-        setup_tracing, FriParameters,
-    },
-    engine::StarkFriEngine,
-    p3_baby_bear::BabyBear,
-};
-use rand::Rng;
-use test_log::test;
-
-pub fn gen_pointer<R>(rng: &mut R, len: usize) -> usize
-where
-    R: Rng + ?Sized,
-{
-    const MAX_MEMORY: usize = 1 << 29;
-    rng.gen_range(0..MAX_MEMORY - len) / len * len
-}
-
-fn test_native_config() -> NativeConfig {
-    NativeConfig {
-        system: SystemConfig::new(3, MemoryConfig::new(2, 1, 16, 29, 15, 32, 1024), 0),
-        native: Default::default(),
-    }
-}
-
-fn test_native_continuations_config() -> NativeConfig {
-    let mut config = test_native_config();
-    config.system = config.system.with_continuations();
-    config
-}
-
-#[test]
-fn test_vm_1() {
-    let n = 6;
-    /*
-    Instruction 0 assigns word[0]_4 to n.
-    Instruction 4 terminates
-    The remainder is a loop that decrements word[0]_4 until it reaches 0, then terminates.
-    Instruction 1 checks if word[0]_4 is 0 yet, and if so sets pc to 5 in order to terminate
-    Instruction 2 decrements word[0]_4 (using word[1]_4)
-    Instruction 3 uses JAL as a simple jump to go back to instruction 1 (repeating the loop).
-     */
-    let instructions = vec![
-        // word[0]_4 <- word[n]_0
-        Instruction::large_from_isize(ADD.global_opcode(), 0, n, 0, 4, 0, 0, 0),
-        // if word[0]_4 == 0 then pc += 3 * DEFAULT_PC_STEP
-        Instruction::from_isize(
-            NativeBranchEqualOpcode(BEQ).global_opcode(),
-            0,
-            0,
-            3 * DEFAULT_PC_STEP as isize,
-            4,
-            0,
-        ),
-        // word[0]_4 <- word[0]_4 - word[1]_4
-        Instruction::large_from_isize(SUB.global_opcode(), 0, 0, 1, 4, 4, 0, 0),
-        // word[2]_4 <- pc + DEFAULT_PC_STEP, pc -= 2 * DEFAULT_PC_STEP
-        Instruction::from_isize(
-            JAL.global_opcode(),
-            2,
-            -2 * DEFAULT_PC_STEP as isize,
-            0,
-            4,
-            0,
-        ),
-        // terminate
-        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
-    ];
-
-    let program = Program::from_instructions(&instructions);
-
-    air_test(test_native_config(), program);
-}
-
-#[test]
-fn test_vm_override_executor_height() {
-    let e = BabyBearPoseidon2Engine::new(FriParameters::standard_fast());
-    let program = Program::<BabyBear>::from_instructions(&[
-        Instruction::large_from_isize(ADD.global_opcode(), 0, 4, 0, 4, 0, 0, 0),
-        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
-    ]);
-    let committed_exe = Arc::new(VmCommittedExe::<BabyBearPoseidon2Config>::commit(
-        program.into(),
-        e.config().pcs(),
-    ));
-
-    // Test getting heights.
-    let vm_config = NativeConfig::aggregation(8, 3);
-
-    let executor = SingleSegmentVmExecutor::new(vm_config.clone());
-    let res = executor
-        .execute_and_compute_heights(committed_exe.exe.clone(), vec![])
-        .unwrap();
-    // Memory trace heights are not computed during execution.
-    assert_eq!(
-        res.vm_heights.system,
-        SystemTraceHeights {
-            memory: MemoryTraceHeights::Volatile(VolatileMemoryTraceHeights {
-                boundary: 1,
-                access_adapters: vec![0, 0, 0],
-            }),
-        }
-    );
-    assert_eq!(
-        res.vm_heights.inventory,
-        VmInventoryTraceHeights {
-            chips: vec![
-                (ChipId::Executor(0), 0),
-                (ChipId::Executor(1), 0),
-                (ChipId::Executor(2), 0),
-                (ChipId::Executor(3), 0),
-                (ChipId::Executor(4), 0),
-                (ChipId::Executor(5), 0),
-                (ChipId::Executor(6), 1), // corresponds to FieldArithmeticChip
-                (ChipId::Executor(7), 0),
-                (ChipId::Executor(8), 0),
-                (ChipId::Executor(9), 0),
-            ]
-            .into_iter()
-            .collect(),
-        }
-    );
-
-    // Test overriding heights.
-    let system_overridden_heights = SystemTraceHeights {
-        memory: MemoryTraceHeights::Volatile(VolatileMemoryTraceHeights {
-            boundary: 1,
-            access_adapters: vec![8, 4, 2],
-        }),
-    };
-    let inventory_overridden_heights = VmInventoryTraceHeights {
-        chips: vec![
-            (ChipId::Executor(0), 16),
-            (ChipId::Executor(1), 32),
-            (ChipId::Executor(2), 64),
-            (ChipId::Executor(3), 128),
-            (ChipId::Executor(4), 256),
-            (ChipId::Executor(5), 512),
-            (ChipId::Executor(6), 1024),
-            (ChipId::Executor(7), 2048),
-            (ChipId::Executor(8), 4096),
-            (ChipId::Executor(9), 8192),
-        ]
-        .into_iter()
-        .collect(),
-    };
-    let overridden_heights = VmComplexTraceHeights::new(
-        system_overridden_heights.clone(),
-        inventory_overridden_heights.clone(),
-    );
-    let executor = SingleSegmentVmExecutor::new_with_overridden_trace_heights(
-        vm_config,
-        Some(overridden_heights),
-    );
-    let proof_input = executor
-        .execute_and_generate(committed_exe, vec![])
-        .unwrap();
-    let air_heights: Vec<_> = proof_input
-        .per_air
-        .iter()
-        .map(|(_, api)| api.main_trace_height())
-        .collect();
-    // It's hard to define the mapping semantically. Please recompute the following magical AIR
-    // heights by hands whenever something changes.
-    assert_eq!(
-        air_heights,
-        vec![2, 2, 16, 1, 8, 4, 2, 8192, 4096, 2048, 1024, 512, 256, 128, 64, 32, 262144]
-    );
-}
-
-#[test]
-fn test_vm_1_optional_air() {
-    // Aggregation VmConfig has Core/Poseidon2/FieldArithmetic/FieldExtension chips. The program
-    // only uses Core and FieldArithmetic. All other chips should not have AIR proof inputs.
-    let config = NativeConfig::aggregation(4, 3);
-    let engine =
-        BabyBearPoseidon2Engine::new(standard_fri_params_with_100_bits_conjectured_security(3));
-    let vm = VirtualMachine::new(engine, config);
-    let pk = vm.keygen();
-    let num_airs = pk.per_air.len();
-
-    {
-        let n = 6;
-        let instructions = vec![
-            Instruction::large_from_isize(ADD.global_opcode(), 0, n, 0, 4, 0, 0, 0),
-            Instruction::large_from_isize(SUB.global_opcode(), 0, 0, 1, 4, 4, 0, 0),
-            Instruction::from_isize(
-                NativeBranchEqualOpcode(BNE).global_opcode(),
-                0,
-                0,
-                -(DEFAULT_PC_STEP as isize),
-                4,
-                0,
-            ),
-            Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
-        ];
-
-        let program = Program::from_instructions(&instructions);
-        let result = vm
-            .execute_and_generate(program, vec![])
-            .expect("Failed to execute VM");
-        assert_eq!(result.per_segment.len(), 1);
-        let proof_input = result.per_segment.last().unwrap();
-        assert!(
-            proof_input.per_air.len() < num_airs,
-            "Expect less used AIRs"
-        );
-        let proofs = vm.prove(&pk, result);
-        assert_eq!(proofs.len(), 1);
-        vm.verify(&pk.get_vk(), proofs)
-            .expect("Verification failed");
-    }
-}
-
-#[test]
-fn test_vm_public_values() {
-    setup_tracing();
-    let num_public_values = 100;
-    let config = SystemConfig::default().with_public_values(num_public_values);
-    let engine =
-        BabyBearPoseidon2Engine::new(standard_fri_params_with_100_bits_conjectured_security(3));
-    let vm = VirtualMachine::new(engine, config.clone());
-    let pk = vm.keygen();
-
-    {
-        let instructions = vec![
-            Instruction::from_usize(PUBLISH.global_opcode(), [0, 12, 2, 0, 0, 0]),
-            Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
-        ];
-
-        let program = Program::from_instructions(&instructions);
-        let committed_exe = Arc::new(VmCommittedExe::commit(
-            program.clone().into(),
-            vm.engine.config.pcs(),
-        ));
-        let single_vm = SingleSegmentVmExecutor::new(config);
-        let exe_result = single_vm
-            .execute_and_compute_heights(program, vec![])
-            .unwrap();
-        assert_eq!(
-            exe_result.public_values,
-            [
-                vec![None, None, Some(BabyBear::from_canonical_u32(12))],
-                vec![None; num_public_values - 3]
-            ]
-            .concat(),
-        );
-        let proof_input = single_vm
-            .execute_and_generate(committed_exe, vec![])
-            .unwrap();
-        vm.engine
-            .prove_then_verify(&pk, proof_input)
-            .expect("Verification failed");
-    }
-}
-
-#[test]
-fn test_vm_initial_memory() {
-    // Program that fails if mem[(4, 7)] != 101.
-    let program = Program::from_instructions(&[
-        Instruction::<BabyBear>::from_isize(
-            NativeBranchEqualOpcode(BEQ).global_opcode(),
-            7,
-            101,
-            2 * DEFAULT_PC_STEP as isize,
-            4,
-            0,
-        ),
-        Instruction::<BabyBear>::from_isize(
-            PHANTOM.global_opcode(),
-            0,
-            0,
-            SysPhantom::DebugPanic as isize,
-            0,
-            0,
-        ),
-        Instruction::<BabyBear>::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
-    ]);
-
-    let init_memory: BTreeMap<_, _> = [((4, 7), BabyBear::from_canonical_u32(101))]
-        .into_iter()
-        .collect();
-
-    let config = test_native_continuations_config();
-    let exe = VmExe {
-        program,
-        pc_start: 0,
-        init_memory,
-        fn_bounds: Default::default(),
-    };
-    air_test(config, exe);
-}
-
-#[test]
-fn test_vm_1_persistent() {
-    let engine = BabyBearPoseidon2Engine::new(FriParameters::standard_fast());
-    let config = test_native_continuations_config();
-    let ptr_max_bits = config.system.memory_config.pointer_max_bits;
-    let as_height = config.system.memory_config.as_height;
-    let airs = VmConfig::<BabyBear>::create_chip_complex(&config)
-        .unwrap()
-        .airs::<BabyBearPoseidon2Config>();
-
-    let vm = VirtualMachine::new(engine, config);
-    let pk = vm.keygen();
-
-    let n = 6;
-    let instructions = vec![
-        Instruction::large_from_isize(ADD.global_opcode(), 0, n, 0, 4, 0, 0, 0),
-        Instruction::large_from_isize(SUB.global_opcode(), 0, 0, 1, 4, 4, 0, 0),
-        Instruction::from_isize(
-            NativeBranchEqualOpcode(BNE).global_opcode(),
-            0,
-            0,
-            -(DEFAULT_PC_STEP as isize),
-            4,
-            0,
-        ),
-        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
-    ];
-
-    let program = Program::from_instructions(&instructions);
-
-    let result = vm.execute_and_generate(program.clone(), vec![]).unwrap();
-    {
-        let proof_input = result.per_segment.into_iter().next().unwrap();
-
-        let ((_, merkle_air_proof_input), _) = zip(&proof_input.per_air, &airs)
-            .find(|(_, air)| air.name() == "MemoryMerkleAir<8>")
-            .unwrap();
-        assert_eq!(merkle_air_proof_input.raw.public_values.len(), 16);
-        assert_eq!(
-            merkle_air_proof_input.raw.public_values[..8],
-            merkle_air_proof_input.raw.public_values[8..]
-        );
-        let mut digest = [BabyBear::ZERO; CHUNK];
-        let compression = vm_poseidon2_hasher();
-        for _ in 0..ptr_max_bits + as_height - 2 {
-            digest = compression.compress(&digest, &digest);
-        }
-        assert_eq!(
-            merkle_air_proof_input.raw.public_values[..8],
-            // The value when you start with zeros and repeatedly hash the value with itself
-            // ptr_max_bits + as_height - 2 times.
-            // The height of the tree is ptr_max_bits + as_height - log2(8). The leaf also must be
-            // hashed once with padding for security.
-            digest
-        );
-    }
-
-    let result_for_proof = vm.execute_and_generate(program, vec![]).unwrap();
-    let proofs = vm.prove(&pk, result_for_proof);
-    vm.verify(&pk.get_vk(), proofs)
-        .expect("Verification failed");
-}
-
-#[test]
-fn test_vm_without_field_arithmetic() {
-    /*
-    Instruction 0 assigns word[0]_4 to 5.
-    Instruction 1 checks if word[0]_4 is *not* 4, and if so jumps to instruction 4.
-    Instruction 2 is never run.
-    Instruction 3 terminates.
-    Instruction 4 checks if word[0]_4 is 5, and if so jumps to instruction 3 to terminate.
-     */
-    let instructions = vec![
-        // word[0]_4 <- word[5]_0
-        Instruction::large_from_isize(ADD.global_opcode(), 0, 5, 0, 4, 0, 0, 0),
-        // if word[0]_4 != 4 then pc += 3 * DEFAULT_PC_STEP
-        Instruction::from_isize(
-            NativeBranchEqualOpcode(BNE).global_opcode(),
-            0,
-            4,
-            3 * DEFAULT_PC_STEP as isize,
-            4,
-            0,
-        ),
-        // word[2]_4 <- pc + DEFAULT_PC_STEP, pc -= 2 * DEFAULT_PC_STEP
-        Instruction::from_isize(
-            JAL.global_opcode(),
-            2,
-            -2 * DEFAULT_PC_STEP as isize,
-            0,
-            4,
-            0,
-        ),
-        // terminate
-        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
-        // if word[0]_4 == 5 then pc -= 1
-        Instruction::from_isize(
-            NativeBranchEqualOpcode(BEQ).global_opcode(),
-            0,
-            5,
-            -(DEFAULT_PC_STEP as isize),
-            4,
-            0,
-        ),
-    ];
-
-    let program = Program::from_instructions(&instructions);
-
-    air_test(test_native_config(), program);
-}
-
-#[test]
-fn test_vm_fibonacci_old() {
-    let instructions = vec![
-        // [0]_4 <- [19]_0
-        Instruction::large_from_isize(ADD.global_opcode(), 0, 19, 0, 4, 0, 0, 0),
-        // [2]_4 <- [11]_0
-        Instruction::large_from_isize(ADD.global_opcode(), 2, 11, 0, 4, 0, 0, 0),
-        // [3]_4 <- [1]_0
-        Instruction::large_from_isize(ADD.global_opcode(), 3, 1, 0, 4, 0, 0, 0),
-        // [10]_4 <- [0]_4 + [2]_4
-        Instruction::large_from_isize(ADD.global_opcode(), 10, 0, 0, 4, 0, 0, 0),
-        // [11]_4 <- [1]_4 + [3]_4
-        Instruction::large_from_isize(ADD.global_opcode(), 11, 1, 0, 4, 0, 0, 0),
-        Instruction::from_isize(
-            NativeBranchEqualOpcode(BEQ).global_opcode(),
-            2,
-            0,
-            7 * DEFAULT_PC_STEP as isize,
-            4,
-            4,
-        ),
-        // [2]_4 <- [2]_4 + [3]_4
-        Instruction::large_from_isize(ADD.global_opcode(), 2, 2, 3, 4, 4, 4, 0),
-        // [4]_4 <- [[2]_4 - 2]_4
-        Instruction::from_isize(LOADW.global_opcode(), 4, -2, 2, 4, 4),
-        // [5]_4 <- [[2]_4 - 1]_4
-        Instruction::from_isize(LOADW.global_opcode(), 5, -1, 2, 4, 4),
-        // [6]_4 <- [4]_4 + [5]_4
-        Instruction::large_from_isize(ADD.global_opcode(), 6, 4, 5, 4, 4, 4, 0),
-        // [[2]_4]_4 <- [6]_4
-        Instruction::from_isize(STOREW.global_opcode(), 6, 0, 2, 4, 4),
-        Instruction::from_isize(
-            JAL.global_opcode(),
-            7,
-            -6 * DEFAULT_PC_STEP as isize,
-            0,
-            4,
-            0,
-        ),
-        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
-    ];
-
-    let program = Program::from_instructions(&instructions);
-
-    air_test(test_native_config(), program);
-}
-
-#[test]
-fn test_vm_fibonacci_old_cycle_tracker() {
-    // NOTE: Instructions commented until cycle tracker instructions are not counted as additional
-    // assembly Instructions
-    let instructions = vec![
-        Instruction::debug(PhantomDiscriminant(SysPhantom::CtStart as u16)),
-        Instruction::debug(PhantomDiscriminant(SysPhantom::CtStart as u16)),
-        // [0]_4 <- [19]_0
-        Instruction::large_from_isize(ADD.global_opcode(), 0, 19, 0, 4, 0, 0, 0),
-        // [2]_4 <- [11]_0
-        Instruction::large_from_isize(ADD.global_opcode(), 2, 11, 0, 4, 0, 0, 0),
-        // [3]_4 <- [1]_0
-        Instruction::large_from_isize(ADD.global_opcode(), 3, 1, 0, 4, 0, 0, 0),
-        // [10]_4 <- [0]_4 + [2]_4
-        Instruction::large_from_isize(ADD.global_opcode(), 10, 0, 0, 4, 0, 0, 0),
-        // [11]_4 <- [1]_4 + [3]_4
-        Instruction::large_from_isize(ADD.global_opcode(), 11, 1, 0, 4, 0, 0, 0),
-        Instruction::debug(PhantomDiscriminant(SysPhantom::CtEnd as u16)),
-        Instruction::debug(PhantomDiscriminant(SysPhantom::CtStart as u16)),
-        // if [2]_4 == [0]_4 then pc += 9 * DEFAULT_PC_STEP
-        Instruction::from_isize(
-            NativeBranchEqualOpcode(BEQ).global_opcode(),
-            2,
-            0,
-            9 * DEFAULT_PC_STEP as isize,
-            4,
-            4,
-        ),
-        // [2]_4 <- [2]_4 + [3]_4
-        Instruction::large_from_isize(ADD.global_opcode(), 2, 2, 3, 4, 4, 4, 0),
-        Instruction::debug(PhantomDiscriminant(SysPhantom::CtStart as u16)),
-        // [4]_4 <- [[2]_4 - 2]_4
-        Instruction::from_isize(LOADW.global_opcode(), 4, -2, 2, 4, 4),
-        // [5]_4 <- [[2]_4 - 1]_4
-        Instruction::from_isize(LOADW.global_opcode(), 5, -1, 2, 4, 4),
-        // [6]_4 <- [4]_4 + [5]_4
-        Instruction::large_from_isize(ADD.global_opcode(), 6, 4, 5, 4, 4, 4, 0),
-        // [[2]_4]_4 <- [6]_4
-        Instruction::from_isize(STOREW.global_opcode(), 6, 0, 2, 4, 4),
-        Instruction::debug(PhantomDiscriminant(SysPhantom::CtEnd as u16)),
-        // [a]_4 <- pc + 4, pc -= 8 * DEFAULT_PC_STEP
-        Instruction::from_isize(
-            JAL.global_opcode(),
-            7,
-            -8 * DEFAULT_PC_STEP as isize,
-            0,
-            4,
-            0,
-        ),
-        Instruction::debug(PhantomDiscriminant(SysPhantom::CtEnd as u16)),
-        Instruction::debug(PhantomDiscriminant(SysPhantom::CtEnd as u16)),
-        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
-    ];
-
-    let program = Program::from_instructions(&instructions);
-
-    air_test(test_native_config(), program);
-}
-
-#[test]
-fn test_vm_field_extension_arithmetic() {
-    let instructions = vec![
-        Instruction::large_from_isize(ADD.global_opcode(), 0, 0, 1, 4, 0, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 1, 0, 2, 4, 0, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 2, 0, 1, 4, 0, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 3, 0, 2, 4, 0, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 4, 0, 2, 4, 0, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 5, 0, 1, 4, 0, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 6, 0, 1, 4, 0, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 7, 0, 2, 4, 0, 0, 0),
-        Instruction::from_isize(FE4ADD.global_opcode(), 8, 0, 4, 4, 4),
-        Instruction::from_isize(FE4ADD.global_opcode(), 8, 0, 4, 4, 4),
-        Instruction::from_isize(FE4SUB.global_opcode(), 12, 0, 4, 4, 4),
-        Instruction::from_isize(BBE4MUL.global_opcode(), 12, 0, 4, 4, 4),
-        Instruction::from_isize(BBE4DIV.global_opcode(), 12, 0, 4, 4, 4),
-        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
-    ];
-
-    let program = Program::from_instructions(&instructions);
-
-    air_test(test_native_config(), program);
-}
-
-#[test]
-fn test_vm_max_access_adapter_8() {
-    let instructions = vec![
-        Instruction::large_from_isize(ADD.global_opcode(), 0, 0, 1, 4, 0, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 1, 0, 2, 4, 0, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 2, 0, 1, 4, 0, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 3, 0, 2, 4, 0, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 4, 0, 2, 4, 0, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 5, 0, 1, 4, 0, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 6, 0, 1, 4, 0, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 7, 0, 2, 4, 0, 0, 0),
-        Instruction::from_isize(FE4ADD.global_opcode(), 8, 0, 4, 4, 4),
-        Instruction::from_isize(FE4ADD.global_opcode(), 8, 0, 4, 4, 4),
-        Instruction::from_isize(FE4SUB.global_opcode(), 12, 0, 4, 4, 4),
-        Instruction::from_isize(BBE4MUL.global_opcode(), 12, 0, 4, 4, 4),
-        Instruction::from_isize(BBE4DIV.global_opcode(), 12, 0, 4, 4, 4),
-        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
-    ];
-
-    let program = Program::from_instructions(&instructions);
-
-    let mut config = test_native_config();
-    {
-        let chip_complex1 = config.create_chip_complex().unwrap();
-        let mem_ctrl1 = chip_complex1.base.memory_controller;
-        config.system.memory_config.max_access_adapter_n = 8;
-        let chip_complex2 = config.create_chip_complex().unwrap();
-        let mem_ctrl2 = chip_complex2.base.memory_controller;
-        // AccessAdapterAir with N=16/32 are disabled.
-        assert_eq!(mem_ctrl1.air_names().len(), mem_ctrl2.air_names().len() + 2);
-        assert_eq!(
-            mem_ctrl1.airs::<BabyBearPoseidon2Config>().len(),
-            mem_ctrl2.airs::<BabyBearPoseidon2Config>().len() + 2
-        );
-        assert_eq!(
-            mem_ctrl1.current_trace_heights().len(),
-            mem_ctrl2.current_trace_heights().len() + 2
-        );
-    }
-    air_test(config, program);
-}
-
-#[test]
-fn test_vm_field_extension_arithmetic_persistent() {
-    let instructions = vec![
-        Instruction::large_from_isize(ADD.global_opcode(), 0, 0, 1, 4, 0, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 1, 0, 2, 4, 0, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 2, 0, 1, 4, 0, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 3, 0, 2, 4, 0, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 4, 0, 2, 4, 0, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 5, 0, 1, 4, 0, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 6, 0, 1, 4, 0, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 7, 0, 2, 4, 0, 0, 0),
-        Instruction::from_isize(FE4ADD.global_opcode(), 8, 0, 4, 4, 4),
-        Instruction::from_isize(FE4ADD.global_opcode(), 8, 0, 4, 4, 4),
-        Instruction::from_isize(FE4SUB.global_opcode(), 12, 0, 4, 4, 4),
-        Instruction::from_isize(BBE4MUL.global_opcode(), 12, 0, 4, 4, 4),
-        Instruction::from_isize(BBE4DIV.global_opcode(), 12, 0, 4, 4, 4),
-        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
-    ];
-
-    let program = Program::from_instructions(&instructions);
-    let config = test_native_continuations_config();
-    air_test(config, program);
-}
-
-#[test]
-fn test_vm_hint() {
-    let instructions = vec![
-        Instruction::large_from_isize(ADD.global_opcode(), 16, 0, 0, 4, 0, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 20, 16, 16777220, 4, 4, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 32, 20, 0, 4, 4, 0, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 20, 20, 1, 4, 4, 0, 0),
-        Instruction::from_isize(
-            PHANTOM.global_opcode(),
-            0,
-            0,
-            NativePhantom::HintInput as isize,
-            0,
-            0,
-        ),
-        Instruction::from_isize(HINT_STOREW.global_opcode(), 32, 0, 0, 4, 4),
-        Instruction::from_isize(LOADW.global_opcode(), 38, 0, 32, 4, 4),
-        Instruction::large_from_isize(ADD.global_opcode(), 44, 20, 0, 4, 4, 0, 0),
-        Instruction::from_isize(MUL.global_opcode(), 24, 38, 1, 4, 4),
-        Instruction::large_from_isize(ADD.global_opcode(), 20, 20, 24, 4, 4, 1, 0),
-        Instruction::large_from_isize(ADD.global_opcode(), 50, 16, 0, 4, 4, 0, 0),
-        Instruction::from_isize(
-            JAL.global_opcode(),
-            24,
-            6 * DEFAULT_PC_STEP as isize,
-            0,
-            4,
-            0,
-        ),
-        Instruction::from_isize(MUL.global_opcode(), 0, 50, 1, 4, 4),
-        Instruction::large_from_isize(ADD.global_opcode(), 0, 44, 0, 4, 4, 4, 0),
-        Instruction::from_isize(HINT_STOREW.global_opcode(), 0, 0, 0, 4, 4),
-        Instruction::large_from_isize(ADD.global_opcode(), 50, 50, 1, 4, 4, 0, 0),
-        Instruction::from_isize(
-            NativeBranchEqualOpcode(BNE).global_opcode(),
-            50,
-            38,
-            -4 * (DEFAULT_PC_STEP as isize),
-            4,
-            4,
-        ),
-        Instruction::from_isize(
-            NativeBranchEqualOpcode(BNE).global_opcode(),
-            50,
-            38,
-            -5 * (DEFAULT_PC_STEP as isize),
-            4,
-            4,
-        ),
-        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
-    ];
-
-    let program = Program::from_instructions(&instructions);
-
-    type F = BabyBear;
-
-    let input_stream: Vec<Vec<F>> = vec![vec![F::TWO]];
-    let config = NativeConfig::new(SystemConfig::default(), Default::default());
-    air_test_with_min_segments(config, program, input_stream, 1);
-}
-
-#[test]
-fn test_hint_load_1() {
-    type F = BabyBear;
-    let instructions = vec![
-        Instruction::phantom(
-            PhantomDiscriminant(NativePhantom::HintLoad as u16),
-            F::ZERO,
-            F::ZERO,
-            0,
-        ),
-        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
-    ];
-
-    let program = Program::from_instructions(&instructions);
-
-    let mut segment = ExecutionSegment::new(
-        &test_native_config(),
-        program,
-        vec![vec![F::ONE, F::TWO]].into(),
-        None,
-        vec![],
-        Default::default(),
-    );
-    segment.execute_from_pc(0).unwrap();
-    let streams = segment.chip_complex.take_streams();
-    assert!(streams.input_stream.is_empty());
-    assert_eq!(streams.hint_stream, VecDeque::from(vec![F::ZERO]));
-    assert_eq!(streams.hint_space, vec![vec![F::ONE, F::TWO]]);
-}
-
-#[test]
-fn test_hint_load_2() {
-    type F = BabyBear;
-    let instructions = vec![
-        Instruction::phantom(
-            PhantomDiscriminant(NativePhantom::HintLoad as u16),
-            F::ZERO,
-            F::ZERO,
-            0,
-        ),
-        Instruction::from_isize(HINT_STOREW.global_opcode(), 32, 0, 0, 4, 4),
-        Instruction::phantom(
-            PhantomDiscriminant(NativePhantom::HintLoad as u16),
-            F::ZERO,
-            F::ZERO,
-            0,
-        ),
-        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
-    ];
-
-    let program = Program::from_instructions(&instructions);
-
-    let mut segment = ExecutionSegment::new(
-        &test_native_config(),
-        program,
-        vec![vec![F::ONE, F::TWO], vec![F::TWO, F::ONE]].into(),
-        None,
-        vec![],
-        Default::default(),
-    );
-    segment.execute_from_pc(0).unwrap();
-    assert_eq!(
-        segment
-            .chip_complex
-            .memory_controller()
-            .unsafe_read_cell(F::from_canonical_usize(4), F::from_canonical_usize(32)),
-        F::ZERO
-    );
-    let streams = segment.chip_complex.take_streams();
-    assert!(streams.input_stream.is_empty());
-    assert_eq!(streams.hint_stream, VecDeque::from(vec![F::ONE]));
-    assert_eq!(
-        streams.hint_space,
-        vec![vec![F::ONE, F::TWO], vec![F::TWO, F::ONE]]
-    );
-}
diff --git a/docs/README.md b/docs/README.md
index 4673492376..7646398123 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -3,7 +3,6 @@
 This directory contains documentation for contributors.
 
 - [Repository and Project Structure](./repo)
-- [Design and Specification](./specs)
 - [Crates](./crates)
 
 ![](../assets/modularity.png)
diff --git a/docs/crates/benchmarks.md b/docs/crates/benchmarks.md
index 6a259cb10e..39c8bc838f 100644
--- a/docs/crates/benchmarks.md
+++ b/docs/crates/benchmarks.md
@@ -120,10 +120,10 @@ for more detailed profiling we generate special flamegraphs that visualize VM-sp
 The benchmark must be run with special configuration so that additional metrics are collected for profiling. Note that the additional metric collection will slow down the benchmark. To run a benchmark with the additional profiling, run the following command:
 
 ```bash
-OUTPUT_PATH="metrics.json" GUEST_SYMBOLS_PATH="guest.syms" cargo run --release --bin <benchmark_name> --features profiling -- --profiling
+OUTPUT_PATH="metrics.json" GUEST_SYMBOLS_PATH="guest.syms" cargo run --release --bin <benchmark_name> --features perf-metrics -- --profiling
 ```
 
-Add `--features aggregation,profiling` to run with leaf aggregation. The `profiling` feature tells the VM to run with additional metric collection. The `--profiling` CLI argument tells the script to build the guest program with `profile=profiling` so that the guest program is compiled without stripping debug symbols. When the `profiling` feature is enabled, the `GUEST_SYMBOLS_PATH` environment variable must be set to the file path where function symbols of the guest program will be exported. Those symbols are then used to annotate the flamegraph with function names.
+Add `--features aggregation,perf-metrics` to run with leaf aggregation. The `perf-metrics` feature tells the VM to run with additional metric collection. The `--profiling` CLI argument tells the script to build the guest program with `profile=profiling` so that the guest program is compiled without stripping debug symbols. When the `perf-metrics` feature is enabled, the `GUEST_SYMBOLS_PATH` environment variable must be set to the file path where function symbols of the guest program will be exported. Those symbols are then used to annotate the flamegraph with function names.
 
 After the collected metrics are written to `$OUTPUT_PATH`, these flamegraphs can be generated if you have [inferno-flamegraph](https://crates.io/crates/inferno) installed. Install via
 
@@ -181,19 +181,19 @@ For execution benchmarks, the ELF files need to be compiled before running the b
 
 ```bash
 # Build all benchmark ELFs
-cargo run --package openvm-benchmarks-utils --bin build-elfs --features build-binaries
+cargo run --package openvm-benchmarks-utils --bin build-elfs --features build-elfs
 
 # Build specific benchmark ELFs
-cargo run --package openvm-benchmarks-utils --bin build-elfs --features build-binaries -- fibonacci_recursive fibonacci_iterative
+cargo run --package openvm-benchmarks-utils --bin build-elfs --features build-elfs -- fibonacci_recursive fibonacci_iterative
 
 # Skip specific programs
-cargo run --package openvm-benchmarks-utils --bin build-elfs --features build-binaries -- --skip keccak256 sha256
+cargo run --package openvm-benchmarks-utils --bin build-elfs --features build-elfs -- --skip keccak256 sha256
 
 # Force rebuild even if ELFs already exist (overwrite)
-cargo run --package openvm-benchmarks-utils --bin build-elfs --features build-binaries -- --force
+cargo run --package openvm-benchmarks-utils --bin build-elfs --features build-elfs -- --force
 
 # Set build profile (debug or release)
-cargo run --package openvm-benchmarks-utils --bin build-elfs --features build-binaries -- --profile debug
+cargo run --package openvm-benchmarks-utils --bin build-elfs --features build-elfs -- --profile debug
 ```
 
 ## Profiling Execution
diff --git a/docs/crates/metrics.md b/docs/crates/metrics.md
index 362bce47e0..6fe3072add 100644
--- a/docs/crates/metrics.md
+++ b/docs/crates/metrics.md
@@ -2,21 +2,25 @@
 
 We use the [`metrics`](https://docs.rs/metrics/latest/metrics/) crate to collect metrics for the STARK prover. We refer to [reth docs](https://github.com/paradigmxyz/reth/blob/main/docs/design/metrics.md) for more guidelines on how to use metrics.
 
-Metrics will only be collected if the `bench-metrics` feature is enabled.
+Metrics will only be collected if the `metrics` feature is enabled.
 We describe the metrics that are collected for a single VM circuit proof, which corresponds to a single execution segment.
 
 To scope metrics from different proofs, we use the [`metrics_tracing_context`](https://docs.rs/metrics-tracing-context/latest/metrics_tracing_context/) crate to provide context-dependent labels. With the exception of the `segment` label, all other labels must be set by the caller.
 
-For a single segment proof, the following metrics are collected:
+For a segment proof, the following metrics are collected:
 
-- `execute_time_ms` (gauge): The runtime execution time of the segment in milliseconds.
+- `execute_metered_time_ms` (gauge): The metered execution time in milliseconds. This is timed across **all** segments in the group rather than for an individual segment.
+- `execute_preflight_time_ms` (gauge): The preflight execution time of the segment in milliseconds.
   - If this is a segment in a VM with continuations enabled, a `segment: segment_idx` label is added to the metric.
+  - `memory_finalize_time_ms` (gauge): The time at the end of preflight execution spent on memory finalization.
 - `trace_gen_time_ms` (gauge): The time to generate non-cached trace matrices from execution records.
   - If this is a segment in a VM with continuations enabled, a `segment: segment_idx` label is added to the metric.
 - All metrics collected by [`openvm-stark-backend`](https://github.com/openvm-org/stark-backend/blob/main/docs/metrics.md), in particular `stark_prove_excluding_trace_time_ms` (gauge).
-  - The total proving time of the proof is the sum of `execute_time_ms + trace_gen_time_ms + stark_prove_excluding_trace_time_ms`.
-- `total_cycles` (counter): The total number of cycles in the segment.
+- The `total_proof_time_ms` of the proof is instrumented directly when possible. Otherwise, it is calculated as:
+  - The sum `execute_preflight_time_ms + trace_gen_time_ms + stark_prove_excluding_trace_time_ms`. The `execute_metered_time_ms` is excluded for app proofs because it is not run on a per-segment basis.
+- `insns` (counter): The total number of instructions executed in the segment.
 - `main_cells_used` (counter): The total number of main trace cells used by all chips in the segment. This does not include cells needed to pad rows to power-of-two matrix heights. Only main trace cells, not preprocessed or permutation trace cells, are counted.
+- `total_cells_used` (counter): The total number of preprocessed, main, and permutation trace cells used by all chips in the segment. This does not include cells needed to pad rows to power-of-two matrix heights.
 
 ## Scoping
 
diff --git a/docs/crates/vm-extensions.md b/docs/crates/vm-extensions.md
index 490bac08c5..a27f56fca3 100644
--- a/docs/crates/vm-extensions.md
+++ b/docs/crates/vm-extensions.md
@@ -2,7 +2,7 @@
 
 ```rust
 pub trait VmExtension<F: PrimeField32> {
-    type Executor: InstructionExecutor<F> + AnyEnum;
+    type Executor: PreflightExecutor<F> + AnyEnum;
     type Periphery: AnyEnum;
 
     fn build(
@@ -17,7 +17,7 @@ by them. This data is collected into a `VmInventory` struct, which is returned.
 
 To handle previous chip dependencies necessary for chip construction and also automatic bus index management, we provide a `VmInventoryBuilder` api.
 
-Due to strong types, we have **two** associated trait types `Executor, Periphery`. It is expected that `Executor` is an enum of all types implementing `InstructionExecutor + Chip` that this extension will construct. It is expected that `Periphery` is an enum of all types that implement `Chip` **but are not InstructionExecutor**. In general, it is always OK for the enum to have more kinds than necessary. For easy downcasting and enum wrangling, we also have an `AnyEnum` trait, which can always be derived by a macro.
+Due to strong types, we have **two** associated trait types `Executor, Periphery`. It is expected that `Executor` is an enum of all types implementing `PreflightExecutor + Chip` that this extension will construct. It is expected that `Periphery` is an enum of all types that implement `Chip` **but are not PreflightExecutor**. In general, it is always OK for the enum to have more kinds than necessary. For easy downcasting and enum wrangling, we also have an `AnyEnum` trait, which can always be derived by a macro.
 
 ### `VmInventory<Executor, Periphery>`
 
@@ -90,7 +90,7 @@ We have trait `VmConfig`:
 
 ```rust
 pub trait VmConfig<F: PrimeField32> {
-    type Executor: InstructionExecutor<F> + AnyEnum + ChipUsageGetter;
+    type Executor: PreflightExecutor<F> + AnyEnum + ChipUsageGetter;
     type Periphery: AnyEnum + ChipUsageGetter;
 
     /// Must contain system config
diff --git a/docs/crates/vm.md b/docs/crates/vm.md
index 989e9c8a88..a814cf010b 100644
--- a/docs/crates/vm.md
+++ b/docs/crates/vm.md
@@ -1,12 +1,12 @@
 # VM Architecture and Chips
 
-### `InstructionExecutor` Trait
+### `PreflightExecutor` Trait
 
 We define an **instruction** to be an **opcode** combined with the **operands** for the opcode. Running the instrumented
 runtime for an opcode is encapsulated in the following trait:
 
 ```rust
-pub trait InstructionExecutor<F> {
+pub trait PreflightExecutor<F> {
     /// Runtime execution of the instruction, if the instruction is owned by the
     /// current instance. May internally store records of this call for later trace generation.
     fn execute(
@@ -26,14 +26,14 @@ Opcodes are partitioned into groups, each of which is handled by a single **chip
 type `C` and associated Air of type `A` which satisfy the following trait bounds:
 
 ```rust
-C: Chip<SC> + InstructionExecutor<F>
+C: Chip<SC> + PreflightExecutor<F>
 A: Air<AB> + BaseAir<F> + BaseAirWithPublicValues<F>
 ```
 
 Together, these provide the following functionalities:
 
 - **Keygen:** Performed via the `Air::<AB>::eval()` function.
-- **Trace Generation:** This is done by calling `InstructionExecutor::<F>::execute()` which computes and stores
+- **Trace Generation:** This is done by calling `PreflightExecutor::<F>::execute()` which computes and stores
   execution records and then `Chip::<SC>::generate_air_proof_input()` which generates the trace using the corresponding
   records.
 
@@ -59,6 +59,7 @@ pub trait PhantomSubExecutor<F> {
         &mut self,
         memory: &MemoryController<F>,
         streams: &mut Streams<F>,
+        rng: &mut StdRng,
         discriminant: PhantomDiscriminant,
         a: F,
         b: F,
@@ -88,7 +89,7 @@ The engine type `E` should be `openvm_stark_backend::engine::StarkEngine<SC> `an
 
 ```rust
 pub trait VmConfig<F: PrimeField32>: Clone + Serialize + DeserializeOwned {
-  type Executor: InstructionExecutor<F> + AnyEnum + ChipUsageGetter;
+  type Executor: PreflightExecutor<F> + AnyEnum + ChipUsageGetter;
   type Periphery: AnyEnum + ChipUsageGetter;
 
   /// Must contain system config
@@ -318,7 +319,7 @@ pub struct VmAirWrapper<A, C> {
 
 They implement the following traits:
 
-- `InstructionExecutor<F>` is implemented on `VmChipWrapper<F, A, C>`, where the `execute()` function:
+- `PreflightExecutor<F>` is implemented on `VmChipWrapper<F, A, C>`, where the `execute()` function:
   - calls `preprocess()` on `A` with `memory` and the raw `instruction`
   - calls `execute_instruction()` on `C` with the raw `instruction`, `from_pc`, and `reads` from `preprocess()`
   - calls `postprocess()` on `A` with the raw `instruction`, `from_state`, the `output: AdapterRuntimeContext` from `execute_instruction()`, and the `read_record`
diff --git a/book/src/custom-extensions/algebra.md b/docs/vocs/docs/pages/book/acceleration-using-extensions/algebra.mdx
similarity index 86%
rename from book/src/custom-extensions/algebra.md
rename to docs/vocs/docs/pages/book/acceleration-using-extensions/algebra.mdx
index c4bcc936b1..948de823d8 100644
--- a/book/src/custom-extensions/algebra.md
+++ b/docs/vocs/docs/pages/book/acceleration-using-extensions/algebra.mdx
@@ -1,6 +1,6 @@
 # Algebra (Modular Arithmetic)
 
-The OpenVM Algebra extension provides tools to create and manipulate modular arithmetic structures and their complex extensions. For example, if \\(p\\) is prime, OpenVM Algebra can handle modular arithmetic in \\(\mathbb{F}\_p\\)​ and its quadratic extension fields \\(\mathbb{F}\_p[x]/(x^2 + 1)\\).
+The OpenVM Algebra extension provides tools to create and manipulate modular arithmetic structures and their complex extensions. For example, if $p$ is prime, OpenVM Algebra can handle modular arithmetic in $\mathbb{F}_p$ and its quadratic extension fields $\mathbb{F}_p[x]/(x^2 + 1)$.
 
 The functional part is provided by the `openvm-algebra-guest` crate, which is a guest library that can be used in any OpenVM program. The macros for creating corresponding structs are in the `openvm-algebra-moduli-macros` and `openvm-algebra-complex-macros` crates.
 
@@ -22,7 +22,7 @@ The functional part is provided by the `openvm-algebra-guest` crate, which is a
 
 ## Modular arithmetic
 
-To [leverage](./overview.md) compile-time known moduli for performance, you declare and initialize the arithmetic structures:
+To [leverage](/book/acceleration-using-extensions/overview) compile-time known moduli for performance, you declare and initialize the arithmetic structures:
 
 1. **Declare**: Use the `moduli_declare!` macro to define a modular arithmetic struct. This can be done multiple times in various crates or modules:
 
@@ -37,7 +37,7 @@ This creates `Bls12_381Fp` and `Bn254Fp` structs, each implementing the `IntMod`
 Since both moduli are prime, both structs also implement the `Field` and `Sqrt` traits.
 The modulus parameter must be a string literal in decimal or hexadecimal format.
 
-2. **Init**: Use the [`openvm::init!` macro](./overview.md#automating-the-init-step) exactly once in the final binary:
+2. **Init**: Use the [`openvm::init!` macro](/book/acceleration-using-extensions/overview#automating-the-init-step) exactly once in the final binary:
 
 ```rust
 openvm::init!();
@@ -58,7 +58,7 @@ This step enumerates the declared moduli (e.g., `0` for the first one, `1` for t
 
 ## Complex field extension
 
-Complex extensions, such as \\(\mathbb{F}\_p[x]/(x^2 + 1)\\), are defined similarly using `complex_declare!` and `complex_init!`:
+Complex extensions, such as $\mathbb{F}_p[x]/(x^2 + 1)$, are defined similarly using `complex_declare!` and `complex_init!`:
 
 1. **Declare**:
 
@@ -70,7 +70,7 @@ complex_declare! {
 
 This creates a `Bn254Fp2` struct, representing a complex extension field. The `mod_type` must implement `IntMod`.
 
-2. **Init**: After calling `complex_declare!`, the [`openvm::init!` macro](./overview.md#automating-the-init-step) will now expand to the appropriate call to `complex_init!`.
+2. **Init**: After calling `complex_declare!`, the [`openvm::init!` macro](/book/acceleration-using-extensions/overview#automating-the-init-step) will now expand to the appropriate call to `complex_init!`.
 
 ```rust
 openvm::init!();
@@ -106,8 +106,8 @@ Also, each modulus in `[app_vm_config.fp2]` must be paired with the name of the
 
 Here is a toy example using both the modular arithmetic and complex field extension capabilities:
 
-```rust,no_run,noplayground
-{{ #include ../../../examples/algebra/src/main.rs }}
+```rust
+// [!include ~/snippets/examples/algebra/src/main.rs]
 ```
 
 To have the correct imports for the above example, add the following to the `Cargo.toml` file:
diff --git a/book/src/custom-extensions/bigint.md b/docs/vocs/docs/pages/book/acceleration-using-extensions/big-integer.mdx
similarity index 100%
rename from book/src/custom-extensions/bigint.md
rename to docs/vocs/docs/pages/book/acceleration-using-extensions/big-integer.mdx
diff --git a/book/src/custom-extensions/ecc.md b/docs/vocs/docs/pages/book/acceleration-using-extensions/elliptic-curve-cryptography.mdx
similarity index 87%
rename from book/src/custom-extensions/ecc.md
rename to docs/vocs/docs/pages/book/acceleration-using-extensions/elliptic-curve-cryptography.mdx
index de8f31ab62..3abfed9213 100644
--- a/book/src/custom-extensions/ecc.md
+++ b/docs/vocs/docs/pages/book/acceleration-using-extensions/elliptic-curve-cryptography.mdx
@@ -2,7 +2,7 @@
 
 The OpenVM Elliptic Curve Cryptography Extension provides support for elliptic curve operations through the `openvm-ecc-guest` crate.
 
-Developers can enable arbitrary Weierstrass curves by configuring this extension with the modulus for the coordinate field and the coefficients in the curve equation. Preset configurations for the secp256k1 and secp256r1 curves are provided through the [K256](../guest-libs/k256.md) and [P256](../guest-libs/p256.md) guest libraries.
+Developers can enable arbitrary Weierstrass curves by configuring this extension with the modulus for the coordinate field and the coefficients in the curve equation. Preset configurations for the secp256k1 and secp256r1 curves are provided through the [K256](/book/guest-libraries/k256) and [P256](/book/guest-libraries/p256) guest libraries.
 
 ## Available traits and methods
 
@@ -29,7 +29,7 @@ Developers can enable arbitrary Weierstrass curves by configuring this extension
 
 ## Macros
 
-For elliptic curve cryptography, the `openvm-ecc-guest` crate provides macros similar to those in [`openvm-algebra-guest`](./algebra.md):
+For elliptic curve cryptography, the `openvm-ecc-guest` crate provides macros similar to those in [`openvm-algebra-guest`](/book/acceleration-using-extensions/algebra):
 
 1. **Declare**: Use `sw_declare!` to define elliptic curves over the previously declared moduli. For example:
 
@@ -43,7 +43,7 @@ sw_declare! {
 Each declared curve must specify the `mod_type` (implementing `IntMod`) and a constant `b` for the Weierstrass curve equation \\(y^2 = x^3 + ax + b\\). `a` is optional and defaults to 0 for short Weierstrass curves.
 This creates `Bls12_381G1Affine` and `P256Affine` structs which implement the `Group` and `WeierstrassPoint` traits. The underlying memory layout of the structs uses the memory layout of the `Bls12_381Fp` and `P256Coord` structs, respectively.
 
-2. **Init**: Called once, the [`openvm::init!` macro](./overview.md#automating-the-init-step) produces a call to `sw_init!` that enumerates these curves and allows the compiler to produce optimized instructions:
+2. **Init**: Called once, the [`openvm::init!` macro](/book/acceleration-using-extensions/overview#automating-the-init-step) produces a call to `sw_init!` that enumerates these curves and allows the compiler to produce optimized instructions:
 
 ```rust
 openvm::init!();
@@ -59,7 +59,7 @@ sw_init! {
 - `sw_declare!`: Declares elliptic curve structures.
 - `init!`: Initializes them once, linking them to the underlying moduli.
 
-To use elliptic curve operations on a struct defined with `sw_declare!`, it is expected that the struct for the curve's coordinate field was defined using `moduli_declare!`. In particular, the coordinate field needs to be initialized and set up as described in the [algebra extension](./algebra.md) chapter.
+To use elliptic curve operations on a struct defined with `sw_declare!`, it is expected that the struct for the curve's coordinate field was defined using `moduli_declare!`. In particular, the coordinate field needs to be initialized and set up as described in the [algebra extension](/book/acceleration-using-extensions/algebra) chapter.
 
 For the basic operations provided by the `WeierstrassPoint` trait, the scalar field is not needed. For the ECDSA functions in the `ecdsa` module, the scalar field must also be declared, initialized, and set up.
 
diff --git a/book/src/custom-extensions/pairing.md b/docs/vocs/docs/pages/book/acceleration-using-extensions/elliptic-curve-pairing.mdx
similarity index 50%
rename from book/src/custom-extensions/pairing.md
rename to docs/vocs/docs/pages/book/acceleration-using-extensions/elliptic-curve-pairing.mdx
index a9e84b5895..ade7fb893b 100644
--- a/book/src/custom-extensions/pairing.md
+++ b/docs/vocs/docs/pages/book/acceleration-using-extensions/elliptic-curve-pairing.mdx
@@ -1,23 +1,23 @@
 # Elliptic Curve Pairing
 
-The pairing extension enables usage of the optimal Ate pairing check on the BN254 and BLS12-381 elliptic curves. The following field extension tower for \\(\mathbb{F}\_{p^{12}}\\) is used for pairings in this crate:
+The pairing extension enables usage of the optimal Ate pairing check on the BN254 and BLS12-381 elliptic curves. The following field extension tower for $\mathbb{F}_{p^{12}}$ is used for pairings in this crate:
 
 $$
-\mathbb{F_{p^2}} = \mathbb{F_{p}}[u]/(u^2 - \beta)\\\\
-\mathbb{F_{p^6}} = \mathbb{F_{p^2}}[v]/(v^3 - \xi)\\\\
+\mathbb{F_{p^2}} = \mathbb{F_{p}}[u]/(u^2 - \beta)\\
+\mathbb{F_{p^6}} = \mathbb{F_{p^2}}[v]/(v^3 - \xi)\\
 \mathbb{F_{p^{12}}} = \mathbb{F_{p^6}}[w]/(w^2 - v)
 $$
 
 The main feature of the pairing extension is the `pairing_check` function, which asserts that a product of pairings evaluates to 1.
 For example, for the BLS12-381 curve,
 
-```rust,no_run,noplayground
-{{ #include ../../../examples/pairing/src/main.rs:pairing_check }}
+```rust
+// [!include ~/snippets/examples/pairing/src/main.rs:pairing_check]
 ```
 
-This asserts that \\(e(p_0, q_0) e(p_1, q_1) = 1\\).
+This asserts that $e(p_0, q_0) e(p_1, q_1) = 1$.
 Naturally, this can be extended to more points by adding more elements to the arrays.
 
-The pairing extension additionally provides field operations in \\(\mathbb{F_{p^{12}}}\\) for both BN254 and BLS12-381 curves where \\(\mathbb{F}\\) is the coordinate field.
+The pairing extension additionally provides field operations in $\mathbb{F}_{p^{12}}$ for both BN254 and BLS12-381 curves where $\mathbb{F}_p$ is the coordinate field.
 
-See the [pairing guest library](../guest-libs/pairing.md) for usage details.
+See the [pairing guest library](/book/guest-libraries/pairing) for usage details.
diff --git a/book/src/custom-extensions/keccak.md b/docs/vocs/docs/pages/book/acceleration-using-extensions/keccak.mdx
similarity index 100%
rename from book/src/custom-extensions/keccak.md
rename to docs/vocs/docs/pages/book/acceleration-using-extensions/keccak.mdx
diff --git a/book/src/custom-extensions/overview.md b/docs/vocs/docs/pages/book/acceleration-using-extensions/overview.mdx
similarity index 67%
rename from book/src/custom-extensions/overview.md
rename to docs/vocs/docs/pages/book/acceleration-using-extensions/overview.mdx
index 2b07a73ec4..8f57cfcda6 100644
--- a/book/src/custom-extensions/overview.md
+++ b/docs/vocs/docs/pages/book/acceleration-using-extensions/overview.mdx
@@ -2,12 +2,12 @@
 
 OpenVM ships with a set of pre-built extensions maintained by the OpenVM team. Below, we highlight six of these extensions designed to accelerate common arithmetic and cryptographic operations that are notoriously expensive to execute. Some of these extensions have corresponding guest libraries which provide convenient, high-level interfaces for your guest program to interact with the extension.
 
-- [`openvm-keccak-guest`](./keccak.md) - Keccak256 hash function. See the [Keccak256 guest library](../guest-libs/keccak256.md) for usage details.
-- [`openvm-sha256-guest`](./sha256.md) - SHA-256 hash function. See the [SHA-2 guest library](../guest-libs/sha2.md) for usage details.
-- [`openvm-bigint-guest`](./bigint.md) - Big integer arithmetic for 256-bit signed and unsigned integers. See the [ruint guest library](../guest-libs/ruint.md) for using accelerated 256-bit integer ops in rust.
-- [`openvm-algebra-guest`](./algebra.md) - Modular arithmetic and complex field extensions.
-- [`openvm-ecc-guest`](./ecc.md) - Elliptic curve cryptography. See the [k256](../guest-libs/k256.md) and [p256](../guest-libs/p256.md) guest libraries for using this extension over the respective curves.
-- [`openvm-pairing-guest`](./pairing.md) - Elliptic curve optimal Ate pairings. See the [pairing guest library](../guest-libs/pairing.md) for usage details.
+- [`openvm-keccak-guest`](/book/acceleration-using-extensions/keccak) - Keccak256 hash function. See the [Keccak256 guest library](/book/guest-libraries/keccak256) for usage details.
+- [`openvm-sha256-guest`](/book/acceleration-using-extensions/sha-256) - SHA-256 hash function. See the [SHA-2 guest library](/book/guest-libraries/sha2) for usage details.
+- [`openvm-bigint-guest`](/book/acceleration-using-extensions/big-integer) - Big integer arithmetic for 256-bit signed and unsigned integers. See the [ruint guest library](/book/guest-libraries/ruint) for using accelerated 256-bit integer ops in Rust.
+- [`openvm-algebra-guest`](/book/acceleration-using-extensions/algebra) - Modular arithmetic and complex field extensions.
+- [`openvm-ecc-guest`](/book/acceleration-using-extensions/elliptic-curve-cryptography) - Elliptic curve cryptography. See the [k256](/book/guest-libraries/k256) and [p256](/book/guest-libraries/p256) guest libraries for using this extension over the respective curves.
+- [`openvm-pairing-guest`](/book/acceleration-using-extensions/elliptic-curve-pairing) - Elliptic curve optimal Ate pairings. See the [pairing guest library](/book/guest-libraries/pairing) for usage details.
 
 ## Optimizing Modular Arithmetic
 
@@ -24,7 +24,7 @@ Our design for the configuration procedure above was inspired by the [EVMMAX pro
 
 The `openvm` crate provides an `init!` macro to automate the **init** step:
 1. Call `openvm::init!()` exactly once in the code of the final program binary.
-2. When [compiling the program](../writing-apps/build.md), `cargo openvm build` will read the [configuration file](#configuration) to automatically generate the correct init code and write it to `<INIT_FILE_NAME>`, which defaults to `openvm_init.rs` in the manifest directory.
+2. When [compiling the program](/book/writing-apps/compiling), `cargo openvm build` will read the [configuration file](#configuration) to automatically generate the correct init code and write it to `<INIT_FILE_NAME>`, which defaults to `openvm_init.rs` in the manifest directory.
 3. The `openvm::init!()` macro will include the `openvm_init.rs` file into the final binary to complete the init process. You can call `openvm::init!(INIT_FILE_NAME)` to include init code from a different file if needed.
 
 ## Configuration
@@ -52,10 +52,10 @@ range_tuple_checker_sizes = [256, 8192]
 range_tuple_checker_sizes = [256, 8192]
 
 [app_vm_config.modular]
-supported_moduli = ["<modulus_1>", "<modulus_2>", ...]
+supported_moduli = ["<modulus_1>", "<modulus_2>", "..."]
 
 [app_vm_config.fp2]
-supported_moduli = ["<modulus_1>", "<modulus_2>", ...]
+supported_moduli = ["<modulus_1>", "<modulus_2>", "..."]
 
 [app_vm_config.pairing]
 supported_curves = ["Bls12_381", "Bn254"]
diff --git a/book/src/custom-extensions/sha256.md b/docs/vocs/docs/pages/book/acceleration-using-extensions/sha-256.mdx
similarity index 100%
rename from book/src/custom-extensions/sha256.md
rename to docs/vocs/docs/pages/book/acceleration-using-extensions/sha-256.mdx
diff --git a/book/src/advanced-usage/new-extension.md b/docs/vocs/docs/pages/book/advanced-usage/creating-a-new-extension.mdx
similarity index 100%
rename from book/src/advanced-usage/new-extension.md
rename to docs/vocs/docs/pages/book/advanced-usage/creating-a-new-extension.mdx
diff --git a/book/src/advanced-usage/recursion.md b/docs/vocs/docs/pages/book/advanced-usage/recursive-verification.mdx
similarity index 57%
rename from book/src/advanced-usage/recursion.md
rename to docs/vocs/docs/pages/book/advanced-usage/recursive-verification.mdx
index edfdc4b046..bd06a70332 100644
--- a/book/src/advanced-usage/recursion.md
+++ b/docs/vocs/docs/pages/book/advanced-usage/recursive-verification.mdx
@@ -1,3 +1,3 @@
 # Recursive Verification
 
-OpenVM supports recursively verifying its own proofs using the [Verify STARK guest library](https://github.com/openvm-org/openvm/tree/main/guest-libs/verify_stark). See its [dedicated page](../guest-libs/verify-stark.md) to learn more.
\ No newline at end of file
+OpenVM supports recursively verifying its own proofs using the [Verify STARK guest library](https://github.com/openvm-org/openvm/tree/main/guest-libs/verify_stark). See its [dedicated page](/book/guest-libraries/verify-stark) to learn more.
\ No newline at end of file
diff --git a/book/src/advanced-usage/sdk.md b/docs/vocs/docs/pages/book/advanced-usage/sdk.mdx
similarity index 67%
rename from book/src/advanced-usage/sdk.md
rename to docs/vocs/docs/pages/book/advanced-usage/sdk.mdx
index 16a1d9c6e5..25ab02a3b6 100644
--- a/book/src/advanced-usage/sdk.md
+++ b/docs/vocs/docs/pages/book/advanced-usage/sdk.mdx
@@ -2,33 +2,39 @@
 
 While the CLI provides a convenient way to build, prove, and verify programs, you may want more fine-grained control over the process. The OpenVM Rust SDK allows you to customize various aspects of the workflow programmatically.
 
-For more information on the basic CLI flow, see [Overview of Basic Usage](../writing-apps/overview.md). Writing a guest program is the same as in the CLI.
+For more information on the basic CLI flow, see [Overview of Basic Usage](/book/writing-apps/overview). Writing a guest program is the same as in the CLI.
 
 ## Imports and Setup
 
 If you have a guest program and would like to try running the **host program** specified in the next section, you can do so by adding the following imports and setup at the top of the file. You may need to modify the imports and/or the `SomeStruct` struct to match your program.
 
-```rust,no_run,noplayground
-{{ #include ../../../crates/sdk/examples/sdk_app.rs:dependencies }}
+```rust
+// [!include ~/snippets/examples_sdk/sdk_stark.rs:dependencies]
 ```
 
 ## Building and Transpiling a Program
 
 The SDK provides lower-level control over the building and transpiling process.
 
-```rust,no_run,noplayground
-{{ #include ../../../crates/sdk/examples/sdk_app.rs:build }}
-{{ #include ../../../crates/sdk/examples/sdk_app.rs:read_elf}}
+```rust
+// [!include ~/snippets/examples_sdk/sdk_stark.rs:build]
+// [!include ~/snippets/examples_sdk/sdk_stark.rs:read_elf]
 
-{{ #include ../../../crates/sdk/examples/sdk_app.rs:transpilation }}
+// [!include ~/snippets/examples_sdk/sdk_stark.rs:transpilation]
 ```
 
 ### Using `SdkVmConfig`
 
 The `SdkVmConfig` struct allows you to specify the extensions and system configuration your VM will use. To customize your own configuration, you can use the `SdkVmConfig::builder()` method and set the extensions and system configuration you want.
 
-```rust,no_run,noplayground
-{{ #include ../../../crates/sdk/examples/sdk_app.rs:vm_config }}
+```rust
+    let vm_config = SdkVmConfig::builder()
+        .system(Default::default())
+        .rv32i(Default::default())
+        .rv32m(Default::default())
+        .io(Default::default())
+        .build()
+        .optimize();
 ```
 
 > ℹ️
@@ -38,8 +44,8 @@ The `SdkVmConfig` struct allows you to specify the extensions and system configu
 
 To run your program and see the public value output, you can do the following:
 
-```rust,no_run,noplayground
-{{ #include ../../../crates/sdk/examples/sdk_app.rs:execution }}
+```rust
+// [!include ~/snippets/examples_sdk/sdk_stark.rs:execution]
 ```
 
 ### Using `StdIn`
@@ -62,8 +68,8 @@ There are two types of proofs that you can generate, with the sections below con
 
 After building and transpiling a program, you can then generate a proof. To do so, you need to commit your `VmExe`, generate an `AppProvingKey`, format your input into `StdIn`, and then generate a proof.
 
-```rust,no_run,noplayground
-{{ #include ../../../crates/sdk/examples/sdk_app.rs:proof_generation }}
+```rust
+// [!include ~/snippets/examples_sdk/sdk_stark.rs:proof_generation]
 ```
 
 For large guest programs, the program will be proved in multiple continuation segments and the returned `proof: ContinuationVmProof` object consists of multiple STARK proofs, one for each segment.
@@ -72,56 +78,44 @@ For large guest programs, the program will be proved in multiple continuation se
 
 After generating a proof, you can verify it. To do so, you need your verifying key (which you can get from your `AppProvingKey`) and the output of your `generate_app_proof` call.
 
-```rust,no_run,noplayground
-{{ #include ../../../crates/sdk/examples/sdk_app.rs:verification }}
+```rust
+// [!include ~/snippets/examples_sdk/sdk_stark.rs:verification]
 ```
 
 ## EVM Proof
 
 ### Setup
 
-To generate an EVM proof, you'll first need to ensure that you have followed the [CLI installation steps](../getting-started/install.md). get the appropriate KZG params by running the following command.
+To generate an EVM proof, you'll first need to ensure that you have followed the [CLI installation steps](/book/getting-started/install). Then get the appropriate KZG params by running the following command.
 
 ```bash
-cargo openvm setup
+cargo openvm setup --evm
 ```
 
 > ⚠️ **WARNING**
 >
-> `cargo openvm setup` requires very large amounts of computation and memory (~200 GB).
+> `cargo openvm setup --evm` requires very large amounts of computation and memory (~200 GB).
 
 <details>
 <summary>Also note that there are additional dependencies for the EVM Proof flow. Click here to view.</summary>
 
-```rust,no_run,noplayground
-{{ #include ../../../crates/sdk/examples/sdk_app.rs:dependencies }}
+```rust
+// [!include ~/snippets/examples_sdk/sdk_evm.rs:dependencies]
 ```
 
 </details>
 
-### Keygen
-
-Now, you'll need to generate the app proving key for the next step.
-
-```rust,no_run,noplayground
-{{ #include ../../../crates/sdk/examples/sdk_evm.rs:keygen }}
-```
-
-> ⚠️ **WARNING**
->
-> If you have run `cargo openvm setup` and don't need a specialized aggregation configuration, consider deserializing the proving key from the file `~/.openvm/agg.pk` instead of generating it, to save computation.
-
 ### EVM Proof Generation and Verification
 
 You can now run the aggregation keygen, proof, and verification functions for the EVM proof.
 
 **Note**: you **do not** need to generate the app proof with the `generate_app_proof` function, as the EVM proof function will handle this automatically.
 
-```rust,no_run,noplayground
-{{ #include ../../../crates/sdk/examples/sdk_evm.rs:evm_verification }}
+```rust
+// [!include ~/snippets/examples_sdk/sdk_evm.rs:evm_verification]
 ```
 
 > ⚠️ **WARNING**
 > The aggregation proving key `agg_pk` above is large. Avoid cloning it if possible.
 
-Note that `DEFAULT_PARAMS_DIR` is the directory where Halo2 parameters are stored by the `cargo openvm setup` CLI command. For more information on the setup process, see the `EVM Level` section of the [verify](../writing-apps/verify.md) doc.
+Note that `DEFAULT_PARAMS_DIR` is the directory where Halo2 parameters are stored by the `cargo openvm setup --evm` CLI command. For more information on the setup process, see the `EVM Level` section of the [verify](/book/writing-apps/verifying-proofs) doc.
diff --git a/docs/vocs/docs/pages/book/getting-started/install.mdx b/docs/vocs/docs/pages/book/getting-started/install.mdx
new file mode 100644
index 0000000000..6582fac8a5
--- /dev/null
+++ b/docs/vocs/docs/pages/book/getting-started/install.mdx
@@ -0,0 +1,69 @@
+# Install
+
+To use OpenVM for generating proofs, you must install the OpenVM command line tool `cargo-openvm`
+either via Git URL or by cloning the repo and building from source manually. Prior to either
+approach, you should make sure you have installed the necessary [Prerequisites](#installation-prerequisites).
+
+## Option 1: Install Via Git URL (Recommended)
+
+Begin the installation:
+
+```bash
+cargo +1.86 install --locked --git https://github.com/openvm-org/openvm.git --tag v1.4.0-rc.4 cargo-openvm
+```
+
+This will globally install `cargo-openvm`. You can validate a successful installation with:
+
+```bash
+cargo openvm --version
+```
+
+## Option 2: Build from source
+
+To build from source, clone the repository and begin the installation.
+
+```bash
+git clone --branch v1.4.0-rc.4 --single-branch https://github.com/openvm-org/openvm.git
+cd openvm
+cargo install --locked --force --path crates/cli
+```
+
+This will globally install `cargo-openvm`. You can validate a successful installation with:
+
+```bash
+cargo openvm --version
+```
+
+## Installation Prerequisites
+
+Prior to installing `cargo-openvm`, make sure you have the following packages installed:
+
+:::code-group
+
+```bash [Ubuntu]
+# install Rust
+curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
+. "$HOME/.cargo/env"
+
+# install nightly Rust toolchain for binary builds
+rustup install nightly-2025-02-14
+rustup component add rust-src --toolchain nightly-2025-02-14
+
+# install build tools for Ubuntu
+sudo apt update
+sudo apt install -y build-essential   # gcc, g++, make, libc headers
+```
+
+```bash [macOS]
+# install Rust
+curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
+. "$HOME/.cargo/env"
+
+# install nightly Rust toolchain for binary builds
+rustup install nightly-2025-02-14
+rustup component add rust-src --toolchain nightly-2025-02-14
+
+# install build tools for macOS
+xcode-select --install
+brew install cmake git curl
+```
\ No newline at end of file
diff --git a/book/src/introduction.md b/docs/vocs/docs/pages/book/getting-started/introduction.mdx
similarity index 83%
rename from book/src/introduction.md
rename to docs/vocs/docs/pages/book/getting-started/introduction.mdx
index ed39cbe33a..e1192cc35f 100644
--- a/book/src/introduction.md
+++ b/docs/vocs/docs/pages/book/getting-started/introduction.mdx
@@ -26,11 +26,11 @@ OpenVM is an open-source zero-knowledge virtual machine (zkVM) framework focused
 
 The following chapters will guide you through:
 
-- [Getting started](./getting-started/install.md).
-- [Writing applications](./writing-apps/overview.md) in Rust targeting OpenVM and generating proofs.
-- [Acceleration using extensions](./custom-extensions/overview.md) to optimize your Rust programs.
-- An assortment of helpful [guest libraries](./guest-libs/keccak256.md) designed for direct use within your guest programs.
-- [How to add custom VM extensions](./advanced-usage/new-extension.md).
+- [Getting started](/book/getting-started/install).
+- [Writing applications](/book/writing-apps/overview) in Rust targeting OpenVM and generating proofs.
+- [Acceleration using extensions](/book/acceleration-using-extensions/overview) to optimize your Rust programs.
+- An assortment of helpful [guest libraries](/book/guest-libraries/keccak256) designed for direct use within your guest programs.
+- [How to add custom VM extensions](/book/advanced-usage/creating-a-new-extension).
 
 ## Security Status
 
diff --git a/book/src/getting-started/quickstart.md b/docs/vocs/docs/pages/book/getting-started/quickstart.mdx
similarity index 98%
rename from book/src/getting-started/quickstart.md
rename to docs/vocs/docs/pages/book/getting-started/quickstart.mdx
index 638fa7fff0..5a11eefd57 100644
--- a/book/src/getting-started/quickstart.md
+++ b/docs/vocs/docs/pages/book/getting-started/quickstart.mdx
@@ -14,7 +14,7 @@ This will generate an OpenVM-specific starter package. Notice `Cargo.toml` has t
 
 ```toml
 [dependencies]
-openvm = { git = "https://github.com/openvm-org/openvm.git", tag = "v1.3.0", features = ["std"] }
+openvm = { git = "https://github.com/openvm-org/openvm.git", tag = "v1.4.0-rc.4", features = ["std"] }
 ```
 
 Note that `std` is not enabled by default, so explicitly enabling it is required.
diff --git a/book/src/guest-libs/k256.md b/docs/vocs/docs/pages/book/guest-libraries/k256.mdx
similarity index 69%
rename from book/src/guest-libs/k256.md
rename to docs/vocs/docs/pages/book/guest-libraries/k256.mdx
index 44aa4d2743..5a36936986 100644
--- a/book/src/guest-libs/k256.md
+++ b/docs/vocs/docs/pages/book/guest-libraries/k256.mdx
@@ -1,6 +1,6 @@
 # K256
 
-The K256 guest library uses [`openvm-ecc-guest`](../custom-extensions/ecc.md) to provide elliptic curve operations over the Secp256k1 curve. It is intended as a patch for the [`k256`](https://crates.io/crates/k256) rust crate and can be swapped in for accelerated signature verification usage. Note that signing from a private key is not supported. 
+The K256 guest library uses [`openvm-ecc-guest`](/book/acceleration-using-extensions/elliptic-curve-cryptography) to provide elliptic curve operations over the Secp256k1 curve. It is intended as a patch for the [`k256`](https://crates.io/crates/k256) rust crate and can be swapped in for accelerated signature verification usage. Note that signing from a private key is not supported. 
 
 
 ## Example program
@@ -17,19 +17,19 @@ openvm-k256 = { git = "https://github.com/openvm-org/openvm.git", package = "k25
 
 The guest library provides a `Secp256k1Coord`, which represents a field element on the coordinate field of Secp256k1, and a `Secp256k1Point`, which represents an Secp256k1 elliptic curve point.
 
-The K256 guest library handles the "Declare" phase described in [Optimizing Modular Arithmetic](../custom-extensions/overview.md#optimizing-modular-arithmetic). The consuming guest program is responsible for running the "Init" phase via `openvm::init!()`.
+The K256 guest library handles the "Declare" phase described in [Optimizing Modular Arithmetic](/book/acceleration-using-extensions/overview#optimizing-modular-arithmetic). The consuming guest program is responsible for running the "Init" phase via `openvm::init!()`.
 
-```rust,no_run,noplayground
-{{ #include ../../../examples/ecc/src/main.rs:imports }}
-{{ #include ../../../examples/ecc/src/main.rs:init }}
+```rust
+// [!include ~/snippets/examples/ecc/src/main.rs:imports]
+// [!include ~/snippets/examples/ecc/src/main.rs:init]
 ```
 
 `moduli_init!` is called for both the coordinate and scalar field because they were declared in the `k256` module, although we will not be using the scalar field below.
 
 With the above we can start doing elliptic curve operations like adding points:
 
-```rust,no_run,noplayground
-{{ #include ../../../examples/ecc/src/main.rs:main }}
+```rust
+// [!include ~/snippets/examples/ecc/src/main.rs:main]
 ```
 
 ### Config parameters
@@ -48,7 +48,7 @@ a = "0"
 b = "7"
 ```
 
-The `supported_moduli` parameter is a list of moduli that the guest program will use. As mentioned in the [algebra extension](../custom-extensions/algebra.md) chapter, the order of moduli in `[app_vm_config.modular]` must match the order in the `moduli_init!` macro.
+The `supported_moduli` parameter is a list of moduli that the guest program will use. As mentioned in the [algebra extension](/book/acceleration-using-extensions/algebra) chapter, the order of moduli in `[app_vm_config.modular]` must match the order in the `moduli_init!` macro.
 
 The `ecc.supported_curves` parameter is a list of supported curves that the guest program will use. They must be provided in decimal format in the `.toml` file. For multiple curves create multiple `[[app_vm_config.ecc.supported_curves]]` sections. The order of curves in `[[app_vm_config.ecc.supported_curves]]` must match the order in the `sw_init!` macro.
 Also, the `struct_name` field must be the name of the elliptic curve struct created by `sw_declare!`.
diff --git a/book/src/guest-libs/keccak256.md b/docs/vocs/docs/pages/book/guest-libraries/keccak256.mdx
similarity index 85%
rename from book/src/guest-libs/keccak256.md
rename to docs/vocs/docs/pages/book/guest-libraries/keccak256.mdx
index af125f7a0e..73da02c1e9 100644
--- a/book/src/guest-libs/keccak256.md
+++ b/docs/vocs/docs/pages/book/guest-libraries/keccak256.mdx
@@ -9,9 +9,9 @@ See the full example [here](https://github.com/openvm-org/openvm/blob/main/examp
 
 ## Example
 
-```rust,no_run,noplayground
-{{ #include ../../../examples/keccak/src/main.rs:imports }}
-{{ #include ../../../examples/keccak/src/main.rs:main }}
+```rust
+// [!include ~/snippets/examples/keccak/src/main.rs:imports]
+// [!include ~/snippets/examples/keccak/src/main.rs:main]
 ```
 
 To be able to import the `keccak256` function, add the following to your `Cargo.toml` file:
diff --git a/book/src/guest-libs/p256.md b/docs/vocs/docs/pages/book/guest-libraries/p256.mdx
similarity index 68%
rename from book/src/guest-libs/p256.md
rename to docs/vocs/docs/pages/book/guest-libraries/p256.mdx
index 2d39422cc0..5d7955ba64 100644
--- a/book/src/guest-libs/p256.md
+++ b/docs/vocs/docs/pages/book/guest-libraries/p256.mdx
@@ -1,6 +1,6 @@
 # P256
 
-The P256 guest library uses [`openvm-ecc-guest`](../custom-extensions/ecc.md) to provide elliptic curve operations over the Secp256r1 curve. It is intended as a patch for the [`p256`](https://crates.io/crates/p256) rust crate and can be swapped in for accelerated signature verification usage. Note that signing from a private key is not supported.  
+The P256 guest library uses [`openvm-ecc-guest`](/book/acceleration-using-extensions/elliptic-curve-cryptography) to provide elliptic curve operations over the Secp256r1 curve. It is intended as a patch for the [`p256`](https://crates.io/crates/p256) rust crate and can be swapped in for accelerated signature verification usage. Note that signing from a private key is not supported.  
 
 
 ### Config parameters
@@ -19,7 +19,7 @@ a = "115792089210356248762697446949407573530086143415290314195533631308867097853
 b = "41058363725152142129326129780047268409114441015993725554835256314039467401291"
 ```
 
-The `supported_moduli` parameter is a list of moduli that the guest program will use. As mentioned in the [algebra extension](../custom-extensions/algebra.md) chapter, the order of moduli in `[app_vm_config.modular]` must match the order in the `moduli_init!` macro.
+The `supported_moduli` parameter is a list of moduli that the guest program will use. As mentioned in the [algebra extension](/book/acceleration-using-extensions/algebra) chapter, the order of moduli in `[app_vm_config.modular]` must match the order in the `moduli_init!` macro.
 
 The `ecc.supported_curves` parameter is a list of supported curves that the guest program will use. They must be provided in decimal format in the `.toml` file. For multiple curves create multiple `[[app_vm_config.ecc.supported_curves]]` sections. The order of curves in `[[app_vm_config.ecc.supported_curves]]` must match the order in the `sw_init!` macro.
 Also, the `struct_name` field must be the name of the elliptic curve struct created by `sw_declare!`.
diff --git a/book/src/guest-libs/pairing.md b/docs/vocs/docs/pages/book/guest-libraries/pairing.mdx
similarity index 75%
rename from book/src/guest-libs/pairing.md
rename to docs/vocs/docs/pages/book/guest-libraries/pairing.mdx
index bb07c27f0c..bec6eaf777 100644
--- a/book/src/guest-libs/pairing.md
+++ b/docs/vocs/docs/pages/book/guest-libraries/pairing.mdx
@@ -1,22 +1,22 @@
 # Elliptic Curve Pairing
 
-We'll be working with an example using the BLS12-381 elliptic curve. This is in addition to the setup that needs to be done in the [Writing a Program](../writing-apps/write-program.md) section.
+We'll be working with an example using the BLS12-381 elliptic curve. This is in addition to the setup that needs to be done in the [Writing a Program](/book/writing-apps/writing-a-program) section.
 
 In the guest program, we will import the `PairingCheck` and `IntMod` traits, along with the BLS12-381 curve structs (**IMPORTANT:** this requires the `bls12_381` feature enabled in Cargo.toml for the `openvm-pairing` dependency), and a few other values that we will need:
 
-```rust,no_run,noplayground title="guest program"
-{{ #include ../../../examples/pairing/src/main.rs:imports }}
+```rust title="guest program"
+// [!include ~/snippets/examples/pairing/src/main.rs:imports]
 ```
 
-Additionally, we'll need to initialize our moduli and `Fp2` struct via the following macros. For a more in-depth description of these macros, please see the [OpenVM Algebra](./algebra.md) section.
+Additionally, we'll need to initialize our moduli and `Fp2` struct via the following macros. For a more in-depth description of these macros, please see the [OpenVM Algebra](/book/acceleration-using-extensions/algebra) section.
 
-```rust,no_run,noplayground
-{{ #include ../../../examples/pairing/src/main.rs:init }}
+```rust
+// [!include ~/snippets/examples/pairing/src/main.rs:init]
 ```
 
 ## Input values
 
-The inputs to the pairing check are `AffinePoint`s in \\(\mathbb{F}\_p\\) and \\(\mathbb{F}\_{p^2}\\). They can be constructed via the `AffinePoint::new` function, with the inner `Fp` and `Fp2` values constructed via various `from_...` functions.
+The inputs to the pairing check are `AffinePoint`s in $\mathbb{F}_p$ and $\mathbb{F}_{p^2}$. They can be constructed via the `AffinePoint::new` function, with the inner `Fp` and `Fp2` values constructed via various `from_...` functions.
 
 We can create a new struct to hold these `AffinePoint`s for the purpose of this guide. You may instead put them into a custom struct to serve your use case.
 
@@ -34,8 +34,8 @@ pub struct PairingCheckInput {
 
 Most users that use the pairing extension will want to assert that a pairing is valid (the final exponentiation equals one). With the `PairingCheck` trait imported from the previous section, we have access to the `pairing_check` function on the `Bls12_381` struct. After reading in the input struct, we can use its values in the `pairing_check`:
 
-```rust,no_run,noplayground
-{{ #include ../../../examples/pairing/src/main.rs:pairing_check }}
+```rust
+// [!include ~/snippets/examples/pairing/src/main.rs:pairing_check]
 ```
 
 ## Additional functionality
@@ -88,11 +88,6 @@ RUST_MIN_STACK=8388608 cargo openvm keygen
 
 This [example code](https://github.com/openvm-org/openvm/blob/main/examples/pairing/src/main.rs) contains hardcoded values and no inputs as an example that can be run via the CLI.
 
-```rust,no_run,noplayground
-{{ #include ../../../examples/pairing/src/main.rs:pre }}
-{{ #include ../../../examples/pairing/src/main.rs:imports }}
-
-{{ #include ../../../examples/pairing/src/main.rs:init }}
-
-{{ #include ../../../examples/pairing/src/main.rs:main }}
+```rust
+// [!include ~/snippets/examples/pairing/src/main.rs]
 ```
diff --git a/book/src/guest-libs/ruint.md b/docs/vocs/docs/pages/book/guest-libraries/ruint.mdx
similarity index 81%
rename from book/src/guest-libs/ruint.md
rename to docs/vocs/docs/pages/book/guest-libraries/ruint.mdx
index 9ffd28041e..2886af3360 100644
--- a/book/src/guest-libs/ruint.md
+++ b/docs/vocs/docs/pages/book/guest-libraries/ruint.mdx
@@ -1,13 +1,13 @@
 # Ruint 
 
-The Ruint guest library is a fork of [ruint](https://github.com/recmo/uint) that allows for patching of U256 operations with logic from [openvm-bigint-guest](../custom-extensions/bigint.md).
+The Ruint guest library is a fork of [ruint](https://github.com/recmo/uint) that allows for patching of U256 operations with logic from [openvm-bigint-guest](/book/acceleration-using-extensions/big-integer).
 
 ## Example matrix multiplication using `U256`
 
 See the full example [here](https://github.com/openvm-org/openvm/blob/main/examples/u256/src/main.rs).
 
-```rust,no_run,noplayground
-{{ #include ../../../examples/u256/src/main.rs }}
+```rust
+// [!include ~/snippets/examples/u256/src/main.rs]
 ```
 
 To be able to import the `U256` struct, add the following to your `Cargo.toml` file:
@@ -20,8 +20,8 @@ openvm-ruint = { git = "https://github.com/openvm-org/openvm.git", package = "ru
 
 See the full example [here](https://github.com/openvm-org/openvm/blob/main/examples/i256/src/main.rs).
 
-```rust,no_run,noplayground
-{{ #include ../../../examples/i256/src/main.rs }}
+```rust
+// [!include ~/snippets/examples/i256/src/main.rs]
 ```
 
 To be able to import the `I256` struct, add the following to your `Cargo.toml` file:
diff --git a/book/src/guest-libs/sha2.md b/docs/vocs/docs/pages/book/guest-libraries/sha2.mdx
similarity index 88%
rename from book/src/guest-libs/sha2.md
rename to docs/vocs/docs/pages/book/guest-libraries/sha2.mdx
index cd35cf2e02..b3db64a1f4 100644
--- a/book/src/guest-libs/sha2.md
+++ b/docs/vocs/docs/pages/book/guest-libraries/sha2.mdx
@@ -17,9 +17,9 @@ See the full example [here](https://github.com/openvm-org/openvm/blob/main/examp
 
 ### Example
 
-```rust,no_run,noplayground
-{{ #include ../../../examples/sha256/src/main.rs:imports }}
-{{ #include ../../../examples/sha256/src/main.rs:main }}
+```rust
+// [!include ~/snippets/examples/sha256/src/main.rs:imports]
+// [!include ~/snippets/examples/sha256/src/main.rs:main]
 ```
 
 To be able to import the `sha256` function, add the following to your `Cargo.toml` file:
diff --git a/book/src/guest-libs/verify-stark.md b/docs/vocs/docs/pages/book/guest-libraries/verify-stark.mdx
similarity index 100%
rename from book/src/guest-libs/verify-stark.md
rename to docs/vocs/docs/pages/book/guest-libraries/verify-stark.mdx
diff --git a/book/src/writing-apps/build.md b/docs/vocs/docs/pages/book/writing-apps/compiling.mdx
similarity index 97%
rename from book/src/writing-apps/build.md
rename to docs/vocs/docs/pages/book/writing-apps/compiling.mdx
index 4f0d207598..50bdb54de3 100644
--- a/book/src/writing-apps/build.md
+++ b/docs/vocs/docs/pages/book/writing-apps/compiling.mdx
@@ -25,7 +25,7 @@ OpenVM-specific artifacts will be placed in `${target_dir}/openvm/`, but if `--o
 
 - `--config <CONFIG>`
 
-  **Description**: Path to the OpenVM config `.toml` file that specifies the VM extensions. By default will search the manifest directory for `openvm.toml`. If no file is found, OpenVM will use a default configuration. Currently the CLI only supports known extensions listed in the [Using Existing Extensions](../custom-extensions/overview.md) section. To use other extensions, use the [SDK](../advanced-usage/sdk.md).
+  **Description**: Path to the OpenVM config `.toml` file that specifies the VM extensions. By default, the CLI will search the manifest directory for `openvm.toml`. If no file is found, OpenVM will use a default configuration. Currently the CLI only supports known extensions listed in the [Using Existing Extensions](/book/acceleration-using-extensions/overview) section. To use other extensions, use the [SDK](/book/advanced-usage/sdk).
 
 - `--output_dir <OUTPUT_DIR>`
 
diff --git a/book/src/writing-apps/prove.md b/docs/vocs/docs/pages/book/writing-apps/generating-proofs.mdx
similarity index 88%
rename from book/src/writing-apps/prove.md
rename to docs/vocs/docs/pages/book/writing-apps/generating-proofs.mdx
index 1a4b3cc1e6..8eaf797e77 100644
--- a/book/src/writing-apps/prove.md
+++ b/docs/vocs/docs/pages/book/writing-apps/generating-proofs.mdx
@@ -24,7 +24,7 @@ The proving and verification key will be written to `${target_dir}/openvm/` (and
 
 ## Proof Generation
 
-The `prove` CLI command, at its core, uses the options below. `prove` gets access to all of the options that `run` has (see [Running a Program](../writing-apps/run.md) for more information).
+The `prove` CLI command, at its core, uses the options below. `prove` gets access to all of the options that `run` has (see [Running a Program](/book/writing-apps/running-a-program) for more information).
 
 ```bash
 cargo openvm prove [app | stark | evm]
@@ -45,9 +45,9 @@ If `--proof` is not provided then the command will write the proof to `./${bin_n
 The `app` subcommand generates an application-level proof, the `stark` command generates an aggregated root-level proof, while the `evm` command generates an end-to-end EVM proof. For more information on aggregation, see [this specification](https://github.com/openvm-org/openvm/blob/bf8df90b13f4e80bb76dbb71f255a12154c84838/docs/specs/continuations.md).
 
 > ⚠️ **WARNING**
-> In order to run the `evm` subcommand, you must have previously called the costly `cargo openvm setup`, which requires very large amounts of computation and memory (~200 GB).
+> In order to run the `evm` subcommand, you must have previously called the costly `cargo openvm setup --evm`, which requires very large amounts of computation and memory (~200 GB).
 
-See [EVM Proof Format](./verify.md#evm-proof-json-format) for details on the output format for `cargo openvm prove evm`.
+See [EVM Proof Format](./verifying-proofs#evm-proof-json-format) for details on the output format for `cargo openvm prove evm`.
 
 ## Commit Hashes
 
diff --git a/book/src/writing-apps/overview.md b/docs/vocs/docs/pages/book/writing-apps/overview.mdx
similarity index 87%
rename from book/src/writing-apps/overview.md
rename to docs/vocs/docs/pages/book/writing-apps/overview.mdx
index 9603ddef75..1ad9b420fb 100644
--- a/book/src/writing-apps/overview.md
+++ b/docs/vocs/docs/pages/book/writing-apps/overview.mdx
@@ -2,7 +2,7 @@
 
 ## Writing a Program
 
-The first step to using OpenVM is to write a Rust program that can be executed by an OpenVM virtual machine. Writing a program for OpenVM is very similar to writing a standard Rust program, with a few key differences necessary to support the OpenVM environment. For more detailed information about writing programs, see the [Writing Programs](./write-program.md) guide.
+The first step to using OpenVM is to write a Rust program that can be executed by an OpenVM virtual machine. Writing a program for OpenVM is very similar to writing a standard Rust program, with a few key differences necessary to support the OpenVM environment. For more detailed information about writing programs, see the [Writing Programs](./writing-a-program) guide.
 
 ## Building and Transpiling a Program
 
@@ -22,7 +22,7 @@ cargo openvm run --input <path_to_input | hex_string>
 
 Note if your program doesn't require inputs, you can omit the `--input` flag.
 
-For more information see the [build](./build.md) and [run](./run.md) docs.
+For more information see the [build](./compiling) and [run](./running-a-program) docs.
 
 ### Inputs
 
@@ -32,7 +32,7 @@ Each hex string (either in the file or as the direct input) is either:
 - Hex string of bytes, which is prefixed with `0x01`
 - Hex string of native field elements (represented as u32, little endian), prefixed with `0x02`
 
-If you are providing input for a struct of type `T` that will be deserialized by the `openvm::io::read()` function, then the corresponding hex string should be prefixed by `0x01` followed by the serialization of `T` into bytes according to `openvm::serde::to_vec`. The serialization will serialize primitive types (e.g., `u8, u16, u32, u64`) into little-endian bytes. All serialized bytes are zero-padded to a multiple of `4` byte length. For more details on how to serialize complex types into a VM-readable format, see the **Using StdIn** section of the [SDK](../advanced-usage/sdk.md#using-stdin) doc.
+If you are providing input for a struct of type `T` that will be deserialized by the `openvm::io::read()` function, then the corresponding hex string should be prefixed by `0x01` followed by the serialization of `T` into bytes according to `openvm::serde::to_vec`. The serialization will serialize primitive types (e.g., `u8, u16, u32, u64`) into little-endian bytes. All serialized bytes are zero-padded to a multiple of `4` byte length. For more details on how to serialize complex types into a VM-readable format, see the **Using StdIn** section of the [SDK](/book/advanced-usage/sdk#using-stdin) doc.
 
 ## Generating a Proof
 
@@ -42,7 +42,7 @@ To generate a proof, you first need to generate a proving and verifying key:
 cargo openvm keygen
 ```
 
-If you are using custom VM extensions, this will depend on the `openvm.toml` file which encodes the VM extension configuration; see the [custom extensions](../custom-extensions/overview.md) docs for more information about `openvm.toml`. After generating the keys, you can generate a proof by running:
+If you are using custom VM extensions, this will depend on the `openvm.toml` file which encodes the VM extension configuration; see the [custom extensions](/book/acceleration-using-extensions/overview) docs for more information about `openvm.toml`. After generating the keys, you can generate a proof by running:
 
 ```bash
 cargo openvm prove app --input <path_to_input | hex_string>
@@ -50,7 +50,7 @@ cargo openvm prove app --input <path_to_input | hex_string>
 
 Again, if your program doesn't require inputs, you can omit the `--input` flag.
 
-For more information on the `keygen` and `prove` commands, see the [prove](./prove.md) doc.
+For more information on the `keygen` and `prove` commands, see the [prove](./generating-proofs) doc.
 
 ## Verifying a Proof
 
@@ -60,7 +60,7 @@ To verify a proof using the CLI, you need to provide the verifying key and the p
 cargo openvm verify app
 ```
 
-For more information on the `verify` command, see the [verify](./verify.md) doc.
+For more information on the `verify` command, see the [verify](./verifying-proofs) doc.
 
 ## End-to-end EVM Proof Generation and Verification
 
@@ -69,7 +69,7 @@ The process above details the workflow necessary to build, prove, and verify a g
 To do (a), you need to run the following command. If you've run it previously on your machine, there is no need to do so again. This will write files necessary for EVM proving in `~/.openvm/`.
 
 ```bash
-cargo openvm setup
+cargo openvm setup --evm
 ```
 
 > ⚠️ **WARNING**
diff --git a/book/src/writing-apps/run.md b/docs/vocs/docs/pages/book/writing-apps/running-a-program.mdx
similarity index 92%
rename from book/src/writing-apps/run.md
rename to docs/vocs/docs/pages/book/writing-apps/running-a-program.mdx
index 66a8dbf439..26f3363998 100644
--- a/book/src/writing-apps/run.md
+++ b/docs/vocs/docs/pages/book/writing-apps/running-a-program.mdx
@@ -15,7 +15,7 @@ If your program doesn't require inputs, you can (and should) omit the `--input`
 
 ## Run Flags
 
-Many of the options for `cargo openvm run` will be passed to `cargo openvm build` if `--exe` is not specified. For more information on `build` (or `run`'s **Feature Selection**, **Compilation**, **Output**, **Display**, and/or **Manifest** options) see [Compiling](./writing-apps/build.md).
+Many of the options for `cargo openvm run` will be passed to `cargo openvm build` if `--exe` is not specified. For more information on `build` (or `run`'s **Feature Selection**, **Compilation**, **Output**, **Display**, and/or **Manifest** options) see [Compiling](./compiling).
 
 ### OpenVM Options
 
@@ -25,7 +25,7 @@ Many of the options for `cargo openvm run` will be passed to `cargo openvm build
 
 - `--config <CONFIG>`
 
-  **Description**: Path to the OpenVM config `.toml` file that specifies the VM extensions. By default will search the manifest directory for `openvm.toml`. If no file is found, OpenVM will use a default configuration. Currently the CLI only supports known extensions listed in the [Using Existing Extensions](../custom-extensions/overview.md) section. To use other extensions, use the [SDK](../advanced-usage/sdk.md).
+  **Description**: Path to the OpenVM config `.toml` file that specifies the VM extensions. By default, the CLI will search the manifest directory for `openvm.toml`. If no file is found, OpenVM will use a default configuration. Currently the CLI only supports known extensions listed in the [Using Existing Extensions](/book/acceleration-using-extensions/overview) section. To use other extensions, use the [SDK](/book/advanced-usage/sdk).
 
 - `--output_dir <OUTPUT_DIR>`
 
diff --git a/book/src/writing-apps/solidity.md b/docs/vocs/docs/pages/book/writing-apps/solidity-sdk.mdx
similarity index 87%
rename from book/src/writing-apps/solidity.md
rename to docs/vocs/docs/pages/book/writing-apps/solidity-sdk.mdx
index 61d132c2f0..44a8444476 100644
--- a/book/src/writing-apps/solidity.md
+++ b/docs/vocs/docs/pages/book/writing-apps/solidity-sdk.mdx
@@ -1,8 +1,8 @@
 # Solidity SDK
 
-As a supplement to OpenVM, we provide a [Solidity SDK](https://github.com/openvm-org/openvm-solidity-sdk) containing OpenVM verifier contracts generated at official release commits using the `cargo openvm setup` [command](../advanced-usage/sdk.md#setup). The contracts are built at every _minor_ release as OpenVM guarantees verifier backward compatibility across patch releases.
+As a supplement to OpenVM, we provide a [Solidity SDK](https://github.com/openvm-org/openvm-solidity-sdk) containing OpenVM verifier contracts generated at official release commits using the `cargo openvm setup --evm` [command](/book/advanced-usage/sdk#setup). The contracts are built at every _minor_ release as OpenVM guarantees verifier backward compatibility across patch releases.
 
-Note that these builds are for the default aggregation VM config which should be sufficient for most users. If you use a custom config, you will need to manually generate the verifier contract using the [OpenVM SDK](../advanced-usage/sdk.md).
+Note that these builds are for the default aggregation VM config which should be sufficient for most users. If you use a custom config, you will need to manually generate the verifier contract using the [OpenVM SDK](/book/advanced-usage/sdk).
 
 ## Installation
 
@@ -37,7 +37,7 @@ contract MyContract {
 }
 ```
 
-The arguments to the `verify` function are the fields in the [EVM Proof JSON Format](./verify.md#evm-proof-json-format).
+The arguments to the `verify` function are the fields in the [EVM Proof JSON Format](./verifying-proofs#evm-proof-json-format).
 Since the builds use the default aggregation VM config, the number of public values is fixed to 32.
 
 If you want to import the verifier contract into your own repository for testing purposes, note that it is locked to Solidity version `0.8.19`. If your project uses a different version, the import may not compile. As a workaround, you can compile the contract separately and use `vm.etch()` to inject the raw bytecode into your tests.
diff --git a/book/src/writing-apps/verify.md b/docs/vocs/docs/pages/book/writing-apps/verifying-proofs.mdx
similarity index 96%
rename from book/src/writing-apps/verify.md
rename to docs/vocs/docs/pages/book/writing-apps/verifying-proofs.mdx
index 8a45f31ebd..c22d088c36 100644
--- a/book/src/writing-apps/verify.md
+++ b/docs/vocs/docs/pages/book/writing-apps/verifying-proofs.mdx
@@ -38,10 +38,10 @@ svm use 0.8.19
 The workflow for generating an end-to-end EVM proof requires first generating an aggregation proving key and EVM verifier contract. This can be done by running the following command:
 
 ```bash
-cargo openvm setup
+cargo openvm setup --evm
 ```
 
-Note that `cargo openvm setup` may attempt to download other files (i.e. KZG parameters) from an AWS S3 bucket into `~/.openvm/`.
+Note that `cargo openvm setup --evm` may attempt to download other files (i.e. KZG parameters) from an AWS S3 bucket into `~/.openvm/`.
 
 This command can take ~20mins on a `m6a.16xlarge` instance due to the keygen time.
 
diff --git a/book/src/writing-apps/write-program.md b/docs/vocs/docs/pages/book/writing-apps/writing-a-program.mdx
similarity index 98%
rename from book/src/writing-apps/write-program.md
rename to docs/vocs/docs/pages/book/writing-apps/writing-a-program.mdx
index 42685cfaf9..42fc848df8 100644
--- a/book/src/writing-apps/write-program.md
+++ b/docs/vocs/docs/pages/book/writing-apps/writing-a-program.mdx
@@ -122,7 +122,7 @@ This tells Rust to use the custom `main` handler when the environment is `no_std
 
 ## Building and running
 
-See the [overview](./overview.md) on how to build and run the program.
+See the [overview](./overview) on how to build and run the program.
 
 ## Using crates that depend on `getrandom`
 
diff --git a/docs/vocs/docs/pages/index.mdx b/docs/vocs/docs/pages/index.mdx
new file mode 100644
index 0000000000..d1f63141c6
--- /dev/null
+++ b/docs/vocs/docs/pages/index.mdx
@@ -0,0 +1,43 @@
+---
+content:
+    width: 100%
+layout: landing
+showLogo: false
+title: OpenVM
+description: A performant and modular zkVM framework built for customization and extensibility
+---
+
+import { HomePage } from "vocs/components";
+
+<HomePage.Root>
+  <HomePage.Logo />
+  <HomePage.Tagline>A performant and modular zkVM framework built for customization and extensibility.</HomePage.Tagline>
+    <div className="flex flex-col justify-between space-y-4 w-full max-w-[1000px]">
+    <div style={{textAlign: 'left', display: 'block'}}>
+    :::code-group
+
+    ```bash [Build with OpenVM]
+    # install `cargo openvm`
+    cargo +1.86 install --locked --git https://github.com/openvm-org/openvm.git --tag v1.4.0-rc.4 cargo-openvm
+
+    # create a starter OpenVM project
+    cargo openvm init fibonacci
+    ```
+    ```rust [Customize OpenVM]
+    // .. snip ..
+    let vm_config = SdkVmConfig::builder()
+        .system(Default::default())
+        .rv32i(Default::default())
+        .rv32m(Default::default())
+        .io(Default::default())
+        .build();
+    ```
+    :::
+    </div>
+    </div>
+  <HomePage.Description>OpenVM is an open-source zero-knowledge virtual machine (zkVM) framework focused on modularity at every level of the stack. OpenVM is designed for customization and extensibility without sacrificing performance or maintainability.</HomePage.Description>
+  <HomePage.Buttons>
+    <HomePage.Button href="/book/getting-started/install" variant="accent">Get started</HomePage.Button>
+    <HomePage.Button href="https://github.com/openvm-org/openvm">GitHub</HomePage.Button>
+  </HomePage.Buttons>
+</HomePage.Root>
\ No newline at end of file
diff --git a/docs/specs/circuit.md b/docs/vocs/docs/pages/specs/architecture/circuit-architecture.mdx
similarity index 89%
rename from docs/specs/circuit.md
rename to docs/vocs/docs/pages/specs/architecture/circuit-architecture.mdx
index 4238c7c27b..f2144e3eb5 100644
--- a/docs/specs/circuit.md
+++ b/docs/vocs/docs/pages/specs/architecture/circuit-architecture.mdx
@@ -5,8 +5,9 @@ randomness between AIR matrices to enable permutation arguments such as log-up.
 
 In the following, we will refer to a circuit as a collection of chips that communicate with one another over buses using a LogUp permutation argument. We refer to messages sent to such a bus as [interactions](https://github.com/openvm-org/stark-backend/blob/main/docs/interactions.md). Every _chip_ is an entity responsible for a certain operation (or set of operations), and it has an AIR to check the correctness of its execution.
 
-> [!NOTE]
-> A bus itself doesn't have any logic. It is just an index, and all related functionality is purely on the backend side.
+:::note
+A bus itself doesn't have any logic. It is just an index, and all related functionality is purely on the backend side.
+:::
 
 Usually we have _bridges_, which are basically APIs for buses. Using a bridge is preferred over communicating with a bus directly since bridges may enforce some special type of communication (for example, sending messages in pairs or communicating with different buses at once, thus synchronizing these communications).
 
@@ -24,7 +25,7 @@ The following must exist in any VM circuit:
 - **Program chip** and **program bus**. The program chip's trace matrix simply consists of the program code, one instruction per row, as a cached trace. A cached trace is used so that the commitment to the program code is the proof system trace commitment. Every time an instruction executor (to be defined later) executes an instruction, it sends this instruction, along with the `pc`, to the program bus via the program chip. The program chip keeps track of all accesses to later balance out the interactions.
 - **Connector chip**. If we only had the above interactions with the execution bus, then the initial execution state would have only been sent and the final one would have only been received. The connector chip publishes the initial and final states and balances this out (in particular, its trace is a matrix with two rows -- because it has a preprocessed trace).
 - **Phantom chip**. We call an instruction _phantom_ if it doesn't mutate execution state (and, of course, the state of the memory). Phantom instructions are sent to the phantom chip.
-- A set of memory-related chips and a bus (different depending on the persistence type -- see [Memory](./memory.md)),
+- A set of memory-related chips and a bus (different depending on the persistence type -- see [Memory](/specs/architecture/memory)),
 - **Execution bus**. Every time an instruction executor executes an instruction, it sends the execution state before the instruction to the execution bus (with multiplicity $1$) and receives the execution state after the instruction from it. It has a convenient **execution bridge** that provides functions which do these two interactions at once.
 
 Notably, there is no CPU chip where the full transcript of executed instructions is materialized in a single trace matrix. The transcript of memory accesses is also not materialized in a single trace matrix.
@@ -39,9 +40,9 @@ When the program is being run, in the simple scenario, the following happens at
   - This instruction executor returns the new execution state (and maybe reports that the execution is finished). The timestamp and program counter change accordingly.
 
 There are limitations to how many interactions/trace rows/etc. we can have in total; see [soundness criteria](https://github.com/openvm-org/stark-backend/blob/main/docs/Soundness_of_Interactions_via_LogUp.pdf). If executing the full program would lead us to overflowing these limits, the program needs to be executed in several segments. Then the process is slightly different:
-- After executing an instruction, we may decide (based on `SegmentationStrategy`, which looks at the traces) to _segment_ our execution at this point. In this case, the execution will be split into several segments.
+- After executing an instruction, we may decide to _segment_ our execution at this point. In this case, the execution will be split into several segments.
 - The timestamp resets on segmentation.
-- Each segment is going to be proven separately. Of course, this means that adjacent segments need to agree on some things (mainly memory state). See [Continuations](./continuations.md) for full details.
+- Each segment is going to be proven separately. Of course, this means that adjacent segments need to agree on some things (mainly memory state). See [Continuations](/specs/architecture/continuations) for full details.
 
 ## Instruction executors
 
@@ -54,13 +55,15 @@ Note that technically these are parts of the same chip and therefore generate on
 As already mentioned, it is not required that an instruction executor must be implemented in two parts:
 the entire chip with the combined functionality of the adapter and core can be implemented all at once.
 
-> [!IMPORTANT]
-> It is a responsibility of the instruction executor (more specifically, the adapter) to update the execution state. It is also its responsibility to constrain that the timestamp increases. If any of these is not done correctly, the proof of correctness will fail to be generated.
+:::warning
+It is the responsibility of the instruction executor (more specifically, the adapter) to update the execution state. It is also its responsibility to constrain that the timestamp increases. If either of these is not done correctly, the proof of correctness will fail to be generated.
+:::
 
-To prevent [timestamp overflow](./memory.md#time-goes-forward), we **require** that each instruction executor chip must satisfy the following condition:
+To prevent [timestamp overflow](/specs/architecture/memory#time-goes-forward), we **require** that each instruction executor chip must satisfy the following condition:
 
-> [!IMPORTANT]
-> In the AIR, the amount that the timestamp is constrained to increase during execution of a single instruction is at most `num_interactions * num_rows_per_execution`.
+:::warning
+In the AIR, the amount that the timestamp is constrained to increase during execution of a single instruction is at most `num_interactions * num_rows_per_execution`.
+:::
 
 Here `num_interactions` is the number of interactions in the AIR: this is a static property of the AIR.
 The number of AIR interactions does not depend on the number of trace rows, and it doesn't depend on whether messages are actually sent or not. In general, the trace for the execution of a single instruction can
@@ -71,7 +74,7 @@ We check that all VM chips contained in the OpenVM system and the standard VM ex
 
 ## What to take into account when adding a new chip
 
-- [Ensure memory consistency](./memory.md#what-to-take-into-account-when-adding-a-new-chip)
+- [Ensure memory consistency](/specs/architecture/memory#what-to-take-into-account-when-adding-a-new-chip)
 - Do not forget to constrain that `is_valid` is boolean in your core AIR.
 - If your chip generates some number of trace rows, and this number is not a power of two, the trace is padded with all-zero rows. It should correspond to a legitimate operation, most likely `is_valid = 0` though. For example, if your AIR asserts that the value in the first column is 1 less than the value in the second column, you cannot just write `builder.assert_eq(local.x + 1, local.y)`, because this is not the case for the padding rows.
 
diff --git a/docs/specs/continuations.md b/docs/vocs/docs/pages/specs/architecture/continuations.mdx
similarity index 88%
rename from docs/specs/continuations.md
rename to docs/vocs/docs/pages/specs/architecture/continuations.mdx
index 87d83e5f19..edc8a80085 100644
--- a/docs/specs/continuations.md
+++ b/docs/vocs/docs/pages/specs/architecture/continuations.mdx
@@ -8,16 +8,16 @@ VM), which operates without continuations enabled.
 The aggregation program takes a variable number of consecutive segment proofs and consolidates them into a single proof
 that captures the entire range of segments.
 
-![Aggregation example](../../assets/agg.png)
+![Aggregation example](/agg.png)
 
 The following figure shows that the shape of the aggregation tree is not fixed.
 
-![Another aggregation example](../../assets/agg-2.png)
+![Another aggregation example](/agg-2.png)
 
 In some cases, a minimal SNARK proof is not needed. We can use the following aggregation architecture to generate a
 STARK proof which proves the whole execution.
 
-![STARK aggregation example](../../assets/agg-stark.png)
+![STARK aggregation example](/agg-stark.png)
 
 We will now give an overview of the steps of the overall aggregation, starting from the final smart contract verifier
 and going down to the application proof.
@@ -238,7 +238,7 @@ memory argument.
 
 The overall runtime execution of a program is broken into **segments** (the logic of when to segment can be custom and
 depend on many factors). Each segment is proven in a separate STARK VM circuit as described
-in [Circuit Architecture](./circuit.md). The public values of the circuit must contain the pre- and post-state commitments
+in [Circuit Architecture](/specs/architecture/circuit-architecture). The public values of the circuit must contain the pre- and post-state commitments
 to the segment. The state consists of the active program counter and the full state of memory. (Recall in our
 architecture that registers are part of memory, so register state is included in memory state).
 
@@ -270,12 +270,12 @@ multiple accesses.
 
 Persistent memory requires three chips: the `PersistentBoundaryChip`, the `MemoryMerkleChip`, and a chip to assist in
 hashing, which is by default the `Poseidon2Chip`. To simplify the discussion, define constants `C` equal to the number
-of field elements in a hash value, `L` where the addresses in an address space are $0..2^L$, `M` and `AS_OFFSET` where
-the address spaces are `AS_OFFSET..AS_OFFSET + 2^M`, and `H = M + L - log2(C)`. `H` is the height of the Merkle tree in
-the sense that the leaves are at distance `H` from the root. We define the following interactions:
+of field elements in a hash value, `L` where the addresses in an address space are $0..2^L$, `M` and `ADDR_SPACE_OFFSET`
+where the address spaces are `ADDR_SPACE_OFFSET..ADDR_SPACE_OFFSET + 2^M`, and `H = M + L - log2(C)`. `H` is the height
+of the Merkle tree in the sense that the leaves are at distance `H` from the root. We define the following interactions:
 
-On the <span style="color:green">MERKLE_BUS</span>, we have interactions of the form
-<span style="color:green">**(expand_direction: {-1, 0, 1}, height: F, labels: (F, F), hash: [F; C])**</span>, where
+On the `MERKLE_BUS`, we have interactions of the form
+`(expand_direction: {-1, 0, 1}, height: F, labels: (F, F), hash: [F; C])`, where
 
 - **expand_direction** represents whether **hash** is the initial (1) or final (-1) hash value of the node represented
   by **node_label**. If zero, the row is a dummy row.
@@ -298,19 +298,19 @@ Each (IO part of a) row in the `MemoryMerkleChip` trace contains the fields
 **(height, parent_labels, parent_hash, left_child_labels, left_hash, right_child_labels, right_hash)**
 and has the following interactions:
 
-- Send <span style="color:green">**(expand_direction, height + 1, parent_labels, parent_hash)**</span>
-  on <span style="color:green">MERKLE_BUS</span> with multiplicity `expand_direction`
-- Receive <span style="color:green">**(expand_direction, height, left_child_labels, left_hash)**</span>
-  on <span style="color:green">MERKLE_BUS</span> with multiplicity `expand_direction`
-- Receive <span style="color:green">**(expand_direction, height, right_child_labels, right_hash)**</span>
-  on <span style="color:green">MERKLE_BUS</span> with multiplicity `expand_direction`
+- Send `(expand_direction, height + 1, parent_labels, parent_hash)`
+  on `MERKLE_BUS` with multiplicity `expand_direction`
+- Receive `(expand_direction, height, left_child_labels, left_hash)`
+  on `MERKLE_BUS` with multiplicity `expand_direction`
+- Receive `(expand_direction, height, right_child_labels, right_hash)`
+  on `MERKLE_BUS` with multiplicity `expand_direction`
 
 The `PersistentBoundaryChip` has rows of the form
 `(expand_direction, address_space, leaf_label, values, hash, timestamp)`
-and has the following interactions on the <span style="color:green">MERKLE_BUS</span>:
+and has the following interactions on the `MERKLE_BUS`:
 
-- Send <span style="color:green">**(1, 0, (as - AS_OFFSET) \* 2^L, node\*label, hash_initial)**</span>
-- Receive <span style="color:green">**(-1, 0, (as - AS_OFFSET) \* 2^L, node_label, hash_final)**</span>
+- Send `(1, 0, (as - ADDR_SPACE_OFFSET) * 2^L, node_label, hash_initial)`
+- Receive `(-1, 0, (as - ADDR_SPACE_OFFSET) * 2^L, node_label, hash_final)`
 
 It receives `values` from the `MEMORY_BUS` and constrains `hash = compress(values, 0)` via the `POSEIDON2_DIRECT_BUS`.
 The aggregation program takes a variable number of consecutive segment proofs and consolidates them into a single proof
diff --git a/docs/specs/memory.md b/docs/vocs/docs/pages/specs/architecture/memory.mdx
similarity index 92%
rename from docs/specs/memory.md
rename to docs/vocs/docs/pages/specs/architecture/memory.mdx
index 3d8ea1dc1d..d1281f4568 100644
--- a/docs/specs/memory.md
+++ b/docs/vocs/docs/pages/specs/architecture/memory.mdx
@@ -163,17 +163,18 @@ Both boundary chips perform, for every subsegment ever existed in our nice set,
 
 The following invariants **must** be maintained by the memory architecture:
 1. In the MEMORY_BUS, the `timestamp` is always in range `[0, 2^timestamp_max_bits)` where `timestamp_max_bits <= F::bits() - 2` is a configuration constant.
-2. In the MEMORY_BUS, the `address_space` is always in range `[0, 1 + 2^as_height)` where `as_height` is a configuration constant satisfying `as_height < F::bits() - 2`. (Our current implementation only supports `as_height` less than the max bits supported by the VariableRangeCheckerBus).
+2. In the MEMORY_BUS, the `address_space` is always in range `[0, 1 + 2^addr_space_height)` where `addr_space_height` is a configuration constant satisfying `addr_space_height < F::bits() - 2`. (Our current implementation only supports `addr_space_height` less than the max bits supported by the VariableRangeCheckerBus).
 3. In the MEMORY_BUS, the `pointer` is always in range `[0, 2^pointer_max_bits)` where `pointer_max_bits <= F::bits() - 2` is a configuration constant.
 
-Invariant 1 is guaranteed by [time goes forward](#time-goes-forward) under the [assumption](./circuit.md#instruction-executors) that the timestamp increase during instruction execution is bounded by the number of AIR interactions.
+Invariant 1 is guaranteed by [time goes forward](#time-goes-forward) under the [assumption](/specs/architecture/circuit-architecture#instruction-executors) that the timestamp increase during instruction execution is bounded by the number of AIR interactions.
 
 Invariant 2 and 3 are guaranteed at timestamp `0` in the MEMORY_BUS by the boundary chips:
 - VolatileBoundaryChip constrains the range checks outright.
 - PersistentBoundaryChip populates the MEMORY_BUS at timestamp `0` from the initial memory state, which is committed to in the public value `initial_memory_root`. PersistentBoundaryChip upholds Invariants 2 and 3 **assuming** that the initial memory state only contains addresses in the required range. This assumption needs to be checked outside the scope of the circuit.
 
-> [!IMPORTANT]
-> At all later timestamps, it is the responsibility of each chip to ensure their memory accesses maintain Invariants 2 and 3.
+:::warning
+At all later timestamps, it is the responsibility of each chip to ensure their memory accesses maintain Invariants 2 and 3.
+:::
 
 We note an observation that may be useful in soundness analysis of instruction executor chips: if the `MemoryBridge` is used to add the interactions necessary for a write operation, then under the assumptions that time goes forward and that all memory accesses at previous timestamps are in valid range, any attempt to write to an address out of range will lead to an unbalanced MEMORY_BUS because it will require a send at an earlier timestamp that was also out of bounds.
 
@@ -182,8 +183,8 @@ We note an observation that may be useful in soundness analysis of instruction e
 Assume that the MEMORY_BUS interactions and the constraints mentioned above are satisfied.
 
 ### Time goes forward
-
-In the connector chip, we constrain that the final timestamp is less than $`2^\text{timestamp\_max\_bits}`$ and that the start timestamp is equal to `1`. It is [guaranteed](https://github.com/openvm-org/stark-backend/blob/main/docs/interactions.md) that the total number of interaction messages is less than $p$. In our current circuit set, all chips increase timestamp [less than they do interactions](./circuit.md#inspection-of-vm-chip-timestamp-increments), which guarantees that the final timestamp cannot overflow: its actual (not mod $p$) value is less than $`2^\text{timestamp\_max\_bits}`$. Given that, our check that `timestamp - prev_timestamp - 1 < 2^timestamp_max_bits` guarantees that `prev_timestamp < timestamp` everywhere we check it.
+  
+In the connector chip, we constrain that the final timestamp is less than $`2^\text{timestamp\_max\_bits}`$ and that the start timestamp is equal to `1`. It is [guaranteed](https://github.com/openvm-org/stark-backend/blob/main/docs/interactions.md) that the total number of interaction messages is less than $p$. In our current circuit set, all chips increase timestamp [less than they do interactions](/specs/architecture/circuit-architecture#inspection-of-vm-chip-timestamp-increments), which guarantees that the final timestamp cannot overflow: its actual (not mod $p$) value is less than $`2^\text{timestamp\_max\_bits}`$. Given that, our check that `timestamp - prev_timestamp - 1 < 2^timestamp_max_bits` guarantees that `prev_timestamp < timestamp` everywhere we check it.
 
 ### Memory consistency
 
@@ -197,8 +198,9 @@ To prove memory consistency, we need to show that, if we only consider basic int
 
 For every row that gets handled by a memory bridge, draw a directed edge from the previous timestamp to the new timestamp with capacity $1$. We know that in the obtained network, where the source and the sink are defined by the boundary interactions, all edges go left to right, and the maximal flow is $1$ (because there is only one edge from the source, therefore the minimal cut is $1$). Since all edges go left to right, there are no circulation in this network. Hence, all edges represent exactly one path from the source to the sink. Therefore, for every vertex (which is a timestamp), the value for the edge going to this vertex (the last receive interaction) equals the value for the edge going from this vertex (the considered send instruction).
 
-> [!NOTE]
-> Technically, it is possible to add artificial rows in the access adapter AIRs that do nothing: for example, the one corresponding to splitting a segment `[7, 11)` into two and the one merging it back, all at the same timestamp and with the same data completely unrelated to the actual execution. However, this cannot be abused by memory accesses: informally, because this kind of happens at the same time and nobody gets to read these values, and formally, because even if a flow has a circulation, it still can be decomposed into paths and a circulation, and we only consider the path in the argument above.
+:::info
+Technically, it is possible to add artificial rows in the access adapter AIRs that do nothing: for example, the one corresponding to splitting a segment `[7, 11)` into two and the one merging it back, all at the same timestamp and with the same data completely unrelated to the actual execution. However, this cannot be abused by memory accesses: informally, because these rows all occur at the same timestamp and nobody gets to read these values, and formally, because even if a flow has a circulation, it still can be decomposed into paths and a circulation, and we only consider the path in the argument above.
+:::
 
 ## Volatile and persistent memory
 
@@ -236,7 +238,7 @@ self.addr_lt_air
 ### Persistent Memory: `PersistentBoundaryChip`
 
 - **Purpose:**
-  When operating in persistent memory mode, the final state of memory must be verifiable. The `PersistentBoundaryChip` commits not only to the final memory state but also to the initial state provided by the program. These commitments become part of the public values used later in proof aggregation, see [Continuations](./continuations.md).
+  When operating in persistent memory mode, the final state of memory must be verifiable. The `PersistentBoundaryChip` commits not only to the final memory state but also to the initial state provided by the program. These commitments become part of the public values used later in proof aggregation, see [Continuations](/specs/architecture/continuations).
 
 - **Key Points:**
   - **Commitments:**
diff --git a/docs/specs/ISA.md b/docs/vocs/docs/pages/specs/openvm/isa.mdx
similarity index 97%
rename from docs/specs/ISA.md
rename to docs/vocs/docs/pages/specs/openvm/isa.mdx
index 717a8ced02..14b71fa05c 100644
--- a/docs/specs/ISA.md
+++ b/docs/vocs/docs/pages/specs/openvm/isa.mdx
@@ -19,7 +19,7 @@ This specification describes the overall architecture and default VM extensions
 
 In addition to these default extensions, developers are able to extend the ISA by defining their own custom VM
 extensions. For reader convenience, we provide a mapping between the code-level representation of opcodes in OpenVM and
-the opcodes below [here](./isa-table.md).
+the opcodes below [here](/specs/reference/instruction-reference).
 
 ## Constants and Configuration Parameters
 
@@ -295,7 +295,7 @@ always advance the program counter by `DEFAULT_PC_STEP`.
 ### RV32IM Extension
 
 The RV32IM extension introduces OpenVM opcodes which support 32-bit RISC-V via transpilation from a standard RV32IM ELF
-binary, specified [here](./RISCV.md). These consist of opcodes corresponding 1-1 with RV32IM opcodes, as well as
+binary, specified [here](/specs/reference/riscv-custom-code). These consist of opcodes corresponding 1-1 with RV32IM opcodes, as well as
 additional user IO opcodes and phantom sub-instructions to support input and debug printing on the host. We denote the
 OpenVM opcode corresponding to a RV32IM opcode by appending `_RV32`.
 
@@ -306,7 +306,7 @@ instructions preserve this constraint.
 The `i`th RISC-V register is represented by the block `[4 * i:4]_1` of 4 limbs in address space `1`. All memory
 addresses in address space `1` behave uniformly, and in particular writes to the block `[0:4]_1` corresponding to the
 RISC-V register `x0` are allowed in the RV32IM extension. However, as detailed
-in [RV32IM Transpilation](./transpiler.md#rv32im-transpilation), any OpenVM program transpiled from a RV32IM ELF will
+in [RV32IM Transpilation](/specs/reference/transpiler#rv32im-transpilation), any OpenVM program transpiled from a RV32IM ELF will
 never contain such a write and conforms to the RV32IM specification.
 
 #### ALU
@@ -439,7 +439,7 @@ The RV32IM extension defines the following phantom sub-instructions.
 |-------------------| ------------ | -------- |----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 | Rv32HintInput     | 0x20         | `_`      | Pops a vector `hint` of field elements from the input stream and resets the hint stream to equal the vector `[(hint.len() as u32).to_le_bytes()), hint].concat()`.                                                                                             |
 | Rv32PrintStr      | 0x21         | `a,b,_`  | Peeks at `[r32{0}(a)..r32{0}(a) + r32{0}(b)]_2`, tries to convert to byte array and then UTF-8 string and prints to host stdout. Prints error message if conversion fails. Does not change any VM state.                                                       |
-| Rv32HintRandom    | 0x22         | `a,_,_`  | Resets the hint stream to `4 * r32{0}(a)` random bytes. The source of randomness is the host operating system (`rand::rngs::OsRng`). Its result is not constrained in any way.                                                                                 |
+| Rv32HintRandom    | 0x22         | `a,_,_`  | Resets the hint stream to `4 * r32{0}(a)` random bytes. The randomness is deterministic: it comes from a fixed-seed RNG (`rand::rngs::StdRng`). Its result is not constrained in any way.                                                                                  |
 | Rv32HintLoadByKey | 0x23         | `a,b,_`  | Look up the value by key `[r32{0}{a}:r32{0}{b}]_2` and prepend the value into `input_stream`. The logical value is `Vec<Vec<F>>`. The serialization of `Vec` follows the format `[length, <content>]`. Both length and content encoded as little-endian bytes. |
 ### Native Extension
 
@@ -460,7 +460,7 @@ reads but not allowed for writes. When using immediates, we interpret `[a]_0` as
 | STOREW       | `a,b,c,4,4` | Set `[[c]_4 + b]_4 = [a]_4`.                                                                                                                                                                                                                                                               |
 | LOADW4       | `a,b,c,4,4` | Set `[a:4]_4 = [[c]_4 + b:4]_4`.                                                                                                                                                                                                                                                           |
 | STOREW4      | `a,b,c,4,4` | Set `[[c]_4 + b:4]_4 = [a:4]_4`.                                                                                                                                                                                                                                                           |
-| JAL          | `a,b,_,4`   | Jump to address and link: set `[a]_4 = (pc + DEFAULT_PC_STEP)` and `pc = pc + b`.                                                                                                                                                                                                          |
+| JAL          | `a,b,_,4`   | Jump to address and link: set `[a]_4 = (pc + DEFAU````LT````_PC_STEP)` and `pc = pc + b`.                                                                                                                                                                                                          |
 | RANGE_CHECK  | `a,b,c,4`   | Assert that `[a]_4 = x + y * 2^16` for some `x < 2^b` and `y < 2^c`. `b` must be in [0,16] and `c` must be in [0, 14].                                                                                                                                                                     |
 | BEQ          | `a,b,c,d,e` | If `[a]_d == [b]_e`, then set `pc = pc + c`.                                                                                                                                                                                                                                               |
 | BNE          | `a,b,c,d,e` | If `[a]_d != [b]_e`, then set `pc = pc + c`.                                                                                                                                                                                                                                               |
@@ -523,7 +523,7 @@ We have the following special opcodes tailored to optimize FRI proof verificatio
 
 | Name                                                                                                                                                                                                                     | Operands        | Description                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      |
 | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
-| VERIFY_BATCH `[CHUNK, PID]` <br/><br/> Here `CHUNK` and `PID` are **constants** that determine different opcodes. `PID` is an internal identifier for particular Poseidon2 constants dependent on the field (see below). | `a,b,c,d,e,f,g` | Further described [here](../../extensions/native/circuit/src/poseidon2/README.md). Due to already having a large number of operands, the address space is fixed to be `AS::Native = 4`. Computes `mmcs::verify_batch`. In the native address space, `[a], [b], [e], [f]` should be the array start pointers for the dimensions array, the opened values array (which contains more arrays) and the commitment (which is an array of length `CHUNK`). `[c]` should be the length of the opened values array (and so should be equal to the length of the dimensions array as well). `[d]` should be the hint id of proofs. `g` should be the reciprocal of the size (in field elements) of the values contained in the opened values array: if the opened values array contains field elements, `g` should be 1; if the opened values array contains extension field elements, `g` should be 1/4. |
+| VERIFY_BATCH `[CHUNK, PID]` <br/><br/> Here `CHUNK` and `PID` are **constants** that determine different opcodes. `PID` is an internal identifier for particular Poseidon2 constants dependent on the field (see below). | `a,b,c,d,e,f,g` | Further described [here](https://github.com/openvm-org/openvm/blob/main/extensions/native/circuit/src/poseidon2/README.md). Due to already having a large number of operands, the address space is fixed to be `AS::Native = 4`. Computes `mmcs::verify_batch`. In the native address space, `[a], [b], [e], [f]` should be the array start pointers for the dimensions array, the opened values array (which contains more arrays) and the commitment (which is an array of length `CHUNK`). `[c]` should be the length of the opened values array (and so should be equal to the length of the dimensions array as well). `[d]` should be the hint id of proofs. `g` should be the reciprocal of the size (in field elements) of the values contained in the opened values array: if the opened values array contains field elements, `g` should be 1; if the opened values array contains extension field elements, `g` should be 1/4. |
 | FRI_REDUCED_OPENING                                                                                                                                                                                                      | `a,b,c,d,e,f,g` | Let `a_ptr = [a]_4`, `b_ptr = [b]_4`, `length = [c]_4`, `alpha = [d:EXT_DEG]_4`, `hint_id = [f]_4`, `is_init = [g]_4`. `a_ptr` is the address of Felt array `a_arr` and `b_ptr` is the address of Ext array `b_arr`. Compute `sum((b_arr[i] - a_arr[i]) * alpha ^ i)` for `i=0..length` and write the value into `[e:EXT_DEG]_4`. It is required that `is_init` is boolean. If `is_init == 0`, read content of `a_arr` from the hint space at index `hint_id` and write into `a_arr`. Otherwise, read `a_arr` from memory.  This instruction removes elements from `hint_space[hint_id]` as they are read. |
 
 #### Phantom Sub-Instructions
diff --git a/docs/specs/README.md b/docs/vocs/docs/pages/specs/openvm/overview.mdx
similarity index 69%
rename from docs/specs/README.md
rename to docs/vocs/docs/pages/specs/openvm/overview.mdx
index 6e199ef37e..ae85882d12 100644
--- a/docs/specs/README.md
+++ b/docs/vocs/docs/pages/specs/openvm/overview.mdx
@@ -2,17 +2,17 @@
 
 OpenVM is a modular framework to co-design and build a custom zkVM, ISA, and supporting programming language frontend simultaneously.
 
-- The [Circuit Architecture](./circuit.md) explains the zkVM circuit architecture, which focuses on maximizing modularity and composability. The architecture supports adding arbitrary chips to handle custom instructions using a **VM extension** framework.
+- The [Circuit Architecture](/specs/architecture/circuit-architecture) explains the zkVM circuit architecture, which focuses on maximizing modularity and composability. The architecture supports adding arbitrary chips to handle custom instructions using a **VM extension** framework.
   - There are a few required system chips: Program, Public Values, Connector, Range Checker, Memory (which is handled with several chips), and Poseidon2. These chips are required in any VM instantiation, and all other functionality is handled by circuits from VM extensions.
-- The [Instruction Set Architecture](./ISA.md) specifies the ISA framework and lists the currently supported instructions in different VM extensions. The ISA is specialized for zkVMs and provides additional flexibility over traditional machine architectures:
+- The [Instruction Set Architecture](/specs/openvm/isa) specifies the ISA framework and lists the currently supported instructions in different VM extensions. The ISA is specialized for zkVMs and provides additional flexibility over traditional machine architectures:
   - Support for multiple machine architectures interoperating over multiple memory address spaces. These address spaces also allow support for both register and stack based architectures.
   - Variable word size, which allows flexible support for different register sizes and also higher bandwidth memory accesses.
 - Programming language support is provided using Rust as the language frontend. Support for Rust relies on compilation to a 32-bit RISC-V ELF binary which is then transpiled to OpenVM assembly. VM extensions can specify additional instructions which are either (1) **intrinsics**, which can read from and write to RISC-V registers and memory in address spaces 1 and 2 or (2) **kernels**, which can operate over arbitrary address spaces, including address spaces 1 and 2.
-  - Intrinsics are supported in the Rust frontend by inserting custom RISC-V directives to be passed through LLVM into the RISC-V ELF. The [RISC-V custom instructions](./RISCV.md) specification explains the custom instruction format in the RISC-V ELF for each default VM extension.
-  - Each VM extension with intrinsics specifies an extensible [transpiler](./transpiler.md) component to convert its instructions in the RISC-V ELF into OpenVM assembly. The transpiler comes with support for RV32IM and the set of default extensions, and it is extensible for new VM extensions.
+  - Intrinsics are supported in the Rust frontend by inserting custom RISC-V directives to be passed through LLVM into the RISC-V ELF. The [RISC-V custom instructions](/specs/reference/riscv-custom-code) specification explains the custom instruction format in the RISC-V ELF for each default VM extension.
+  - Each VM extension with intrinsics specifies an extensible [transpiler](/specs/reference/transpiler) component to convert its instructions in the RISC-V ELF into OpenVM assembly. The transpiler comes with support for RV32IM and the set of default extensions, and it is extensible for new VM extensions.
   - VM extensions with kernels compile directly to OpenVM assembly and may be used outside of the Rust frontend, or called from within Rust via procedural macros. At present, a standalone Rust eDSL is supported for the native VM extension.
 - We provide a general recursion library written in a standalone Rust eDSL which compiles to the native VM extension for OpenVM. The library supports inner aggregation of arbitrary STARK proofs, outer aggregation using Halo2-based SNARKs, and on-chain EVM verification of the aggregated SNARK proofs.
-- All VMs within our framework support parallel proving of programs with unbounded cycle count using continuations and proof aggregation. Our [continuations design](./continuations.md) maximizes proving parallelism and does not rely on interactive communication between continuation segments.
+- All VMs within our framework support parallel proving of programs with unbounded cycle count using continuations and proof aggregation. Our [continuations design](/specs/architecture/continuations) maximizes proving parallelism and does not rely on interactive communication between continuation segments.
 
 ## VM Extensions
 
@@ -26,7 +26,7 @@ A new extension of the overall architecture consists of three components:
 
 These three components should be organized into three separate crates. When introducing a new extension with name `$name`, we recommend naming the crates as follows:
 
-- `openvm-$name-guest`: the guest library crate. This crate specifies the custom RISC-V instructions to be added. To avoid opcode collisions, we keep a list of currently supported custom instructions in [this](./RISCV.md) file.
+- `openvm-$name-guest`: the guest library crate. This crate specifies the custom RISC-V instructions to be added. To avoid opcode collisions, we keep a list of currently supported custom instructions in [this](/specs/reference/riscv-custom-code) file.
 - `openvm-$name-transpiler`: the transpiler extension crate. This crate needs to import `openvm-$name-guest` to get the custom RISC-V instruction definitions. The `openvm-$name-transpiler` crate specifies the new OpenVM instruction definitions (represented in field elements) as well as the transpiler extension.
 - `openvm-$name-circuit`: the circuit extension crate that defines new chips. This crate needs to import `openvm-$name-transpiler` to get the new OpenVM instruction definitions.
 
@@ -34,10 +34,10 @@ These three components should be organized into three separate crates. When intr
 
 More details about OpenVM are provided in the specific pages below.
 
-- [Circuit Architecture](./circuit.md)
-- [Instruction Set Architecture](./ISA.md)
-- [Code-Level Instruction Mapping](./isa-table.md)
-- [RISC-V custom instructions](./RISCV.md)
-- [Transpiler from RISC-V ELF to OpenVM assembly](./transpiler.md)
-- [Continuations](./continuations.md)
-- [Memory Architecture](./memory.md)
\ No newline at end of file
+- [Circuit Architecture](/specs/architecture/circuit-architecture)
+- [Instruction Set Architecture](/specs/openvm/isa)
+- [Code-Level Instruction Mapping](/specs/reference/instruction-reference)
+- [RISC-V custom instructions](/specs/reference/riscv-custom-code)
+- [Transpiler from RISC-V ELF to OpenVM assembly](/specs/reference/transpiler)
+- [Continuations](/specs/architecture/continuations)
+- [Memory Architecture](/specs/architecture/memory)
\ No newline at end of file
diff --git a/docs/specs/isa-table.md b/docs/vocs/docs/pages/specs/reference/instruction-reference.mdx
similarity index 99%
rename from docs/specs/isa-table.md
rename to docs/vocs/docs/pages/specs/reference/instruction-reference.mdx
index 7b7f374065..fa44faebb0 100644
--- a/docs/specs/isa-table.md
+++ b/docs/vocs/docs/pages/specs/reference/instruction-reference.mdx
@@ -1,7 +1,7 @@
 # OpenVM Instruction Mapping
 
 In this document, we provide a mapping between the representation of instructions
-in the OpenVM codebase and the instructions in the [ISA specification](../specs/ISA.md).
+in the OpenVM codebase and the instructions in the [ISA specification](/specs/openvm/isa).
 
 - Instructions in OpenVM implement the `LocalOpcode` trait. Different groups of `LocalOpcode`s from different VM extensions may be combined to form a set of instructions for a customized VM using several extensions.
 - The PHANTOM instruction may be extended in each VM extension by adding new sub-instructions with different `PhantomDiscriminant` values.
diff --git a/docs/specs/RISCV.md b/docs/vocs/docs/pages/specs/reference/riscv-custom-code.mdx
similarity index 98%
rename from docs/specs/RISCV.md
rename to docs/vocs/docs/pages/specs/reference/riscv-custom-code.mdx
index 82ad9c1445..ab64c893e6 100644
--- a/docs/specs/RISCV.md
+++ b/docs/vocs/docs/pages/specs/reference/riscv-custom-code.mdx
@@ -1,6 +1,6 @@
 # RISC-V Custom Code for VM Extensions
 
-VM extensions in OpenVM are supported in the Rust frontend by inserting custom RISC-V machine code to be passed through LLVM into the RISC-V ELF using a standard 32-bit RISC-V encoding. This document specifies the custom machine code used for the default set of VM extensions. This custom code will be transpiled to OpenVM assembly using the modular transpiler specified [here](./transpiler.md).
+VM extensions in OpenVM are supported in the Rust frontend by inserting custom RISC-V machine code to be passed through LLVM into the RISC-V ELF using a standard 32-bit RISC-V encoding. This document specifies the custom machine code used for the default set of VM extensions. This custom code will be transpiled to OpenVM assembly using the modular transpiler specified [here](/specs/reference/transpiler).
 The default VM extensions that support transpilation are:
 
 - [RV32IM](#rv32im-extension): An extension supporting the 32-bit RISC-V ISA with multiplication.
@@ -16,7 +16,7 @@ The default VM extensions that support transpilation are:
 We divide the types of custom RISC-V machine code associated with VM extensions into two categories:
 
 - **Intrinsic Instruction:** the custom machine code is a single custom RISC-V instruction, compliant with the RISC-V specification.
-- **Kernel Code:** the custom machine code is a 32-bit aligned binary sequence with bit length a multiple of 32. The machine code does not need to conform to any RISC-V ISA specification. Kernel code is used as a means to statically link foreign OpenVM assembly code into the ELF without a custom linker. Kernel code cannot be executed directly by a RISC-V machine without additional toolchain support from the [transpiler](./transpiler.md#openvm-kernel-code-transpilation).
+- **Kernel Code:** the custom machine code is a 32-bit aligned binary sequence with bit length a multiple of 32. The machine code does not need to conform to any RISC-V ISA specification. Kernel code is used as a means to statically link foreign OpenVM assembly code into the ELF without a custom linker. Kernel code cannot be executed directly by a RISC-V machine without additional toolchain support from the [transpiler](/specs/reference/transpiler#openvm-kernel-code-transpilation).
 
 ## Conventions for RISC-V Intrinsic Instructions
 
@@ -115,7 +115,7 @@ We support a single branch instruction, `beq256`, which is B-type.
 
 ## Native (Kernel) Extension
 
-The following are _not_ intrinsic instructions, but custom RISC-V instructions used to frame the insertion of custom kernel code. They are not meant to be used alone. See the [transpiler](./transpiler.md#openvm-kernel-code-transpilation) for more details.
+The following are _not_ intrinsic instructions, but custom RISC-V instructions used to frame the insertion of custom kernel code. They are not meant to be used alone. See the [transpiler](/specs/reference/transpiler#openvm-kernel-code-transpilation) for more details.
 
 These use the _custom-0_ opcode prefix and funct3 = 0b111.
 
@@ -147,7 +147,7 @@ For developer convenience, in the Rust function bindings for these intrinsics, e
 
 We use `config.mod_idx(N)` to denote the index of `N` in this list. In the list below, `idx` denotes `config.mod_idx(N)`.
 
-**Note:** The output for the first 4 instructions is not guaranteed to be less than `N`. See the [ISA specification](./ISA.md#algebra-extension) for more details.
+**Note:** The output for the first 4 instructions is not guaranteed to be less than `N`. See the [ISA specification](/specs/openvm/isa#algebra-extension) for more details.
 
 | RISC-V Inst  | FMT | opcode[6:0] | funct3 | funct7    | RISC-V description and notes                                                                                                                                                                                                                                                                                                                                                                                                |
 | ------------ | --- | ----------- | ------ | --------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
diff --git a/docs/specs/transpiler.md b/docs/vocs/docs/pages/specs/reference/transpiler.mdx
similarity index 97%
rename from docs/specs/transpiler.md
rename to docs/vocs/docs/pages/specs/reference/transpiler.mdx
index fded65b6d8..bb16d6fd5c 100644
--- a/docs/specs/transpiler.md
+++ b/docs/vocs/docs/pages/specs/reference/transpiler.mdx
@@ -1,6 +1,6 @@
 # RISC-V ELF Transpilation to OpenVM Executable
 
-The OpenVM framework supports transpilation of a RISC-V ELF consisting of the RV32IM instruction set as well as [custom RISC-V instructions](./RISCV.md) specified by VM extensions into an OpenVM executable.
+The OpenVM framework supports transpilation of a RISC-V ELF consisting of the RV32IM instruction set as well as [custom RISC-V instructions](/specs/reference/riscv-custom-code) specified by VM extensions into an OpenVM executable.
 
 ## Transpiler Framework
 
@@ -10,11 +10,11 @@ The transpiler is a function that converts a [RISC-V ELF](https://github.com/ris
 - Starting program counter `pc_0`
 - Initial data memory
 
-The OpenVM executable forms a part of the [initial VM state](./ISA.md#virtual-machine-state).
+The OpenVM executable forms a part of the [initial VM state](/specs/openvm/isa#virtual-machine-state).
 
 We define a RISC-V **machine code block** to be a 32-bit aligned contiguous sequence of bits in the RISC-V program memory, where the bit length is variable and a multiple of 32. The code block _may_ contain instructions from standard or non-standard RISC-V ISA extensions, but it may also contain arbitrary bits.
 
-The transpiler is configured upon construction with the set of VM extensions to support. In order to be supported by the transpiler, a VM extension must specify a set of RISC-V machine code blocks and rules for mapping each code block to a sequences of _potentially multiple_ [OpenVM instructions](./ISA.md#openvm-instruction-set).
+The transpiler is configured upon construction with the set of VM extensions to support. In order to be supported by the transpiler, a VM extension must specify a set of RISC-V machine code blocks and rules for mapping each code block to a sequence of _potentially multiple_ [OpenVM instructions](/specs/openvm/isa#openvm-instruction-set).
 
 The transpilation rules must satisfy:
 
@@ -41,7 +41,7 @@ The OpenVM ISA treats `[0:4]_1` as normal read/write memory and makes no guarant
 
 ## Transpiler Specification for Default VM Extensions
 
-This section specifies the behavior of the transpiler for the default VM extensions with the custom RISC-V instructions specified [here](./RISCV.md). We use the following notation:
+This section specifies the behavior of the transpiler for the default VM extensions with the custom RISC-V instructions specified [here](/specs/reference/riscv-custom-code). We use the following notation:
 
 - Let `ind(rd)` denote `4 * (register index)`, which is in `0..128`. In particular, it fits in one field element.
 - We use `itof` for the function that takes 12-bits (or 21-bits in case of J-type) to a signed integer and then mapping to the corresponding field element. So `0b11…11` goes to `-1` in `F`.
@@ -50,7 +50,7 @@ This section specifies the behavior of the transpiler for the default VM extensi
 - We use `zero_extend_24` to convert an unsigned integer with at most 24 bits into a 24-bit unsigned integer by zero extension. This is used in conjunction with `utof` to convert unsigned integers to field elements.
 - We use `sign_of(imm)` to get the sign bit of the immediate `imm`.
 - The notation `imm[0:4]` means the lowest 5 bits of the immediate.
-- For a phantom instruction `ins`, `disc(ins)` is the discriminant specified in the [ISA specification](./ISA.md#system-instructions).
+- For a phantom instruction `ins`, `disc(ins)` is the discriminant specified in the [ISA specification](/specs/openvm/isa#system-instructions).
 - For a phantom instruction `ins` and a 16-bit `c_upper`, `phantom_c(c_upper, ins) = c_upper << 16 | disc(ins)` is the corresponding 32-bit operand `c` for PHANTOM.
 
 We now specify the transpilation for system instructions and the default set of VM extensions.
@@ -215,7 +215,7 @@ Each VM extension's behavior is specified below.
 
 ## OpenVM Kernel Code Transpilation
 
-This section specifies the transpilation of custom RISC-V [kernel code](./RISCV.md#classification-of-custom-risc-v-machine-code) to OpenVM instructions.
+This section specifies the transpilation of custom RISC-V [kernel code](/specs/reference/riscv-custom-code#classification-of-custom-risc-v-machine-code) to OpenVM instructions.
 This transpilation differs from the ones described above in that a custom RISC-V code block of more than 32-bits is used to specify a single OpenVM instruction,
 and a single 32-bit RISC-V instruction is also used to specify multiple (nonexistent) instructions.
 
diff --git a/docs/vocs/docs/public/OpenVM-favicon.svg b/docs/vocs/docs/public/OpenVM-favicon.svg
new file mode 100644
index 0000000000..4421283573
--- /dev/null
+++ b/docs/vocs/docs/public/OpenVM-favicon.svg
@@ -0,0 +1,20 @@
+<svg width="420" height="420" viewBox="0 0 420 420" fill="none" xmlns="http://www.w3.org/2000/svg">
+<path d="M383 267.921V268.922C382.858 271.315 382.119 273.634 380.848 275.669C379.578 277.704 377.816 279.391 375.725 280.575L361.857 288.584L218.276 371.078C216.062 372.345 213.553 373.008 211 373C208.449 372.993 205.944 372.331 203.725 371.078L60.1436 288.584L46.2758 280.575C44.0525 279.31 42.2069 277.479 40.9284 275.27C39.6499 273.06 38.9844 270.551 39.0003 268.001C39.0027 265.465 39.6767 262.974 40.9542 260.78C42.2316 258.587 44.0671 256.768 46.2758 255.507L203.725 164.924C205.941 163.663 208.448 163 211 163C213.552 163 216.06 163.663 218.276 164.924L375.725 255.507C377.927 256.755 379.76 258.56 381.037 260.74C382.315 262.919 382.992 265.397 383 267.921Z" fill="#CBBAE4"/>
+<path d="M80.8446 256.157L204.392 184.815C206.462 183.626 208.806 183 211.191 183C213.577 183 215.92 183.626 217.99 184.815L341.176 256.157C343.249 257.347 344.972 259.065 346.171 261.138C347.369 263.21 348 265.564 348 267.96C348 270.356 347.369 272.709 346.171 274.782C344.972 276.854 343.249 278.573 341.176 279.763L217.749 351.185C215.679 352.374 213.335 353 210.95 353C208.565 353 206.221 352.374 204.151 351.185L80.724 279.763C78.667 278.554 76.963 276.824 75.7829 274.746C74.6027 272.668 73.988 270.316 74.0002 267.925C74.0124 265.534 74.6513 263.188 75.8526 261.122C77.0539 259.057 78.7753 257.344 80.8446 256.157Z" fill="#7A63BA"/>
+<path d="M211 373.645V415C208.449 414.993 205.943 414.327 203.724 413.065L46.276 321.971C44.0498 320.702 42.2019 318.859 40.9229 316.634C39.644 314.41 38.9803 311.883 39.0004 309.315V268C38.9845 270.567 39.65 273.092 40.9286 275.316C42.2071 277.541 44.0526 279.384 46.276 280.656L60.1437 288.718L203.724 371.75C205.947 372.998 208.453 373.65 211 373.645Z" fill="#7A63BA"/>
+<path d="M382.999 269V309.298C383.021 311.877 382.359 314.415 381.081 316.652C379.803 318.89 377.954 320.747 375.724 322.032L218.276 413.066C216.062 414.341 213.553 415.008 211 415V373.694C213.553 373.702 216.062 373.035 218.276 371.76L361.856 288.746L375.724 280.686C377.81 279.498 379.568 277.808 380.839 275.767C382.109 273.727 382.851 271.401 382.999 269Z" fill="#36156A"/>
+<path d="M94.1895 257.488L204.868 193.613C206.699 192.556 208.776 192 210.89 192C213.004 192 215.081 192.556 216.911 193.613L327.75 257.488C329.643 258.526 331.221 260.053 332.32 261.909C333.42 263.765 334 265.883 334 268.04C334 270.197 333.42 272.315 332.32 274.171C331.221 276.027 329.643 277.554 327.75 278.592L217.072 342.387C215.241 343.444 213.164 344 211.05 344C208.936 344 206.859 343.444 205.029 342.387L94.3098 278.592C92.4116 277.565 90.8246 276.047 89.7146 274.197C88.6046 272.347 88.0125 270.233 88.0002 268.076C87.9879 265.918 88.5557 263.798 89.6445 261.935C90.7334 260.072 92.3032 258.536 94.1895 257.488Z" fill="#28055D"/>
+<path d="M53 309.697L107 341V331.902L53 301V309.697Z" fill="#F3E1FF"/>
+<path d="M383 190.456V191.421C382.862 193.825 382.124 196.157 380.853 198.201C379.582 200.246 377.819 201.94 375.725 203.126L361.857 211.171L218.276 294.029C216.066 295.314 213.556 295.994 211 296C208.446 295.979 205.941 295.3 203.725 294.029L60.1439 211.171L46.2762 203.126C44.0472 201.862 42.1971 200.025 40.9177 197.804C39.6383 195.583 38.9763 193.06 39.0006 190.496C39.0082 187.945 39.6839 185.439 40.9604 183.23C42.237 181.021 44.0698 179.186 46.2762 177.907L203.725 86.9633C205.934 85.6774 208.444 85 211 85C213.556 85 216.066 85.6774 218.276 86.9633L375.725 177.907C377.933 179.173 379.769 181 381.047 183.204C382.324 185.407 382.998 187.909 383 190.456Z" fill="#CBBAE4"/>
+<path d="M80.7854 178.804L204.363 107.802C206.433 106.621 208.778 106 211.164 106C213.549 106 215.894 106.621 217.964 107.802L341.18 178.764C343.253 179.951 344.975 181.662 346.172 183.723C347.37 185.784 348 188.123 348 190.504C348 192.886 347.37 195.225 346.172 197.286C344.975 199.347 343.253 201.058 341.18 202.245L217.843 273.167C215.78 274.367 213.433 275 211.043 275C208.653 275 206.306 274.367 204.242 273.167L80.7854 202.245C78.7218 201.055 77.0087 199.346 75.8177 197.289C74.6268 195.232 74 192.899 74 190.525C74 188.15 74.6268 185.817 75.8177 183.76C77.0087 181.703 78.7218 179.994 80.7854 178.804Z" fill="#7A63BA"/>
+<path d="M211 295.938V337C208.446 336.979 205.94 336.304 203.724 335.039L46.2761 244.63C44.0532 243.375 42.2068 241.553 40.9278 239.351C39.6488 237.149 38.9835 234.646 39.0006 232.103V191C38.9763 193.551 39.6383 196.061 40.9177 198.271C42.1971 200.481 44.0472 202.31 46.2761 203.567L60.1439 211.571L203.724 294.016C205.944 295.267 208.45 295.929 211 295.938Z" fill="#7A63BA"/>
+<path d="M382.999 192V232.022C383.022 234.578 382.361 237.093 381.082 239.309C379.803 241.526 377.953 243.362 375.724 244.629L218.276 335.039C216.065 336.318 213.556 336.994 211 337V296.017C213.556 296.011 216.065 295.335 218.276 294.056L361.856 211.611L375.724 203.606C377.813 202.429 379.572 200.751 380.843 198.724C382.114 196.697 382.855 194.385 382.999 192Z" fill="#36156A"/>
+<path d="M94.2519 180.47L204.976 116.583C206.814 115.545 208.889 115 211 115C213.111 115 215.186 115.545 217.024 116.583L327.748 180.47C329.641 181.508 331.22 183.036 332.32 184.893C333.42 186.749 334 188.867 334 191.025C334 193.182 333.42 195.3 332.32 197.157C331.22 199.014 329.641 200.541 327.748 201.579L217.024 265.387C215.193 266.444 213.115 267 211 267C208.885 267 206.807 266.444 204.976 265.387L94.2519 201.579C92.3589 200.541 90.7799 199.014 89.6801 197.157C88.5802 195.3 88 193.182 88 191.025C88 188.867 88.5802 186.749 89.6801 184.893C90.7799 183.036 92.3589 181.508 94.2519 180.47Z" fill="#28055D"/>
+<path d="M53 232.729L107 264V254.871L53 224V232.729Z" fill="#F3E1FF"/>
+<path d="M383 113.461V114.426C382.858 116.829 382.119 119.159 380.848 121.203C379.578 123.247 377.816 124.941 375.725 126.13L361.857 134.174L218.276 217.029C216.066 218.314 213.556 218.994 211 219C208.446 218.979 205.94 218.3 203.725 217.029L60.1436 134.174L46.2758 126.13C44.0525 124.86 42.2069 123.021 40.9284 120.802C39.6499 118.582 38.9844 116.063 39.0003 113.501C39.0038 110.948 39.6779 108.442 40.9549 106.232C42.2318 104.023 44.0667 102.188 46.2758 100.912L203.725 9.93224C205.941 8.66603 208.448 8 211 8C213.552 8 216.06 8.66603 218.276 9.93224L375.725 100.871C377.939 102.141 379.779 103.975 381.056 106.186C382.334 108.397 383.005 110.907 383 113.461Z" fill="#CBBAE4"/>
+<path d="M80.8446 101.818L204.392 30.8025C206.462 29.6214 208.806 29 211.191 29C213.577 29 215.92 29.6214 217.99 30.8025L341.176 101.818C343.249 103 344.972 104.706 346.171 106.765C347.369 108.823 348 111.161 348 113.54C348 115.92 347.369 118.257 346.171 120.316C344.972 122.374 343.249 124.081 341.176 125.262L217.749 196.197C215.679 197.379 213.335 198 210.95 198C208.565 198 206.221 197.379 204.151 196.197L80.724 125.262C78.667 124.062 76.963 122.344 75.7829 120.28C74.6027 118.217 73.988 115.88 74.0002 113.505C74.0124 111.131 74.6513 108.801 75.8526 106.749C77.0539 104.698 78.7753 102.997 80.8446 101.818Z" fill="#7A63BA"/>
+<path d="M211 218.685V260C208.449 259.993 205.943 259.327 203.724 258.065L46.276 167.011C44.0498 165.742 42.2019 163.899 40.9229 161.675C39.644 159.45 38.9803 156.923 39.0004 154.355V113C38.9845 115.567 39.65 118.092 40.9286 120.316C42.2071 122.541 44.0526 124.384 46.276 125.656L60.1437 133.718L203.724 216.751C205.944 218.01 208.45 218.677 211 218.685Z" fill="#7A63BA"/>
+<path d="M382.999 114V154.298C383.025 156.877 382.365 159.417 381.086 161.655C379.807 163.894 377.956 165.75 375.724 167.032L218.275 258.066C216.062 259.341 213.553 260.008 211 260V218.735C213.556 218.729 216.065 218.048 218.275 216.76L361.856 133.746L375.724 125.686C377.81 124.498 379.568 122.808 380.838 120.767C382.109 118.727 382.851 116.401 382.999 114Z" fill="#36156A"/>
+<path d="M94.2724 102.488L205.004 38.6126C206.835 37.5561 208.913 37 211.028 37C213.143 37 215.221 37.5561 217.053 38.6126L327.784 102.488C329.667 103.529 331.237 105.055 332.33 106.907C333.423 108.759 334 110.87 334 113.02C334 115.17 333.423 117.281 332.33 119.133C331.237 120.986 329.667 122.511 327.784 123.552L217.053 187.387C215.221 188.444 213.143 189 211.028 189C208.913 189 206.835 188.444 205.004 187.387L94.2322 123.592C92.3411 122.551 90.7648 121.021 89.6684 119.163C88.5721 117.304 87.9959 115.186 88 113.028C88.0041 110.871 88.5885 108.755 89.692 106.9C90.7954 105.046 92.3774 103.522 94.2724 102.488Z" fill="#7A63BA"/>
+<path d="M53 154.697L107 186V176.902L53 146V154.697Z" fill="#F3E1FF"/>
+</svg>
diff --git a/docs/vocs/docs/public/OpenVM-horizontal.svg b/docs/vocs/docs/public/OpenVM-horizontal.svg
new file mode 100644
index 0000000000..94183060e3
--- /dev/null
+++ b/docs/vocs/docs/public/OpenVM-horizontal.svg
@@ -0,0 +1,28 @@
+<svg width="1345" height="420" viewBox="0 0 1345 420" fill="none" xmlns="http://www.w3.org/2000/svg">
+<g clip-path="url(#clip0_1861_17196)">
+<path d="M333.2 250.859V251.581C333.098 253.307 332.567 254.98 331.654 256.448C330.741 257.917 329.475 259.133 327.972 259.988L318.006 265.766L214.828 325.278C213.238 326.192 211.434 326.67 209.6 326.665C207.767 326.66 205.966 326.182 204.372 325.278L101.194 265.766L91.2284 259.988C89.6307 259.075 88.3045 257.754 87.3858 256.16C86.467 254.566 85.9888 252.756 86.0002 250.916C86.002 249.087 86.4863 247.29 87.4043 245.707C88.3222 244.125 89.6413 242.813 91.2284 241.903L204.372 176.555C205.964 175.645 207.766 175.167 209.6 175.167C211.434 175.167 213.236 175.645 214.828 176.555L327.972 241.903C329.554 242.803 330.871 244.105 331.789 245.678C332.707 247.251 333.194 249.038 333.2 250.859Z" fill="#CBBAE4"/>
+<path d="M116.099 242.451L204.806 191.375C206.292 190.523 207.975 190.075 209.687 190.075C211.4 190.075 213.083 190.523 214.569 191.375L303.016 242.451C304.504 243.303 305.741 244.534 306.602 246.017C307.462 247.501 307.915 249.186 307.915 250.902C307.915 252.617 307.462 254.302 306.602 255.786C305.741 257.27 304.504 258.5 303.016 259.352L214.396 310.486C212.91 311.338 211.227 311.785 209.514 311.785C207.801 311.785 206.119 311.338 204.632 310.486L116.013 259.352C114.536 258.486 113.312 257.248 112.465 255.76C111.618 254.273 111.176 252.589 111.185 250.877C111.194 249.165 111.652 247.485 112.515 246.006C113.377 244.528 114.613 243.302 116.099 242.451Z" fill="#7A63BA"/>
+<path d="M209.6 326.635V356.276C207.767 356.271 205.966 355.793 204.372 354.889L91.2285 289.599C89.6288 288.689 88.3009 287.369 87.3818 285.774C86.4627 284.179 85.9859 282.368 86.0003 280.528V250.916C85.9889 252.756 86.4671 254.566 87.3859 256.16C88.3046 257.754 89.6309 259.075 91.2285 259.987L101.194 265.765L204.372 325.278C205.969 326.172 207.77 326.639 209.6 326.635Z" fill="#7A63BA"/>
+<path d="M333.2 251.581V280.47C333.216 282.319 332.74 284.138 331.822 285.742C330.903 287.347 329.575 288.678 327.972 289.599L214.829 354.861C213.238 355.775 211.435 356.253 209.601 356.247V326.636C211.435 326.641 213.238 326.163 214.829 325.249L318.007 265.737L327.972 259.959C329.471 259.107 330.735 257.895 331.648 256.432C332.56 254.969 333.094 253.302 333.2 251.581Z" fill="#36156A"/>
+<path d="M125.574 243.376L205.21 197.384C206.528 196.624 208.022 196.223 209.543 196.223C211.064 196.223 212.559 196.624 213.876 197.384L293.628 243.376C294.989 244.124 296.125 245.223 296.916 246.56C297.707 247.896 298.125 249.421 298.125 250.974C298.125 252.527 297.707 254.052 296.916 255.389C296.125 256.725 294.989 257.825 293.628 258.572L213.991 304.506C212.674 305.267 211.18 305.668 209.659 305.668C208.137 305.668 206.643 305.267 205.326 304.506L125.66 258.572C124.295 257.833 123.153 256.74 122.354 255.408C121.555 254.075 121.129 252.553 121.12 251C121.112 249.447 121.52 247.92 122.304 246.578C123.087 245.237 124.217 244.131 125.574 243.376Z" fill="#28055D"/>
+<path d="M95.9371 280.817L135.019 303.38V296.822L95.9371 274.548V280.817Z" fill="#F3E1FF"/>
+<path d="M333.2 195.304V195.997C333.101 197.724 332.57 199.398 331.657 200.867C330.744 202.336 329.477 203.552 327.972 204.404L318.006 210.182L214.828 269.694C213.24 270.617 211.437 271.106 209.6 271.11C207.765 271.095 205.964 270.607 204.372 269.694L101.194 210.182L91.2287 204.404C89.627 203.497 88.2975 202.177 87.3781 200.581C86.4587 198.986 85.983 197.174 86.0005 195.333C86.0059 193.5 86.4914 191.701 87.4088 190.114C88.3261 188.528 89.6432 187.209 91.2287 186.29L204.372 120.971C205.96 120.048 207.764 119.561 209.6 119.561C211.437 119.561 213.241 120.048 214.828 120.971L327.972 186.29C329.559 187.2 330.878 188.513 331.796 190.095C332.714 191.678 333.198 193.474 333.2 195.304Z" fill="#CBBAE4"/>
+<path d="M116.099 186.897L204.805 135.705C206.291 134.853 207.974 134.406 209.687 134.406C211.399 134.406 213.082 134.853 214.568 135.705L303.015 186.868C304.503 187.724 305.739 188.957 306.599 190.443C307.458 191.929 307.911 193.616 307.911 195.333C307.911 197.05 307.458 198.736 306.599 200.222C305.739 201.708 304.503 202.941 303.015 203.797L214.482 254.932C213 255.797 211.316 256.254 209.6 256.254C207.884 256.254 206.2 255.797 204.718 254.932L116.099 203.797C114.617 202.939 113.388 201.707 112.533 200.224C111.678 198.741 111.228 197.059 111.228 195.347C111.228 193.635 111.678 191.953 112.533 190.47C113.388 188.987 114.617 187.755 116.099 186.897Z" fill="#7A63BA"/>
+<path d="M209.6 271.081V300.722C207.765 300.707 205.964 300.219 204.372 299.306L91.2287 234.045C89.6313 233.139 88.3044 231.824 87.3853 230.234C86.4662 228.645 85.9882 226.839 86.0005 225.002V195.333C85.983 197.174 86.4587 198.986 87.3781 200.581C88.2975 202.176 89.627 203.496 91.2287 204.404L101.194 210.182L204.372 269.694C205.967 270.597 207.767 271.075 209.6 271.081Z" fill="#7A63BA"/>
+<path d="M333.2 195.997V224.887C333.217 226.732 332.741 228.548 331.822 230.147C330.903 231.747 329.574 233.072 327.972 233.987L214.829 299.248C213.241 300.171 211.437 300.66 209.601 300.664V271.081C211.437 271.077 213.241 270.588 214.829 269.666L318.007 210.153L327.972 204.375C329.473 203.526 330.738 202.314 331.651 200.851C332.564 199.387 333.097 197.719 333.2 195.997Z" fill="#36156A"/>
+<path d="M125.574 187.735L205.21 141.743C206.532 140.996 208.025 140.604 209.543 140.604C211.061 140.604 212.554 140.996 213.876 141.743L293.512 187.735C294.874 188.482 296.01 189.582 296.801 190.918C297.592 192.255 298.009 193.78 298.009 195.333C298.009 196.886 297.592 198.411 296.801 199.748C296.01 201.084 294.874 202.184 293.512 202.931L213.876 248.865C212.559 249.626 211.064 250.026 209.543 250.026C208.022 250.026 206.528 249.626 205.21 248.865L125.574 202.931C124.212 202.184 123.077 201.084 122.286 199.748C121.494 198.411 121.077 196.886 121.077 195.333C121.077 193.78 121.494 192.255 122.286 190.918C123.077 189.582 124.212 188.482 125.574 187.735Z" fill="#28055D"/>
+<path d="M95.9371 225.262L135.019 247.825V241.238L95.9371 218.965V225.262Z" fill="#F3E1FF"/>
+<path d="M333.2 139.749V140.443C333.098 142.169 332.567 143.842 331.654 145.31C330.741 146.779 329.475 147.995 327.972 148.85L318.006 154.627L214.828 214.14C213.24 215.063 211.437 215.551 209.6 215.555C207.765 215.54 205.964 215.053 204.372 214.14L101.194 154.627L91.2284 148.85C89.6307 147.937 88.3045 146.616 87.3858 145.022C86.467 143.428 85.9888 141.618 86.0002 139.778C86.0027 137.945 86.4872 136.145 87.4048 134.558C88.3224 132.97 89.641 131.653 91.2284 130.736L204.372 65.3879C205.964 64.4784 207.766 64 209.6 64C211.434 64 213.236 64.4784 214.828 65.3879L327.972 130.707C329.563 131.619 330.885 132.936 331.803 134.524C332.721 136.112 333.203 137.915 333.2 139.749Z" fill="#CBBAE4"/>
+<path d="M116.099 131.314L204.806 80.1216C206.292 79.2702 207.975 78.8223 209.687 78.8223C211.4 78.8223 213.083 79.2702 214.569 80.1216L303.016 131.314C304.504 132.166 305.741 133.396 306.602 134.88C307.462 136.364 307.915 138.049 307.915 139.764C307.915 141.479 307.462 143.164 306.602 144.648C305.741 146.132 304.504 147.362 303.016 148.214L214.396 199.349C212.91 200.2 211.227 200.648 209.514 200.648C207.801 200.648 206.119 200.2 204.632 199.349L116.013 148.214C114.536 147.349 113.312 146.11 112.465 144.623C111.618 143.135 111.176 141.451 111.185 139.739C111.194 138.027 111.652 136.347 112.515 134.869C113.377 133.39 114.613 132.164 116.099 131.314Z" fill="#7A63BA"/>
+<path d="M209.6 215.526V245.138C207.767 245.133 205.966 244.656 204.372 243.752L91.2285 178.49C89.6288 177.58 88.3009 176.26 87.3818 174.665C86.4627 173.07 85.9859 171.26 86.0003 169.419V139.778C85.9889 141.618 86.4671 143.428 87.3859 145.022C88.3046 146.616 89.6309 147.937 91.2285 148.85L101.194 154.627L204.372 214.14C205.967 215.043 207.767 215.52 209.6 215.526Z" fill="#7A63BA"/>
+<path d="M333.2 140.443V169.332C333.219 171.181 332.745 173.002 331.826 174.607C330.907 176.211 329.577 177.542 327.972 178.461L214.829 243.723C213.238 244.637 211.435 245.115 209.601 245.109V215.526C211.437 215.522 213.241 215.034 214.829 214.111L318.007 154.598L327.972 148.821C329.471 147.969 330.735 146.757 331.648 145.294C332.56 143.831 333.094 142.164 333.2 140.443Z" fill="#36156A"/>
+<path d="M125.574 132.18L205.211 86.1882C206.528 85.4275 208.022 85.0271 209.543 85.0271C211.064 85.0271 212.559 85.4275 213.876 86.1882L293.512 132.18C294.867 132.93 295.996 134.028 296.782 135.362C297.568 136.696 297.983 138.216 297.983 139.764C297.983 141.312 297.568 142.832 296.782 144.165C295.996 145.499 294.867 146.598 293.512 147.347L213.876 193.31C212.559 194.071 211.064 194.472 209.543 194.472C208.022 194.472 206.528 194.071 205.211 193.31L125.545 147.376C124.185 146.626 123.051 145.525 122.263 144.187C121.474 142.848 121.06 141.323 121.063 139.77C121.066 138.216 121.486 136.693 122.28 135.357C123.073 134.022 124.211 132.925 125.574 132.18Z" fill="#7A63BA"/>
+<path d="M95.9371 169.679L135.019 192.242V185.684L95.9371 163.41V169.679Z" fill="#F3E1FF"/>
+</g>
+<path d="M569.995 213.8C569.995 258.2 540.795 289.4 498.795 289.4C456.995 289.4 428.195 258.4 428.195 213.8C428.195 169.4 457.195 138.2 498.995 138.2C540.795 138.2 569.995 169.2 569.995 213.8ZM542.995 213.8C542.995 182.6 525.795 162.8 498.995 162.8C472.195 162.8 454.995 182.6 454.995 213.8C454.995 245 472.195 264.8 498.995 264.8C525.795 264.8 542.995 244.6 542.995 213.8ZM590.221 332.8V188.2H612.821L614.421 203.2C620.421 191.8 633.421 185.2 648.421 185.2C676.221 185.2 694.621 205.4 694.621 236.4C694.621 267.2 677.821 289.6 648.421 289.6C633.621 289.6 620.821 283.8 614.621 273.8V332.8H590.221ZM614.821 237.6C614.821 255.4 625.821 267.6 642.621 267.6C659.821 267.6 670.021 255.2 670.021 237.6C670.021 220 659.821 207.4 642.621 207.4C625.821 207.4 614.821 219.8 614.821 237.6ZM758.249 289.6C728.849 289.6 708.249 268.2 708.249 237.6C708.249 206.6 728.449 185.2 757.449 185.2C787.049 185.2 805.849 205 805.849 235.8V243.2L731.449 243.4C733.249 260.8 742.449 269.6 758.649 269.6C772.049 269.6 780.849 264.4 783.649 255H806.249C802.049 276.6 784.049 289.6 758.249 289.6ZM757.649 205.2C743.249 205.2 734.449 213 732.049 227.8H781.649C781.649 214.2 772.249 205.2 757.649 205.2ZM849.777 287H825.377V188.2H847.977L849.977 201C856.177 191 868.177 185.2 881.577 185.2C906.377 185.2 919.177 200.6 919.177 226.2V287H894.777V232C894.777 215.4 886.577 207.4 873.977 207.4C858.977 207.4 849.777 217.8 849.777 233.8V287ZM977.009 287L921.809 140.8H949.009L981.009 225C984.209 233.8 987.009 242.4 990.209 254.2C993.809 241.4 996.809 232.4 999.609 225L1031.21 140.8H1057.81L1003.41 287H977.009ZM1100.85 287H1076.05V140.8H1100.85L1148.05 257.2L1195.25 140.8H1220.45V287H1195.65V242C1195.65 212.6 1195.65 204 1197.05 193.6L1159.85 287H1136.25L1099.25 193.8C1100.65 202.6 1100.85 216.4 1100.85 234.6V287Z" fill="#42276E"/>
+<defs>
+<clipPath id="clip0_1861_17196">
+<rect width="420" height="420" fill="white"/>
+</clipPath>
+</defs>
+</svg>
diff --git a/docs/vocs/docs/public/agg-2.png b/docs/vocs/docs/public/agg-2.png
new file mode 100644
index 0000000000..11fcc8de81
Binary files /dev/null and b/docs/vocs/docs/public/agg-2.png differ
diff --git a/docs/vocs/docs/public/agg-stark.png b/docs/vocs/docs/public/agg-stark.png
new file mode 100644
index 0000000000..ee479d69d5
Binary files /dev/null and b/docs/vocs/docs/public/agg-stark.png differ
diff --git a/docs/vocs/docs/public/agg.png b/docs/vocs/docs/public/agg.png
new file mode 100644
index 0000000000..2e03723445
Binary files /dev/null and b/docs/vocs/docs/public/agg.png differ
diff --git a/docs/vocs/docs/snippets/examples b/docs/vocs/docs/snippets/examples
new file mode 120000
index 0000000000..07b1cee43d
--- /dev/null
+++ b/docs/vocs/docs/snippets/examples
@@ -0,0 +1 @@
+../../../../examples/
\ No newline at end of file
diff --git a/docs/vocs/docs/snippets/examples_sdk b/docs/vocs/docs/snippets/examples_sdk
new file mode 120000
index 0000000000..b54f1fd2b4
--- /dev/null
+++ b/docs/vocs/docs/snippets/examples_sdk
@@ -0,0 +1 @@
+../../../../crates/sdk/examples
\ No newline at end of file
diff --git a/docs/vocs/docs/styles.css b/docs/vocs/docs/styles.css
new file mode 100644
index 0000000000..b819754cd7
--- /dev/null
+++ b/docs/vocs/docs/styles.css
@@ -0,0 +1 @@
+@import "https://cdn.jsdelivr.net/npm/katex@0.16.22/dist/katex.min.css";
diff --git a/docs/vocs/package.json b/docs/vocs/package.json
new file mode 100644
index 0000000000..300b5ed51b
--- /dev/null
+++ b/docs/vocs/package.json
@@ -0,0 +1,29 @@
+{
+	"name": "OpenVM Docs",
+	"private": true,
+	"version": "0.0.0",
+	"type": "module",
+	"scripts": {
+		"dev": "vocs dev",
+		"build": "vocs build",
+		"preview": "vocs preview",
+		"vercel-build": "cp -rL docs/snippets/examples docs/snippets/examples-tmp && rm -rf docs/snippets/examples && mv docs/snippets/examples-tmp docs/snippets/examples && cp -rL docs/snippets/examples_sdk docs/snippets/examples_sdk-tmp && rm -rf docs/snippets/examples_sdk && mv docs/snippets/examples_sdk-tmp docs/snippets/examples_sdk && pnpm build"
+	},
+	"dependencies": {
+		"katex": "^0.16.22",
+		"react": "^19.1.1",
+		"react-dom": "^19.1.1",
+		"rehype-katex": "^7.0.1",
+		"remark-math": "^6.0.0",
+		"remark-mdx-disable-explicit-jsx": "^0.1.0",
+		"vocs": "^1.0.13"
+	},
+	"devDependencies": {
+		"@types/node": "^24.3.0",
+		"@types/react": "^19.1.10",
+		"glob": "^11.0.3",
+		"patch-package": "^8.0.0",
+		"postinstall-postinstall": "^2.1.0",
+		"typescript": "^5.9.2"
+	}
+}
\ No newline at end of file
diff --git a/docs/vocs/pnpm-lock.yaml b/docs/vocs/pnpm-lock.yaml
new file mode 100644
index 0000000000..0dfe9ad8bb
--- /dev/null
+++ b/docs/vocs/pnpm-lock.yaml
@@ -0,0 +1,7107 @@
+lockfileVersion: '6.0'
+
+settings:
+  autoInstallPeers: true
+  excludeLinksFromLockfile: false
+
+dependencies:
+  katex:
+    specifier: ^0.16.22
+    version: 0.16.22
+  react:
+    specifier: ^19.1.1
+    version: 19.1.1
+  react-dom:
+    specifier: ^19.1.1
+    version: 19.1.1(react@19.1.1)
+  rehype-katex:
+    specifier: ^7.0.1
+    version: 7.0.1
+  remark-math:
+    specifier: ^6.0.0
+    version: 6.0.0
+  remark-mdx-disable-explicit-jsx:
+    specifier: ^0.1.0
+    version: 0.1.0
+  vocs:
+    specifier: ^1.0.13
+    version: 1.0.13(@types/node@24.3.0)(@types/react@19.1.10)(acorn@8.15.0)(react-dom@19.1.1)(react@19.1.1)(rollup@4.46.2)(typescript@5.9.2)
+
+devDependencies:
+  '@types/node':
+    specifier: ^24.3.0
+    version: 24.3.0
+  '@types/react':
+    specifier: ^19.1.10
+    version: 19.1.10
+  glob:
+    specifier: ^11.0.3
+    version: 11.0.3
+  patch-package:
+    specifier: ^8.0.0
+    version: 8.0.0
+  postinstall-postinstall:
+    specifier: ^2.1.0
+    version: 2.1.0
+  typescript:
+    specifier: ^5.9.2
+    version: 5.9.2
+
+packages:
+
+  /@ampproject/remapping@2.3.0:
+    resolution: {integrity: sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw==}
+    engines: {node: '>=6.0.0'}
+    dependencies:
+      '@jridgewell/gen-mapping': 0.3.13
+      '@jridgewell/trace-mapping': 0.3.30
+    dev: false
+
+  /@antfu/install-pkg@1.1.0:
+    resolution: {integrity: sha512-MGQsmw10ZyI+EJo45CdSER4zEb+p31LpDAFp2Z3gkSd1yqVZGi0Ebx++YTEMonJy4oChEMLsxZ64j8FH6sSqtQ==}
+    dependencies:
+      package-manager-detector: 1.3.0
+      tinyexec: 1.0.1
+    dev: false
+
+  /@antfu/utils@8.1.1:
+    resolution: {integrity: sha512-Mex9nXf9vR6AhcXmMrlz/HVgYYZpVGJ6YlPgwl7UnaFpnshXs6EK/oa5Gpf3CzENMjkvEx2tQtntGnb7UtSTOQ==}
+    dev: false
+
+  /@babel/code-frame@7.27.1:
+    resolution: {integrity: sha512-cjQ7ZlQ0Mv3b47hABuTevyTuYN4i+loJKGeV9flcCgIK37cCXRh+L1bd3iBHlynerhQ7BhCkn2BPbQUL+rGqFg==}
+    engines: {node: '>=6.9.0'}
+    dependencies:
+      '@babel/helper-validator-identifier': 7.27.1
+      js-tokens: 4.0.0
+      picocolors: 1.1.1
+    dev: false
+
+  /@babel/compat-data@7.28.0:
+    resolution: {integrity: sha512-60X7qkglvrap8mn1lh2ebxXdZYtUcpd7gsmy9kLaBJ4i/WdY8PqTSdxyA8qraikqKQK5C1KRBKXqznrVapyNaw==}
+    engines: {node: '>=6.9.0'}
+    dev: false
+
+  /@babel/core@7.28.3:
+    resolution: {integrity: sha512-yDBHV9kQNcr2/sUr9jghVyz9C3Y5G2zUM2H2lo+9mKv4sFgbA8s8Z9t8D1jiTkGoO/NoIfKMyKWr4s6CN23ZwQ==}
+    engines: {node: '>=6.9.0'}
+    dependencies:
+      '@ampproject/remapping': 2.3.0
+      '@babel/code-frame': 7.27.1
+      '@babel/generator': 7.28.3
+      '@babel/helper-compilation-targets': 7.27.2
+      '@babel/helper-module-transforms': 7.28.3(@babel/core@7.28.3)
+      '@babel/helpers': 7.28.3
+      '@babel/parser': 7.28.3
+      '@babel/template': 7.27.2
+      '@babel/traverse': 7.28.3
+      '@babel/types': 7.28.2
+      convert-source-map: 2.0.0
+      debug: 4.4.1
+      gensync: 1.0.0-beta.2
+      json5: 2.2.3
+      semver: 6.3.1
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /@babel/generator@7.28.3:
+    resolution: {integrity: sha512-3lSpxGgvnmZznmBkCRnVREPUFJv2wrv9iAoFDvADJc0ypmdOxdUtcLeBgBJ6zE0PMeTKnxeQzyk0xTBq4Ep7zw==}
+    engines: {node: '>=6.9.0'}
+    dependencies:
+      '@babel/parser': 7.28.3
+      '@babel/types': 7.28.2
+      '@jridgewell/gen-mapping': 0.3.13
+      '@jridgewell/trace-mapping': 0.3.30
+      jsesc: 3.1.0
+    dev: false
+
+  /@babel/helper-compilation-targets@7.27.2:
+    resolution: {integrity: sha512-2+1thGUUWWjLTYTHZWK1n8Yga0ijBz1XAhUXcKy81rd5g6yh7hGqMp45v7cadSbEHc9G3OTv45SyneRN3ps4DQ==}
+    engines: {node: '>=6.9.0'}
+    dependencies:
+      '@babel/compat-data': 7.28.0
+      '@babel/helper-validator-option': 7.27.1
+      browserslist: 4.25.2
+      lru-cache: 5.1.1
+      semver: 6.3.1
+    dev: false
+
+  /@babel/helper-globals@7.28.0:
+    resolution: {integrity: sha512-+W6cISkXFa1jXsDEdYA8HeevQT/FULhxzR99pxphltZcVaugps53THCeiWA8SguxxpSp3gKPiuYfSWopkLQ4hw==}
+    engines: {node: '>=6.9.0'}
+    dev: false
+
+  /@babel/helper-module-imports@7.27.1:
+    resolution: {integrity: sha512-0gSFWUPNXNopqtIPQvlD5WgXYI5GY2kP2cCvoT8kczjbfcfuIljTbcWrulD1CIPIX2gt1wghbDy08yE1p+/r3w==}
+    engines: {node: '>=6.9.0'}
+    dependencies:
+      '@babel/traverse': 7.28.3
+      '@babel/types': 7.28.2
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /@babel/helper-module-transforms@7.28.3(@babel/core@7.28.3):
+    resolution: {integrity: sha512-gytXUbs8k2sXS9PnQptz5o0QnpLL51SwASIORY6XaBKF88nsOT0Zw9szLqlSGQDP/4TljBAD5y98p2U1fqkdsw==}
+    engines: {node: '>=6.9.0'}
+    peerDependencies:
+      '@babel/core': ^7.0.0
+    dependencies:
+      '@babel/core': 7.28.3
+      '@babel/helper-module-imports': 7.27.1
+      '@babel/helper-validator-identifier': 7.27.1
+      '@babel/traverse': 7.28.3
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /@babel/helper-plugin-utils@7.27.1:
+    resolution: {integrity: sha512-1gn1Up5YXka3YYAHGKpbideQ5Yjf1tDa9qYcgysz+cNCXukyLl6DjPXhD3VRwSb8c0J9tA4b2+rHEZtc6R0tlw==}
+    engines: {node: '>=6.9.0'}
+    dev: false
+
+  /@babel/helper-string-parser@7.27.1:
+    resolution: {integrity: sha512-qMlSxKbpRlAridDExk92nSobyDdpPijUq2DW6oDnUqd0iOGxmQjyqhMIihI9+zv4LPyZdRje2cavWPbCbWm3eA==}
+    engines: {node: '>=6.9.0'}
+    dev: false
+
+  /@babel/helper-validator-identifier@7.27.1:
+    resolution: {integrity: sha512-D2hP9eA+Sqx1kBZgzxZh0y1trbuU+JoDkiEwqhQ36nodYqJwyEIhPSdMNd7lOm/4io72luTPWH20Yda0xOuUow==}
+    engines: {node: '>=6.9.0'}
+    dev: false
+
+  /@babel/helper-validator-option@7.27.1:
+    resolution: {integrity: sha512-YvjJow9FxbhFFKDSuFnVCe2WxXk1zWc22fFePVNEaWJEu8IrZVlda6N0uHwzZrUM1il7NC9Mlp4MaJYbYd9JSg==}
+    engines: {node: '>=6.9.0'}
+    dev: false
+
+  /@babel/helpers@7.28.3:
+    resolution: {integrity: sha512-PTNtvUQihsAsDHMOP5pfobP8C6CM4JWXmP8DrEIt46c3r2bf87Ua1zoqevsMo9g+tWDwgWrFP5EIxuBx5RudAw==}
+    engines: {node: '>=6.9.0'}
+    dependencies:
+      '@babel/template': 7.27.2
+      '@babel/types': 7.28.2
+    dev: false
+
+  /@babel/parser@7.28.3:
+    resolution: {integrity: sha512-7+Ey1mAgYqFAx2h0RuoxcQT5+MlG3GTV0TQrgr7/ZliKsm/MNDxVVutlWaziMq7wJNAz8MTqz55XLpWvva6StA==}
+    engines: {node: '>=6.0.0'}
+    hasBin: true
+    dependencies:
+      '@babel/types': 7.28.2
+    dev: false
+
+  /@babel/plugin-syntax-typescript@7.27.1(@babel/core@7.28.3):
+    resolution: {integrity: sha512-xfYCBMxveHrRMnAWl1ZlPXOZjzkN82THFvLhQhFXFt81Z5HnN+EtUkZhv/zcKpmT3fzmWZB0ywiBrbC3vogbwQ==}
+    engines: {node: '>=6.9.0'}
+    peerDependencies:
+      '@babel/core': ^7.0.0-0
+    dependencies:
+      '@babel/core': 7.28.3
+      '@babel/helper-plugin-utils': 7.27.1
+    dev: false
+
+  /@babel/plugin-transform-react-jsx-self@7.27.1(@babel/core@7.28.3):
+    resolution: {integrity: sha512-6UzkCs+ejGdZ5mFFC/OCUrv028ab2fp1znZmCZjAOBKiBK2jXD1O+BPSfX8X2qjJ75fZBMSnQn3Rq2mrBJK2mw==}
+    engines: {node: '>=6.9.0'}
+    peerDependencies:
+      '@babel/core': ^7.0.0-0
+    dependencies:
+      '@babel/core': 7.28.3
+      '@babel/helper-plugin-utils': 7.27.1
+    dev: false
+
+  /@babel/plugin-transform-react-jsx-source@7.27.1(@babel/core@7.28.3):
+    resolution: {integrity: sha512-zbwoTsBruTeKB9hSq73ha66iFeJHuaFkUbwvqElnygoNbj/jHRsSeokowZFN3CZ64IvEqcmmkVe89OPXc7ldAw==}
+    engines: {node: '>=6.9.0'}
+    peerDependencies:
+      '@babel/core': ^7.0.0-0
+    dependencies:
+      '@babel/core': 7.28.3
+      '@babel/helper-plugin-utils': 7.27.1
+    dev: false
+
+  /@babel/runtime@7.28.3:
+    resolution: {integrity: sha512-9uIQ10o0WGdpP6GDhXcdOJPJuDgFtIDtN/9+ArJQ2NAfAmiuhTQdzkaTGR33v43GYS2UrSA0eX2pPPHoFVvpxA==}
+    engines: {node: '>=6.9.0'}
+    dev: false
+
+  /@babel/template@7.27.2:
+    resolution: {integrity: sha512-LPDZ85aEJyYSd18/DkjNh4/y1ntkE5KwUHWTiqgRxruuZL2F1yuHligVHLvcHY2vMHXttKFpJn6LwfI7cw7ODw==}
+    engines: {node: '>=6.9.0'}
+    dependencies:
+      '@babel/code-frame': 7.27.1
+      '@babel/parser': 7.28.3
+      '@babel/types': 7.28.2
+    dev: false
+
+  /@babel/traverse@7.28.3:
+    resolution: {integrity: sha512-7w4kZYHneL3A6NP2nxzHvT3HCZ7puDZZjFMqDpBPECub79sTtSO5CGXDkKrTQq8ksAwfD/XI2MRFX23njdDaIQ==}
+    engines: {node: '>=6.9.0'}
+    dependencies:
+      '@babel/code-frame': 7.27.1
+      '@babel/generator': 7.28.3
+      '@babel/helper-globals': 7.28.0
+      '@babel/parser': 7.28.3
+      '@babel/template': 7.27.2
+      '@babel/types': 7.28.2
+      debug: 4.4.1
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /@babel/types@7.28.2:
+    resolution: {integrity: sha512-ruv7Ae4J5dUYULmeXw1gmb7rYRz57OWCPM57pHojnLq/3Z1CK2lNSLTCVjxVk1F/TZHwOZZrOWi0ur95BbLxNQ==}
+    engines: {node: '>=6.9.0'}
+    dependencies:
+      '@babel/helper-string-parser': 7.27.1
+      '@babel/helper-validator-identifier': 7.27.1
+    dev: false
+
+  /@braintree/sanitize-url@7.1.1:
+    resolution: {integrity: sha512-i1L7noDNxtFyL5DmZafWy1wRVhGehQmzZaz1HiN5e7iylJMSZR7ekOV7NsIqa5qBldlLrsKv4HbgFUVlQrz8Mw==}
+    dev: false
+
+  /@chevrotain/cst-dts-gen@11.0.3:
+    resolution: {integrity: sha512-BvIKpRLeS/8UbfxXxgC33xOumsacaeCKAjAeLyOn7Pcp95HiRbrpl14S+9vaZLolnbssPIUuiUd8IvgkRyt6NQ==}
+    dependencies:
+      '@chevrotain/gast': 11.0.3
+      '@chevrotain/types': 11.0.3
+      lodash-es: 4.17.21
+    dev: false
+
+  /@chevrotain/gast@11.0.3:
+    resolution: {integrity: sha512-+qNfcoNk70PyS/uxmj3li5NiECO+2YKZZQMbmjTqRI3Qchu8Hig/Q9vgkHpI3alNjr7M+a2St5pw5w5F6NL5/Q==}
+    dependencies:
+      '@chevrotain/types': 11.0.3
+      lodash-es: 4.17.21
+    dev: false
+
+  /@chevrotain/regexp-to-ast@11.0.3:
+    resolution: {integrity: sha512-1fMHaBZxLFvWI067AVbGJav1eRY7N8DDvYCTwGBiE/ytKBgP8azTdgyrKyWZ9Mfh09eHWb5PgTSO8wi7U824RA==}
+    dev: false
+
+  /@chevrotain/types@11.0.3:
+    resolution: {integrity: sha512-gsiM3G8b58kZC2HaWR50gu6Y1440cHiJ+i3JUvcp/35JchYejb2+5MVeJK0iKThYpAa/P2PYFV4hoi44HD+aHQ==}
+    dev: false
+
+  /@chevrotain/utils@11.0.3:
+    resolution: {integrity: sha512-YslZMgtJUyuMbZ+aKvfF3x1f5liK4mWNxghFRv7jqRR9C3R3fAOGTTKvxXDa2Y1s9zSbcpuO0cAxDYsc9SrXoQ==}
+    dev: false
+
+  /@clack/core@0.3.5:
+    resolution: {integrity: sha512-5cfhQNH+1VQ2xLQlmzXMqUoiaH0lRBq9/CLW9lTyMbuKLC3+xEK01tHVvyut++mLOn5urSHmkm6I0Lg9MaJSTQ==}
+    dependencies:
+      picocolors: 1.1.1
+      sisteransi: 1.0.5
+    dev: false
+
+  /@clack/prompts@0.7.0:
+    resolution: {integrity: sha512-0MhX9/B4iL6Re04jPrttDm+BsP8y6mS7byuv0BvXgdXhbV5PdlsHt55dvNsuBCPZ7xq1oTAOOuotR9NFbQyMSA==}
+    dependencies:
+      '@clack/core': 0.3.5
+      picocolors: 1.1.1
+      sisteransi: 1.0.5
+    dev: false
+    bundledDependencies:
+      - is-unicode-supported
+
+  /@emotion/hash@0.9.2:
+    resolution: {integrity: sha512-MyqliTZGuOm3+5ZRSaaBGP3USLw6+EGykkwZns2EPC5g8jJ4z9OrdZY9apkl3+UP9+sdz76YYkwCKP5gh8iY3g==}
+    dev: false
+
+  /@esbuild/aix-ppc64@0.25.9:
+    resolution: {integrity: sha512-OaGtL73Jck6pBKjNIe24BnFE6agGl+6KxDtTfHhy1HmhthfKouEcOhqpSL64K4/0WCtbKFLOdzD/44cJ4k9opA==}
+    engines: {node: '>=18'}
+    cpu: [ppc64]
+    os: [aix]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/android-arm64@0.25.9:
+    resolution: {integrity: sha512-IDrddSmpSv51ftWslJMvl3Q2ZT98fUSL2/rlUXuVqRXHCs5EUF1/f+jbjF5+NG9UffUDMCiTyh8iec7u8RlTLg==}
+    engines: {node: '>=18'}
+    cpu: [arm64]
+    os: [android]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/android-arm@0.25.9:
+    resolution: {integrity: sha512-5WNI1DaMtxQ7t7B6xa572XMXpHAaI/9Hnhk8lcxF4zVN4xstUgTlvuGDorBguKEnZO70qwEcLpfifMLoxiPqHQ==}
+    engines: {node: '>=18'}
+    cpu: [arm]
+    os: [android]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/android-x64@0.25.9:
+    resolution: {integrity: sha512-I853iMZ1hWZdNllhVZKm34f4wErd4lMyeV7BLzEExGEIZYsOzqDWDf+y082izYUE8gtJnYHdeDpN/6tUdwvfiw==}
+    engines: {node: '>=18'}
+    cpu: [x64]
+    os: [android]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/darwin-arm64@0.25.9:
+    resolution: {integrity: sha512-XIpIDMAjOELi/9PB30vEbVMs3GV1v2zkkPnuyRRURbhqjyzIINwj+nbQATh4H9GxUgH1kFsEyQMxwiLFKUS6Rg==}
+    engines: {node: '>=18'}
+    cpu: [arm64]
+    os: [darwin]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/darwin-x64@0.25.9:
+    resolution: {integrity: sha512-jhHfBzjYTA1IQu8VyrjCX4ApJDnH+ez+IYVEoJHeqJm9VhG9Dh2BYaJritkYK3vMaXrf7Ogr/0MQ8/MeIefsPQ==}
+    engines: {node: '>=18'}
+    cpu: [x64]
+    os: [darwin]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/freebsd-arm64@0.25.9:
+    resolution: {integrity: sha512-z93DmbnY6fX9+KdD4Ue/H6sYs+bhFQJNCPZsi4XWJoYblUqT06MQUdBCpcSfuiN72AbqeBFu5LVQTjfXDE2A6Q==}
+    engines: {node: '>=18'}
+    cpu: [arm64]
+    os: [freebsd]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/freebsd-x64@0.25.9:
+    resolution: {integrity: sha512-mrKX6H/vOyo5v71YfXWJxLVxgy1kyt1MQaD8wZJgJfG4gq4DpQGpgTB74e5yBeQdyMTbgxp0YtNj7NuHN0PoZg==}
+    engines: {node: '>=18'}
+    cpu: [x64]
+    os: [freebsd]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/linux-arm64@0.25.9:
+    resolution: {integrity: sha512-BlB7bIcLT3G26urh5Dmse7fiLmLXnRlopw4s8DalgZ8ef79Jj4aUcYbk90g8iCa2467HX8SAIidbL7gsqXHdRw==}
+    engines: {node: '>=18'}
+    cpu: [arm64]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/linux-arm@0.25.9:
+    resolution: {integrity: sha512-HBU2Xv78SMgaydBmdor38lg8YDnFKSARg1Q6AT0/y2ezUAKiZvc211RDFHlEZRFNRVhcMamiToo7bDx3VEOYQw==}
+    engines: {node: '>=18'}
+    cpu: [arm]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/linux-ia32@0.25.9:
+    resolution: {integrity: sha512-e7S3MOJPZGp2QW6AK6+Ly81rC7oOSerQ+P8L0ta4FhVi+/j/v2yZzx5CqqDaWjtPFfYz21Vi1S0auHrap3Ma3A==}
+    engines: {node: '>=18'}
+    cpu: [ia32]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/linux-loong64@0.25.9:
+    resolution: {integrity: sha512-Sbe10Bnn0oUAB2AalYztvGcK+o6YFFA/9829PhOCUS9vkJElXGdphz0A3DbMdP8gmKkqPmPcMJmJOrI3VYB1JQ==}
+    engines: {node: '>=18'}
+    cpu: [loong64]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/linux-mips64el@0.25.9:
+    resolution: {integrity: sha512-YcM5br0mVyZw2jcQeLIkhWtKPeVfAerES5PvOzaDxVtIyZ2NUBZKNLjC5z3/fUlDgT6w89VsxP2qzNipOaaDyA==}
+    engines: {node: '>=18'}
+    cpu: [mips64el]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/linux-ppc64@0.25.9:
+    resolution: {integrity: sha512-++0HQvasdo20JytyDpFvQtNrEsAgNG2CY1CLMwGXfFTKGBGQT3bOeLSYE2l1fYdvML5KUuwn9Z8L1EWe2tzs1w==}
+    engines: {node: '>=18'}
+    cpu: [ppc64]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/linux-riscv64@0.25.9:
+    resolution: {integrity: sha512-uNIBa279Y3fkjV+2cUjx36xkx7eSjb8IvnL01eXUKXez/CBHNRw5ekCGMPM0BcmqBxBcdgUWuUXmVWwm4CH9kg==}
+    engines: {node: '>=18'}
+    cpu: [riscv64]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/linux-s390x@0.25.9:
+    resolution: {integrity: sha512-Mfiphvp3MjC/lctb+7D287Xw1DGzqJPb/J2aHHcHxflUo+8tmN/6d4k6I2yFR7BVo5/g7x2Monq4+Yew0EHRIA==}
+    engines: {node: '>=18'}
+    cpu: [s390x]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/linux-x64@0.25.9:
+    resolution: {integrity: sha512-iSwByxzRe48YVkmpbgoxVzn76BXjlYFXC7NvLYq+b+kDjyyk30J0JY47DIn8z1MO3K0oSl9fZoRmZPQI4Hklzg==}
+    engines: {node: '>=18'}
+    cpu: [x64]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/netbsd-arm64@0.25.9:
+    resolution: {integrity: sha512-9jNJl6FqaUG+COdQMjSCGW4QiMHH88xWbvZ+kRVblZsWrkXlABuGdFJ1E9L7HK+T0Yqd4akKNa/lO0+jDxQD4Q==}
+    engines: {node: '>=18'}
+    cpu: [arm64]
+    os: [netbsd]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/netbsd-x64@0.25.9:
+    resolution: {integrity: sha512-RLLdkflmqRG8KanPGOU7Rpg829ZHu8nFy5Pqdi9U01VYtG9Y0zOG6Vr2z4/S+/3zIyOxiK6cCeYNWOFR9QP87g==}
+    engines: {node: '>=18'}
+    cpu: [x64]
+    os: [netbsd]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/openbsd-arm64@0.25.9:
+    resolution: {integrity: sha512-YaFBlPGeDasft5IIM+CQAhJAqS3St3nJzDEgsgFixcfZeyGPCd6eJBWzke5piZuZ7CtL656eOSYKk4Ls2C0FRQ==}
+    engines: {node: '>=18'}
+    cpu: [arm64]
+    os: [openbsd]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/openbsd-x64@0.25.9:
+    resolution: {integrity: sha512-1MkgTCuvMGWuqVtAvkpkXFmtL8XhWy+j4jaSO2wxfJtilVCi0ZE37b8uOdMItIHz4I6z1bWWtEX4CJwcKYLcuA==}
+    engines: {node: '>=18'}
+    cpu: [x64]
+    os: [openbsd]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/openharmony-arm64@0.25.9:
+    resolution: {integrity: sha512-4Xd0xNiMVXKh6Fa7HEJQbrpP3m3DDn43jKxMjxLLRjWnRsfxjORYJlXPO4JNcXtOyfajXorRKY9NkOpTHptErg==}
+    engines: {node: '>=18'}
+    cpu: [arm64]
+    os: [openharmony]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/sunos-x64@0.25.9:
+    resolution: {integrity: sha512-WjH4s6hzo00nNezhp3wFIAfmGZ8U7KtrJNlFMRKxiI9mxEK1scOMAaa9i4crUtu+tBr+0IN6JCuAcSBJZfnphw==}
+    engines: {node: '>=18'}
+    cpu: [x64]
+    os: [sunos]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/win32-arm64@0.25.9:
+    resolution: {integrity: sha512-mGFrVJHmZiRqmP8xFOc6b84/7xa5y5YvR1x8djzXpJBSv/UsNK6aqec+6JDjConTgvvQefdGhFDAs2DLAds6gQ==}
+    engines: {node: '>=18'}
+    cpu: [arm64]
+    os: [win32]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/win32-ia32@0.25.9:
+    resolution: {integrity: sha512-b33gLVU2k11nVx1OhX3C8QQP6UHQK4ZtN56oFWvVXvz2VkDoe6fbG8TOgHFxEvqeqohmRnIHe5A1+HADk4OQww==}
+    engines: {node: '>=18'}
+    cpu: [ia32]
+    os: [win32]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@esbuild/win32-x64@0.25.9:
+    resolution: {integrity: sha512-PPOl1mi6lpLNQxnGoyAfschAodRFYXJ+9fs6WHXz7CSWKbOqiMZsubC+BQsVKuul+3vKLuwTHsS2c2y9EoKwxQ==}
+    engines: {node: '>=18'}
+    cpu: [x64]
+    os: [win32]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@floating-ui/core@1.7.3:
+    resolution: {integrity: sha512-sGnvb5dmrJaKEZ+LDIpguvdX3bDlEllmv4/ClQ9awcmCZrlx5jQyyMWFM5kBI+EyNOCDDiKk8il0zeuX3Zlg/w==}
+    dependencies:
+      '@floating-ui/utils': 0.2.10
+    dev: false
+
+  /@floating-ui/dom@1.7.3:
+    resolution: {integrity: sha512-uZA413QEpNuhtb3/iIKoYMSK07keHPYeXF02Zhd6e213j+d1NamLix/mCLxBUDW/Gx52sPH2m+chlUsyaBs/Ag==}
+    dependencies:
+      '@floating-ui/core': 1.7.3
+      '@floating-ui/utils': 0.2.10
+    dev: false
+
+  /@floating-ui/react-dom@2.1.5(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-HDO/1/1oH9fjj4eLgegrlH3dklZpHtUYYFiVwMUwfGvk9jWDRWqkklA2/NFScknrcNSspbV868WjXORvreDX+Q==}
+    peerDependencies:
+      react: '>=16.8.0'
+      react-dom: '>=16.8.0'
+    dependencies:
+      '@floating-ui/dom': 1.7.3
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@floating-ui/react@0.27.15(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-0LGxhBi3BB1DwuSNQAmuaSuertFzNAerlMdPbotjTVnvPtdOs7CkrHLaev5NIXemhzDXNC0tFzuseut7cWA5mw==}
+    peerDependencies:
+      react: '>=17.0.0'
+      react-dom: '>=17.0.0'
+    dependencies:
+      '@floating-ui/react-dom': 2.1.5(react-dom@19.1.1)(react@19.1.1)
+      '@floating-ui/utils': 0.2.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+      tabbable: 6.2.0
+    dev: false
+
+  /@floating-ui/utils@0.2.10:
+    resolution: {integrity: sha512-aGTxbpbg8/b5JfU1HXSrbH3wXZuLPJcNEcZQFMxLs3oSzgtVu6nFPkbbGGUvBcUjKV2YyB9Wxxabo+HEH9tcRQ==}
+    dev: false
+
+  /@fortawesome/fontawesome-free@6.7.2:
+    resolution: {integrity: sha512-JUOtgFW6k9u4Y+xeIaEiLr3+cjoUPiAuLXoyKOJSia6Duzb7pq+A76P9ZdPDoAoxHdHzq6gE9/jKBGXlZT8FbA==}
+    engines: {node: '>=6'}
+    dev: false
+
+  /@hono/node-server@1.18.2(hono@4.9.2):
+    resolution: {integrity: sha512-icgNvC0vRYivzyuSSaUv9ttcwtN8fDyd1k3AOIBDJgYd84tXRZSS6na8X54CY/oYoFTNhEmZraW/Rb9XYwX4KA==}
+    engines: {node: '>=18.14.1'}
+    peerDependencies:
+      hono: ^4
+    dependencies:
+      hono: 4.9.2
+    dev: false
+
+  /@iconify/types@2.0.0:
+    resolution: {integrity: sha512-+wluvCrRhXrhyOmRDJ3q8mux9JkKy5SJ/v8ol2tu4FVjyYvtEzkc/3pK15ET6RKg4b4w4BmTk1+gsCUhf21Ykg==}
+    dev: false
+
+  /@iconify/utils@2.3.0:
+    resolution: {integrity: sha512-GmQ78prtwYW6EtzXRU1rY+KwOKfz32PD7iJh6Iyqw68GiKuoZ2A6pRtzWONz5VQJbp50mEjXh/7NkumtrAgRKA==}
+    dependencies:
+      '@antfu/install-pkg': 1.1.0
+      '@antfu/utils': 8.1.1
+      '@iconify/types': 2.0.0
+      debug: 4.4.1
+      globals: 15.15.0
+      kolorist: 1.8.0
+      local-pkg: 1.1.1
+      mlly: 1.7.4
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /@isaacs/balanced-match@4.0.1:
+    resolution: {integrity: sha512-yzMTt9lEb8Gv7zRioUilSglI0c0smZ9k5D65677DLWLtWJaXIS3CqcGyUFByYKlnUj6TkjLVs54fBl6+TiGQDQ==}
+    engines: {node: 20 || >=22}
+    dev: true
+
+  /@isaacs/brace-expansion@5.0.0:
+    resolution: {integrity: sha512-ZT55BDLV0yv0RBm2czMiZ+SqCGO7AvmOM3G/w2xhVPH+te0aKgFjmBvGlL1dH+ql2tgGO3MVrbb3jCKyvpgnxA==}
+    engines: {node: 20 || >=22}
+    dependencies:
+      '@isaacs/balanced-match': 4.0.1
+    dev: true
+
+  /@isaacs/cliui@8.0.2:
+    resolution: {integrity: sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==}
+    engines: {node: '>=12'}
+    dependencies:
+      string-width: 5.1.2
+      string-width-cjs: /string-width@4.2.3
+      strip-ansi: 7.1.0
+      strip-ansi-cjs: /strip-ansi@6.0.1
+      wrap-ansi: 8.1.0
+      wrap-ansi-cjs: /wrap-ansi@7.0.0
+    dev: true
+
+  /@jridgewell/gen-mapping@0.3.13:
+    resolution: {integrity: sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA==}
+    dependencies:
+      '@jridgewell/sourcemap-codec': 1.5.5
+      '@jridgewell/trace-mapping': 0.3.30
+    dev: false
+
+  /@jridgewell/resolve-uri@3.1.2:
+    resolution: {integrity: sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==}
+    engines: {node: '>=6.0.0'}
+    dev: false
+
+  /@jridgewell/sourcemap-codec@1.5.5:
+    resolution: {integrity: sha512-cYQ9310grqxueWbl+WuIUIaiUaDcj7WOq5fVhEljNVgRfOUhY9fy2zTvfoqWsnebh8Sl70VScFbICvJnLKB0Og==}
+    dev: false
+
+  /@jridgewell/trace-mapping@0.3.30:
+    resolution: {integrity: sha512-GQ7Nw5G2lTu/BtHTKfXhKHok2WGetd4XYcVKGx00SjAk8GMwgJM3zr6zORiPGuOE+/vkc90KtTosSSvaCjKb2Q==}
+    dependencies:
+      '@jridgewell/resolve-uri': 3.1.2
+      '@jridgewell/sourcemap-codec': 1.5.5
+    dev: false
+
+  /@mdx-js/mdx@3.1.0(acorn@8.15.0):
+    resolution: {integrity: sha512-/QxEhPAvGwbQmy1Px8F899L5Uc2KZ6JtXwlCgJmjSTBedwOZkByYcBG4GceIGPXRDsmfxhHazuS+hlOShRLeDw==}
+    dependencies:
+      '@types/estree': 1.0.8
+      '@types/estree-jsx': 1.0.5
+      '@types/hast': 3.0.4
+      '@types/mdx': 2.0.13
+      collapse-white-space: 2.1.0
+      devlop: 1.1.0
+      estree-util-is-identifier-name: 3.0.0
+      estree-util-scope: 1.0.0
+      estree-walker: 3.0.3
+      hast-util-to-jsx-runtime: 2.3.6
+      markdown-extensions: 2.0.0
+      recma-build-jsx: 1.0.0
+      recma-jsx: 1.0.1(acorn@8.15.0)
+      recma-stringify: 1.0.0
+      rehype-recma: 1.0.0
+      remark-mdx: 3.1.0
+      remark-parse: 11.0.0
+      remark-rehype: 11.1.2
+      source-map: 0.7.6
+      unified: 11.0.5
+      unist-util-position-from-estree: 2.0.0
+      unist-util-stringify-position: 4.0.0
+      unist-util-visit: 5.0.0
+      vfile: 6.0.3
+    transitivePeerDependencies:
+      - acorn
+      - supports-color
+    dev: false
+
+  /@mdx-js/react@3.1.0(@types/react@19.1.10)(react@19.1.1):
+    resolution: {integrity: sha512-QjHtSaoameoalGnKDT3FoIl4+9RwyTmo9ZJGBdLOks/YOiWHoRDI3PUwEzOE7kEmGcV3AFcp9K6dYu9rEuKLAQ==}
+    peerDependencies:
+      '@types/react': '>=16'
+      react: '>=16'
+    dependencies:
+      '@types/mdx': 2.0.13
+      '@types/react': 19.1.10
+      react: 19.1.1
+    dev: false
+
+  /@mdx-js/rollup@3.1.0(acorn@8.15.0)(rollup@4.46.2):
+    resolution: {integrity: sha512-q4xOtUXpCzeouE8GaJ8StT4rDxm/U5j6lkMHL2srb2Q3Y7cobE0aXyPzXVVlbeIMBi+5R5MpbiaVE5/vJUdnHg==}
+    peerDependencies:
+      rollup: '>=2'
+    dependencies:
+      '@mdx-js/mdx': 3.1.0(acorn@8.15.0)
+      '@rollup/pluginutils': 5.2.0(rollup@4.46.2)
+      rollup: 4.46.2
+      source-map: 0.7.6
+      vfile: 6.0.3
+    transitivePeerDependencies:
+      - acorn
+      - supports-color
+    dev: false
+
+  /@mermaid-js/parser@0.6.2:
+    resolution: {integrity: sha512-+PO02uGF6L6Cs0Bw8RpGhikVvMWEysfAyl27qTlroUB8jSWr1lL0Sf6zi78ZxlSnmgSY2AMMKVgghnN9jTtwkQ==}
+    dependencies:
+      langium: 3.3.1
+    dev: false
+
+  /@noble/hashes@1.8.0:
+    resolution: {integrity: sha512-jCs9ldd7NwzpgXDIf6P3+NrHh9/sD6CQdxHyjQI+h/6rDNo88ypBxxz45UDuZHz9r3tNz7N/VInSVoVdtXEI4A==}
+    engines: {node: ^14.21.3 || >=16}
+    dev: false
+
+  /@nodelib/fs.scandir@2.1.5:
+    resolution: {integrity: sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==}
+    engines: {node: '>= 8'}
+    dependencies:
+      '@nodelib/fs.stat': 2.0.5
+      run-parallel: 1.2.0
+    dev: false
+
+  /@nodelib/fs.stat@2.0.5:
+    resolution: {integrity: sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==}
+    engines: {node: '>= 8'}
+    dev: false
+
+  /@nodelib/fs.walk@1.2.8:
+    resolution: {integrity: sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==}
+    engines: {node: '>= 8'}
+    dependencies:
+      '@nodelib/fs.scandir': 2.1.5
+      fastq: 1.19.1
+    dev: false
+
+  /@radix-ui/colors@3.0.0:
+    resolution: {integrity: sha512-FUOsGBkHrYJwCSEtWRCIfQbZG7q1e6DgxCIOe1SUQzDe/7rXXeA47s8yCn6fuTNQAj1Zq4oTFi9Yjp3wzElcxg==}
+    dev: false
+
+  /@radix-ui/number@1.1.1:
+    resolution: {integrity: sha512-MkKCwxlXTgz6CFoJx3pCwn07GKp36+aZyu/u2Ln2VrA5DcdyCZkASEDBTd8x5whTQQL5CiYf4prXKLcgQdv29g==}
+    dev: false
+
+  /@radix-ui/primitive@1.1.3:
+    resolution: {integrity: sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg==}
+    dev: false
+
+  /@radix-ui/react-accessible-icon@1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-XM+E4WXl0OqUJFovy6GjmxxFyx9opfCAIUku4dlKRd5YEPqt4kALOkQOp0Of6reHuUkJuiPBEc5k0o4z4lTC8A==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/react-visually-hidden': 1.2.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-accordion@1.2.12(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-T4nygeh9YE9dLRPhAHSeOZi7HBXo+0kYIPJXayZfvWOWA0+n3dESrZbjfDPUABkUNym6Hd+f2IR113To8D2GPA==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-collapsible': 1.1.12(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-collection': 1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-direction': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-id': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-alert-dialog@1.1.15(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-oTVLkEw5GpdRe29BqJ0LSDFWI3qu0vR1M0mUkOQWDIUnY/QIkLpgDMWuKxP94c2NAC2LGcgVhG1ImF3jkZ5wXw==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-dialog': 1.1.15(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-slot': 1.2.3(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-arrow@1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-aspect-ratio@1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-Yq6lvO9HQyPwev1onK1daHCHqXVLzPhSVjmsNjCa2Zcxy2f7uJD2itDtxknv6FzAKCwD1qQkeVDmX/cev13n/g==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-avatar@1.1.10(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-V8piFfWapM5OmNCXTzVQY+E1rDa53zY+MQ4Y7356v4fFz6vqCyUtIz2rUD44ZEdwg78/jKmMJHj07+C/Z/rcog==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-callback-ref': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-is-hydrated': 0.1.0(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-layout-effect': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-checkbox@1.3.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-wBbpv+NQftHDdG86Qc0pIyXk5IR3tM8Vd0nWLKDcX8nNn4nXFOFwsKuqw2okA/1D/mpaAkmuyndrPJTYDNZtFw==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-presence': 1.1.5(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-previous': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-size': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-collapsible@1.1.12(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-Uu+mSh4agx2ib1uIGPP4/CKNULyajb3p92LsVXmH2EHVMTfZWpll88XJ0j4W0z3f8NK1eYl1+Mf/szHPmcHzyA==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-id': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-presence': 1.1.5(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-layout-effect': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-collection@1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-Fh9rGN0MoI4ZFUNyfFVNU4y9LUz93u9/0K+yLgA2bwRojxM8JU1DyvvMBabnZPBgMWREAJvU2jjVzq+LrFUglw==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-slot': 1.2.3(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-compose-refs@1.1.2(@types/react@19.1.10)(react@19.1.1):
+    resolution: {integrity: sha512-z4eqJvfiNnFMHIIvXP3CY57y2WJs5g2v3X0zm9mEJkrkNv4rDxu+sg9Jh8EkXyeqBkB7SOcboo9dMVqhyrACIg==}
+    peerDependencies:
+      '@types/react': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+    dependencies:
+      '@types/react': 19.1.10
+      react: 19.1.1
+    dev: false
+
+  /@radix-ui/react-context-menu@2.2.16(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-O8morBEW+HsVG28gYDZPTrT9UUovQUlJue5YO836tiTJhuIWBm/zQHc7j388sHWtdH/xUZurK9olD2+pcqx5ww==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-menu': 2.1.16(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-callback-ref': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-context@1.1.2(@types/react@19.1.10)(react@19.1.1):
+    resolution: {integrity: sha512-jCi/QKUM2r1Ju5a3J64TH2A5SpKAgh0LpknyqdQ4m6DCV0xJ2HG1xARRwNGPQfi1SLdLWZ1OJz6F4OMBBNiGJA==}
+    peerDependencies:
+      '@types/react': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+    dependencies:
+      '@types/react': 19.1.10
+      react: 19.1.1
+    dev: false
+
+  /@radix-ui/react-dialog@1.1.15(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-TCglVRtzlffRNxRMEyR36DGBLJpeusFcgMVD9PZEzAKnUs1lKCgX5u9BmC2Yg+LL9MgZDugFFs1Vl+Jp4t/PGw==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-dismissable-layer': 1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-focus-guards': 1.1.3(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-focus-scope': 1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-id': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-portal': 1.1.9(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-presence': 1.1.5(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-slot': 1.2.3(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      aria-hidden: 1.2.6
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+      react-remove-scroll: 2.7.1(@types/react@19.1.10)(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-direction@1.1.1(@types/react@19.1.10)(react@19.1.1):
+    resolution: {integrity: sha512-1UEWRX6jnOA2y4H5WczZ44gOOjTEmlqv1uNW4GAJEO5+bauCBhv8snY65Iw5/VOS/ghKN9gr2KjnLKxrsvoMVw==}
+    peerDependencies:
+      '@types/react': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+    dependencies:
+      '@types/react': 19.1.10
+      react: 19.1.1
+    dev: false
+
+  /@radix-ui/react-dismissable-layer@1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-Nqcp+t5cTB8BinFkZgXiMJniQH0PsUt2k51FUhbdfeKvc4ACcG2uQniY/8+h1Yv6Kza4Q7lD7PQV0z0oicE0Mg==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-callback-ref': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-escape-keydown': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-dropdown-menu@2.1.16(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-1PLGQEynI/3OX/ftV54COn+3Sud/Mn8vALg2rWnBLnRaGtJDduNW/22XjlGgPdpcIbiQxjKtb7BkcjP00nqfJw==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-id': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-menu': 2.1.16(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-focus-guards@1.1.3(@types/react@19.1.10)(react@19.1.1):
+    resolution: {integrity: sha512-0rFg/Rj2Q62NCm62jZw0QX7a3sz6QCQU0LpZdNrJX8byRGaGVTqbrW9jAoIAHyMQqsNpeZ81YgSizOt5WXq0Pw==}
+    peerDependencies:
+      '@types/react': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+    dependencies:
+      '@types/react': 19.1.10
+      react: 19.1.1
+    dev: false
+
+  /@radix-ui/react-focus-scope@1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-t2ODlkXBQyn7jkl6TNaw/MtVEVvIGelJDCG41Okq/KwUsJBwQ4XVZsHAVUkK4mBv3ewiAS3PGuUWuY2BoK4ZUw==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-callback-ref': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-form@0.1.8(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-QM70k4Zwjttifr5a4sZFts9fn8FzHYvQ5PiB19O2HsYibaHSVt9fH9rzB0XZo/YcM+b7t/p7lYCT/F5eOeF5yQ==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-id': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-label': 2.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-hover-card@1.1.15(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-qgTkjNT1CfKMoP0rcasmlH2r1DAiYicWsDsufxl940sT2wHNEWWv6FMWIQXWhVdmC1d/HYfbhQx60KYyAtKxjg==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-dismissable-layer': 1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-popper': 1.2.8(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-portal': 1.1.9(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-presence': 1.1.5(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-icons@1.3.2(react@19.1.1):
+    resolution: {integrity: sha512-fyQIhGDhzfc9pK2kH6Pl9c4BDJGfMkPqkyIgYDthyNYoNg3wVhoJMMh19WS4Up/1KMPFVpNsT2q3WmXn2N1m6g==}
+    peerDependencies:
+      react: ^16.x || ^17.x || ^18.x || ^19.0.0 || ^19.0.0-rc
+    dependencies:
+      react: 19.1.1
+    dev: false
+
+  /@radix-ui/react-id@1.1.1(@types/react@19.1.10)(react@19.1.1):
+    resolution: {integrity: sha512-kGkGegYIdQsOb4XjsfM97rXsiHaBwco+hFI66oO4s9LU+PLAC5oJ7khdOVFxkhsmlbpUqDAvXw11CluXP+jkHg==}
+    peerDependencies:
+      '@types/react': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+    dependencies:
+      '@radix-ui/react-use-layout-effect': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+    dev: false
+
+  /@radix-ui/react-label@2.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-YT1GqPSL8kJn20djelMX7/cTRp/Y9w5IZHvfxQTVHrOqa2yMl7i/UfMqKRU5V7mEyKTrUVgJXhNQPVCG8PBLoQ==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-menu@2.1.16(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-72F2T+PLlphrqLcAotYPp0uJMr5SjP5SL01wfEspJbru5Zs5vQaSHb4VB3ZMJPimgHHCHG7gMOeOB9H3Hdmtxg==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-collection': 1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-direction': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-dismissable-layer': 1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-focus-guards': 1.1.3(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-focus-scope': 1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-id': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-popper': 1.2.8(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-portal': 1.1.9(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-presence': 1.1.5(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-roving-focus': 1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-slot': 1.2.3(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-callback-ref': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      aria-hidden: 1.2.6
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+      react-remove-scroll: 2.7.1(@types/react@19.1.10)(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-menubar@1.1.16(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-EB1FktTz5xRRi2Er974AUQZWg2yVBb1yjip38/lgwtCVRd3a+maUoGHN/xs9Yv8SY8QwbSEb+YrxGadVWbEutA==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-collection': 1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-direction': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-id': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-menu': 2.1.16(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-roving-focus': 1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-navigation-menu@1.2.14(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-YB9mTFQvCOAQMHU+C/jVl96WmuWeltyUEpRJJky51huhds5W2FQr1J8D/16sQlf0ozxkPK8uF3niQMdUwZPv5w==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-collection': 1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-direction': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-dismissable-layer': 1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-id': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-presence': 1.1.5(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-callback-ref': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-layout-effect': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-previous': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-visually-hidden': 1.2.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-one-time-password-field@0.1.8(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-ycS4rbwURavDPVjCb5iS3aG4lURFDILi6sKI/WITUMZ13gMmn/xGjpLoqBAalhJaDk8I3UbCM5GzKHrnzwHbvg==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/number': 1.1.1
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-collection': 1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-direction': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-roving-focus': 1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-effect-event': 0.0.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-is-hydrated': 0.1.0(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-layout-effect': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-password-toggle-field@0.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-/UuCrDBWravcaMix4TdT+qlNdVwOM1Nck9kWx/vafXsdfj1ChfhOdfi3cy9SGBpWgTXwYCuboT/oYpJy3clqfw==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-id': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-effect-event': 0.0.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-is-hydrated': 0.1.0(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-popover@1.1.15(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-kr0X2+6Yy/vJzLYJUPCZEc8SfQcf+1COFoAqauJm74umQhta9M7lNJHP7QQS3vkvcGLQUbWpMzwrXYwrYztHKA==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-dismissable-layer': 1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-focus-guards': 1.1.3(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-focus-scope': 1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-id': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-popper': 1.2.8(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-portal': 1.1.9(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-presence': 1.1.5(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-slot': 1.2.3(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      aria-hidden: 1.2.6
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+      react-remove-scroll: 2.7.1(@types/react@19.1.10)(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-popper@1.2.8(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-0NJQ4LFFUuWkE7Oxf0htBKS6zLkkjBH+hM1uk7Ng705ReR8m/uelduy1DBo0PyBXPKVnBA6YBlU94MBGXrSBCw==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@floating-ui/react-dom': 2.1.5(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-arrow': 1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-callback-ref': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-layout-effect': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-rect': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-size': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/rect': 1.1.1
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-portal@1.1.9(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-layout-effect': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-presence@1.1.5(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-layout-effect': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-primitive@2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/react-slot': 1.2.3(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-progress@1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-vPdg/tF6YC/ynuBIJlk1mm7Le0VgW6ub6J2UWnTQ7/D23KXcPI1qy+0vBkgKgd38RCMJavBXpB83HPNFMTb0Fg==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-radio-group@1.3.8(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-VBKYIYImA5zsxACdisNQ3BjCBfmbGH3kQlnFVqlWU4tXwjy7cGX8ta80BcrO+WJXIn5iBylEH3K6ZTlee//lgQ==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-direction': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-presence': 1.1.5(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-roving-focus': 1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-previous': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-size': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-roving-focus@1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-7A6S9jSgm/S+7MdtNDSb+IU859vQqJ/QAtcYQcfFC6W8RS4IxIZDldLR0xqCFZ6DCyrQLjLPsxtTNch5jVA4lA==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-collection': 1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-direction': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-id': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-callback-ref': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-scroll-area@1.2.10(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-tAXIa1g3sM5CGpVT0uIbUx/U3Gs5N8T52IICuCtObaos1S8fzsrPXG5WObkQN3S6NVl6wKgPhAIiBGbWnvc97A==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/number': 1.1.1
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-direction': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-presence': 1.1.5(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-callback-ref': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-layout-effect': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-select@2.2.6(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-I30RydO+bnn2PQztvo25tswPH+wFBjehVGtmagkU78yMdwTwVf12wnAOF+AeP8S2N8xD+5UPbGhkUfPyvT+mwQ==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/number': 1.1.1
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-collection': 1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-direction': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-dismissable-layer': 1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-focus-guards': 1.1.3(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-focus-scope': 1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-id': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-popper': 1.2.8(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-portal': 1.1.9(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-slot': 1.2.3(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-callback-ref': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-layout-effect': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-previous': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-visually-hidden': 1.2.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@types/react': 19.1.10
+      aria-hidden: 1.2.6
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+      react-remove-scroll: 2.7.1(@types/react@19.1.10)(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-separator@1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-0HEb8R9E8A+jZjvmFCy/J4xhbXy3TV+9XSnGJ3KvTtjlIUy/YQ/p6UYZvi7YbeoeXdyU9+Y3scizK6hkY37baA==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-slider@1.3.6(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-JPYb1GuM1bxfjMRlNLE+BcmBC8onfCi60Blk7OBqi2MLTFdS+8401U4uFjnwkOr49BLmXxLC6JHkvAsx5OJvHw==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/number': 1.1.1
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-collection': 1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-direction': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-layout-effect': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-previous': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-size': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-slot@1.2.3(@types/react@19.1.10)(react@19.1.1):
+    resolution: {integrity: sha512-aeNmHnBxbi2St0au6VBVC7JXFlhLlOnvIIlePNniyUNAClzmtAUEY8/pBiK3iHjufOlwA+c20/8jngo7xcrg8A==}
+    peerDependencies:
+      '@types/react': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+    dependencies:
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+    dev: false
+
+  /@radix-ui/react-switch@1.2.6(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-bByzr1+ep1zk4VubeEVViV592vu2lHE2BZY5OnzehZqOOgogN80+mNtCqPkhn2gklJqOpxWgPoYTSnhBCqpOXQ==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-previous': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-size': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-tabs@1.1.13(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-7xdcatg7/U+7+Udyoj2zodtI9H/IIopqo+YOIcZOq1nJwXWBZ9p8xiu5llXlekDbZkca79a/fozEYQXIA4sW6A==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-direction': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-id': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-presence': 1.1.5(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-roving-focus': 1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-toast@1.2.15(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-3OSz3TacUWy4WtOXV38DggwxoqJK4+eDkNMl5Z/MJZaoUPaP4/9lf81xXMe1I2ReTAptverZUpbPY4wWwWyL5g==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-collection': 1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-dismissable-layer': 1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-portal': 1.1.9(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-presence': 1.1.5(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-callback-ref': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-layout-effect': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-visually-hidden': 1.2.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-toggle-group@1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-5umnS0T8JQzQT6HbPyO7Hh9dgd82NmS36DQr+X/YJ9ctFNCiiQd6IJAYYZ33LUwm8M+taCz5t2ui29fHZc4Y6Q==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-direction': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-roving-focus': 1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-toggle': 1.1.10(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-toggle@1.1.10(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-lS1odchhFTeZv3xwHH31YPObmJn8gOg7Lq12inrr0+BH/l3Tsq32VfjqH1oh80ARM3mlkfMic15n0kg4sD1poQ==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-toolbar@1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-4ol06/1bLoFu1nwUqzdD4Y5RZ9oDdKeiHIsntug54Hcr1pgaHiPqHFEaXI1IFP/EsOfROQZ8Mig9VTIRza6Tjg==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-direction': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-roving-focus': 1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-separator': 1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-toggle-group': 1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-tooltip@1.2.8(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-tY7sVt1yL9ozIxvmbtN5qtmH2krXcBCfjEiCgKGLqunJHvgvZG2Pcl2oQ3kbcZARb1BGEHdkLzcYGO8ynVlieg==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-dismissable-layer': 1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-id': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-popper': 1.2.8(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-portal': 1.1.9(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-presence': 1.1.5(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-slot': 1.2.3(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-visually-hidden': 1.2.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-use-callback-ref@1.1.1(@types/react@19.1.10)(react@19.1.1):
+    resolution: {integrity: sha512-FkBMwD+qbGQeMu1cOHnuGB6x4yzPjho8ap5WtbEJ26umhgqVXbhekKUQO+hZEL1vU92a3wHwdp0HAcqAUF5iDg==}
+    peerDependencies:
+      '@types/react': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+    dependencies:
+      '@types/react': 19.1.10
+      react: 19.1.1
+    dev: false
+
+  /@radix-ui/react-use-controllable-state@1.2.2(@types/react@19.1.10)(react@19.1.1):
+    resolution: {integrity: sha512-BjasUjixPFdS+NKkypcyyN5Pmg83Olst0+c6vGov0diwTEo6mgdqVR6hxcEgFuh4QrAs7Rc+9KuGJ9TVCj0Zzg==}
+    peerDependencies:
+      '@types/react': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+    dependencies:
+      '@radix-ui/react-use-effect-event': 0.0.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-layout-effect': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+    dev: false
+
+  /@radix-ui/react-use-effect-event@0.0.2(@types/react@19.1.10)(react@19.1.1):
+    resolution: {integrity: sha512-Qp8WbZOBe+blgpuUT+lw2xheLP8q0oatc9UpmiemEICxGvFLYmHm9QowVZGHtJlGbS6A6yJ3iViad/2cVjnOiA==}
+    peerDependencies:
+      '@types/react': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+    dependencies:
+      '@radix-ui/react-use-layout-effect': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+    dev: false
+
+  /@radix-ui/react-use-escape-keydown@1.1.1(@types/react@19.1.10)(react@19.1.1):
+    resolution: {integrity: sha512-Il0+boE7w/XebUHyBjroE+DbByORGR9KKmITzbR7MyQ4akpORYP/ZmbhAr0DG7RmmBqoOnZdy2QlvajJ2QA59g==}
+    peerDependencies:
+      '@types/react': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+    dependencies:
+      '@radix-ui/react-use-callback-ref': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+    dev: false
+
+  /@radix-ui/react-use-is-hydrated@0.1.0(@types/react@19.1.10)(react@19.1.1):
+    resolution: {integrity: sha512-U+UORVEq+cTnRIaostJv9AGdV3G6Y+zbVd+12e18jQ5A3c0xL03IhnHuiU4UV69wolOQp5GfR58NW/EgdQhwOA==}
+    peerDependencies:
+      '@types/react': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+    dependencies:
+      '@types/react': 19.1.10
+      react: 19.1.1
+      use-sync-external-store: 1.5.0(react@19.1.1)
+    dev: false
+
+  /@radix-ui/react-use-layout-effect@1.1.1(@types/react@19.1.10)(react@19.1.1):
+    resolution: {integrity: sha512-RbJRS4UWQFkzHTTwVymMTUv8EqYhOp8dOOviLj2ugtTiXRaRQS7GLGxZTLL1jWhMeoSCf5zmcZkqTl9IiYfXcQ==}
+    peerDependencies:
+      '@types/react': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+    dependencies:
+      '@types/react': 19.1.10
+      react: 19.1.1
+    dev: false
+
+  /@radix-ui/react-use-previous@1.1.1(@types/react@19.1.10)(react@19.1.1):
+    resolution: {integrity: sha512-2dHfToCj/pzca2Ck724OZ5L0EVrr3eHRNsG/b3xQJLA2hZpVCS99bLAX+hm1IHXDEnzU6by5z/5MIY794/a8NQ==}
+    peerDependencies:
+      '@types/react': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+    dependencies:
+      '@types/react': 19.1.10
+      react: 19.1.1
+    dev: false
+
+  /@radix-ui/react-use-rect@1.1.1(@types/react@19.1.10)(react@19.1.1):
+    resolution: {integrity: sha512-QTYuDesS0VtuHNNvMh+CjlKJ4LJickCMUAqjlE3+j8w+RlRpwyX3apEQKGFzbZGdo7XNG1tXa+bQqIE7HIXT2w==}
+    peerDependencies:
+      '@types/react': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+    dependencies:
+      '@radix-ui/rect': 1.1.1
+      '@types/react': 19.1.10
+      react: 19.1.1
+    dev: false
+
+  /@radix-ui/react-use-size@1.1.1(@types/react@19.1.10)(react@19.1.1):
+    resolution: {integrity: sha512-ewrXRDTAqAXlkl6t/fkXWNAhFX9I+CkKlw6zjEwk86RSPKwZr3xpBRso655aqYafwtnbpHLj6toFzmd6xdVptQ==}
+    peerDependencies:
+      '@types/react': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+    dependencies:
+      '@radix-ui/react-use-layout-effect': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+    dev: false
+
+  /@radix-ui/react-visually-hidden@1.2.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-pzJq12tEaaIhqjbzpCuv/OypJY/BPavOofm+dbab+MHLajy277+1lLm6JFcGgF5eskJ6mquGirhXY2GD/8u8Ug==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /@radix-ui/rect@1.1.1:
+    resolution: {integrity: sha512-HPwpGIzkl28mWyZqG52jiqDJ12waP11Pa1lGoiyUkIEuMLBP0oeK/C89esbXrxsky5we7dfd8U58nm0SgAWpVw==}
+    dev: false
+
+  /@rolldown/pluginutils@1.0.0-beta.27:
+    resolution: {integrity: sha512-+d0F4MKMCbeVUJwG96uQ4SgAznZNSq93I3V+9NHA4OpvqG8mRCpGdKmK8l/dl02h2CCDHwW2FqilnTyDcAnqjA==}
+    dev: false
+
+  /@rollup/pluginutils@5.2.0(rollup@4.46.2):
+    resolution: {integrity: sha512-qWJ2ZTbmumwiLFomfzTyt5Kng4hwPi9rwCYN4SHb6eaRU1KNO4ccxINHr/VhH4GgPlt1XfSTLX2LBTme8ne4Zw==}
+    engines: {node: '>=14.0.0'}
+    peerDependencies:
+      rollup: ^1.20.0||^2.0.0||^3.0.0||^4.0.0
+    peerDependenciesMeta:
+      rollup:
+        optional: true
+    dependencies:
+      '@types/estree': 1.0.8
+      estree-walker: 2.0.2
+      picomatch: 4.0.3
+      rollup: 4.46.2
+    dev: false
+
+  /@rollup/rollup-android-arm-eabi@4.46.2:
+    resolution: {integrity: sha512-Zj3Hl6sN34xJtMv7Anwb5Gu01yujyE/cLBDB2gnHTAHaWS1Z38L7kuSG+oAh0giZMqG060f/YBStXtMH6FvPMA==}
+    cpu: [arm]
+    os: [android]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@rollup/rollup-android-arm64@4.46.2:
+    resolution: {integrity: sha512-nTeCWY83kN64oQ5MGz3CgtPx8NSOhC5lWtsjTs+8JAJNLcP3QbLCtDDgUKQc/Ro/frpMq4SHUaHN6AMltcEoLQ==}
+    cpu: [arm64]
+    os: [android]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@rollup/rollup-darwin-arm64@4.46.2:
+    resolution: {integrity: sha512-HV7bW2Fb/F5KPdM/9bApunQh68YVDU8sO8BvcW9OngQVN3HHHkw99wFupuUJfGR9pYLLAjcAOA6iO+evsbBaPQ==}
+    cpu: [arm64]
+    os: [darwin]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@rollup/rollup-darwin-x64@4.46.2:
+    resolution: {integrity: sha512-SSj8TlYV5nJixSsm/y3QXfhspSiLYP11zpfwp6G/YDXctf3Xkdnk4woJIF5VQe0of2OjzTt8EsxnJDCdHd2xMA==}
+    cpu: [x64]
+    os: [darwin]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@rollup/rollup-freebsd-arm64@4.46.2:
+    resolution: {integrity: sha512-ZyrsG4TIT9xnOlLsSSi9w/X29tCbK1yegE49RYm3tu3wF1L/B6LVMqnEWyDB26d9Ecx9zrmXCiPmIabVuLmNSg==}
+    cpu: [arm64]
+    os: [freebsd]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@rollup/rollup-freebsd-x64@4.46.2:
+    resolution: {integrity: sha512-pCgHFoOECwVCJ5GFq8+gR8SBKnMO+xe5UEqbemxBpCKYQddRQMgomv1104RnLSg7nNvgKy05sLsY51+OVRyiVw==}
+    cpu: [x64]
+    os: [freebsd]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@rollup/rollup-linux-arm-gnueabihf@4.46.2:
+    resolution: {integrity: sha512-EtP8aquZ0xQg0ETFcxUbU71MZlHaw9MChwrQzatiE8U/bvi5uv/oChExXC4mWhjiqK7azGJBqU0tt5H123SzVA==}
+    cpu: [arm]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@rollup/rollup-linux-arm-musleabihf@4.46.2:
+    resolution: {integrity: sha512-qO7F7U3u1nfxYRPM8HqFtLd+raev2K137dsV08q/LRKRLEc7RsiDWihUnrINdsWQxPR9jqZ8DIIZ1zJJAm5PjQ==}
+    cpu: [arm]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@rollup/rollup-linux-arm64-gnu@4.46.2:
+    resolution: {integrity: sha512-3dRaqLfcOXYsfvw5xMrxAk9Lb1f395gkoBYzSFcc/scgRFptRXL9DOaDpMiehf9CO8ZDRJW2z45b6fpU5nwjng==}
+    cpu: [arm64]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@rollup/rollup-linux-arm64-musl@4.46.2:
+    resolution: {integrity: sha512-fhHFTutA7SM+IrR6lIfiHskxmpmPTJUXpWIsBXpeEwNgZzZZSg/q4i6FU4J8qOGyJ0TR+wXBwx/L7Ho9z0+uDg==}
+    cpu: [arm64]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@rollup/rollup-linux-loongarch64-gnu@4.46.2:
+    resolution: {integrity: sha512-i7wfGFXu8x4+FRqPymzjD+Hyav8l95UIZ773j7J7zRYc3Xsxy2wIn4x+llpunexXe6laaO72iEjeeGyUFmjKeA==}
+    cpu: [loong64]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@rollup/rollup-linux-ppc64-gnu@4.46.2:
+    resolution: {integrity: sha512-B/l0dFcHVUnqcGZWKcWBSV2PF01YUt0Rvlurci5P+neqY/yMKchGU8ullZvIv5e8Y1C6wOn+U03mrDylP5q9Yw==}
+    cpu: [ppc64]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@rollup/rollup-linux-riscv64-gnu@4.46.2:
+    resolution: {integrity: sha512-32k4ENb5ygtkMwPMucAb8MtV8olkPT03oiTxJbgkJa7lJ7dZMr0GCFJlyvy+K8iq7F/iuOr41ZdUHaOiqyR3iQ==}
+    cpu: [riscv64]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@rollup/rollup-linux-riscv64-musl@4.46.2:
+    resolution: {integrity: sha512-t5B2loThlFEauloaQkZg9gxV05BYeITLvLkWOkRXogP4qHXLkWSbSHKM9S6H1schf/0YGP/qNKtiISlxvfmmZw==}
+    cpu: [riscv64]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@rollup/rollup-linux-s390x-gnu@4.46.2:
+    resolution: {integrity: sha512-YKjekwTEKgbB7n17gmODSmJVUIvj8CX7q5442/CK80L8nqOUbMtf8b01QkG3jOqyr1rotrAnW6B/qiHwfcuWQA==}
+    cpu: [s390x]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@rollup/rollup-linux-x64-gnu@4.46.2:
+    resolution: {integrity: sha512-Jj5a9RUoe5ra+MEyERkDKLwTXVu6s3aACP51nkfnK9wJTraCC8IMe3snOfALkrjTYd2G1ViE1hICj0fZ7ALBPA==}
+    cpu: [x64]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@rollup/rollup-linux-x64-musl@4.46.2:
+    resolution: {integrity: sha512-7kX69DIrBeD7yNp4A5b81izs8BqoZkCIaxQaOpumcJ1S/kmqNFjPhDu1LHeVXv0SexfHQv5cqHsxLOjETuqDuA==}
+    cpu: [x64]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@rollup/rollup-win32-arm64-msvc@4.46.2:
+    resolution: {integrity: sha512-wiJWMIpeaak/jsbaq2HMh/rzZxHVW1rU6coyeNNpMwk5isiPjSTx0a4YLSlYDwBH/WBvLz+EtsNqQScZTLJy3g==}
+    cpu: [arm64]
+    os: [win32]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@rollup/rollup-win32-ia32-msvc@4.46.2:
+    resolution: {integrity: sha512-gBgaUDESVzMgWZhcyjfs9QFK16D8K6QZpwAaVNJxYDLHWayOta4ZMjGm/vsAEy3hvlS2GosVFlBlP9/Wb85DqQ==}
+    cpu: [ia32]
+    os: [win32]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@rollup/rollup-win32-x64-msvc@4.46.2:
+    resolution: {integrity: sha512-CvUo2ixeIQGtF6WvuB87XWqPQkoFAFqW+HUo/WzHwuHDvIwZCtjdWXoYCcr06iKGydiqTclC4jU/TNObC/xKZg==}
+    cpu: [x64]
+    os: [win32]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@shikijs/core@1.29.2:
+    resolution: {integrity: sha512-vju0lY9r27jJfOY4Z7+Rt/nIOjzJpZ3y+nYpqtUZInVoXQ/TJZcfGnNOGnKjFdVZb8qexiCuSlZRKcGfhhTTZQ==}
+    dependencies:
+      '@shikijs/engine-javascript': 1.29.2
+      '@shikijs/engine-oniguruma': 1.29.2
+      '@shikijs/types': 1.29.2
+      '@shikijs/vscode-textmate': 10.0.2
+      '@types/hast': 3.0.4
+      hast-util-to-html: 9.0.5
+    dev: false
+
+  /@shikijs/engine-javascript@1.29.2:
+    resolution: {integrity: sha512-iNEZv4IrLYPv64Q6k7EPpOCE/nuvGiKl7zxdq0WFuRPF5PAE9PRo2JGq/d8crLusM59BRemJ4eOqrFrC4wiQ+A==}
+    dependencies:
+      '@shikijs/types': 1.29.2
+      '@shikijs/vscode-textmate': 10.0.2
+      oniguruma-to-es: 2.3.0
+    dev: false
+
+  /@shikijs/engine-oniguruma@1.29.2:
+    resolution: {integrity: sha512-7iiOx3SG8+g1MnlzZVDYiaeHe7Ez2Kf2HrJzdmGwkRisT7r4rak0e655AcM/tF9JG/kg5fMNYlLLKglbN7gBqA==}
+    dependencies:
+      '@shikijs/types': 1.29.2
+      '@shikijs/vscode-textmate': 10.0.2
+    dev: false
+
+  /@shikijs/langs@1.29.2:
+    resolution: {integrity: sha512-FIBA7N3LZ+223U7cJDUYd5shmciFQlYkFXlkKVaHsCPgfVLiO+e12FmQE6Tf9vuyEsFe3dIl8qGWKXgEHL9wmQ==}
+    dependencies:
+      '@shikijs/types': 1.29.2
+    dev: false
+
+  /@shikijs/rehype@1.29.2:
+    resolution: {integrity: sha512-sxi53HZe5XDz0s2UqF+BVN/kgHPMS9l6dcacM4Ra3ZDzCJa5rDGJ+Ukpk4LxdD1+MITBM6hoLbPfGv9StV8a5Q==}
+    dependencies:
+      '@shikijs/types': 1.29.2
+      '@types/hast': 3.0.4
+      hast-util-to-string: 3.0.1
+      shiki: 1.29.2
+      unified: 11.0.5
+      unist-util-visit: 5.0.0
+    dev: false
+
+  /@shikijs/themes@1.29.2:
+    resolution: {integrity: sha512-i9TNZlsq4uoyqSbluIcZkmPL9Bfi3djVxRnofUHwvx/h6SRW3cwgBC5SML7vsDcWyukY0eCzVN980rqP6qNl9g==}
+    dependencies:
+      '@shikijs/types': 1.29.2
+    dev: false
+
+  /@shikijs/transformers@1.29.2:
+    resolution: {integrity: sha512-NHQuA+gM7zGuxGWP9/Ub4vpbwrYCrho9nQCLcCPfOe3Yc7LOYwmSuhElI688oiqIXk9dlZwDiyAG9vPBTuPJMA==}
+    dependencies:
+      '@shikijs/core': 1.29.2
+      '@shikijs/types': 1.29.2
+    dev: false
+
+  /@shikijs/twoslash@1.29.2(typescript@5.9.2):
+    resolution: {integrity: sha512-2S04ppAEa477tiaLfGEn1QJWbZUmbk8UoPbAEw4PifsrxkBXtAtOflIZJNtuCwz8ptc/TPxy7CO7gW4Uoi6o/g==}
+    dependencies:
+      '@shikijs/core': 1.29.2
+      '@shikijs/types': 1.29.2
+      twoslash: 0.2.12(typescript@5.9.2)
+    transitivePeerDependencies:
+      - supports-color
+      - typescript
+    dev: false
+
+  /@shikijs/types@1.29.2:
+    resolution: {integrity: sha512-VJjK0eIijTZf0QSTODEXCqinjBn0joAHQ+aPSBzrv4O2d/QSbsMw+ZeSRx03kV34Hy7NzUvV/7NqfYGRLrASmw==}
+    dependencies:
+      '@shikijs/vscode-textmate': 10.0.2
+      '@types/hast': 3.0.4
+    dev: false
+
+  /@shikijs/vscode-textmate@10.0.2:
+    resolution: {integrity: sha512-83yeghZ2xxin3Nj8z1NMd/NCuca+gsYXswywDy5bHvwlWL8tpTQmzGeUuHd9FC3E/SBEMvzJRwWEOz5gGes9Qg==}
+    dev: false
+
+  /@sindresorhus/merge-streams@2.3.0:
+    resolution: {integrity: sha512-LtoMMhxAlorcGhmFYI+LhPgbPZCkgP6ra1YL604EeF6U98pLlQ3iWIGMdWSC+vWmPBWBNgmDBAhnAobLROJmwg==}
+    engines: {node: '>=18'}
+    dev: false
+
+  /@tailwindcss/node@4.0.7:
+    resolution: {integrity: sha512-dkFXufkbRB2mu3FPsW5xLAUWJyexpJA+/VtQj18k3SUiJVLdpgzBd1v1gRRcIpEJj7K5KpxBKfOXlZxT3ZZRuA==}
+    dependencies:
+      enhanced-resolve: 5.18.3
+      jiti: 2.5.1
+      tailwindcss: 4.0.7
+    dev: false
+
+  /@tailwindcss/oxide-android-arm64@4.0.7:
+    resolution: {integrity: sha512-5iQXXcAeOHBZy8ASfHFm1k0O/9wR2E3tKh6+P+ilZZbQiMgu+qrnfpBWYPc3FPuQdWiWb73069WT5D+CAfx/tg==}
+    engines: {node: '>= 10'}
+    cpu: [arm64]
+    os: [android]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@tailwindcss/oxide-darwin-arm64@4.0.7:
+    resolution: {integrity: sha512-7yGZtEc5IgVYylqK/2B0yVqoofk4UAbkn1ygNpIJZyrOhbymsfr8uUFCueTu2fUxmAYIfMZ8waWo2dLg/NgLgg==}
+    engines: {node: '>= 10'}
+    cpu: [arm64]
+    os: [darwin]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@tailwindcss/oxide-darwin-x64@4.0.7:
+    resolution: {integrity: sha512-tPQDV20fBjb26yWbPqT1ZSoDChomMCiXTKn4jupMSoMCFyU7+OJvIY1ryjqBuY622dEBJ8LnCDDWsnj1lX9nNQ==}
+    engines: {node: '>= 10'}
+    cpu: [x64]
+    os: [darwin]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@tailwindcss/oxide-freebsd-x64@4.0.7:
+    resolution: {integrity: sha512-sZqJpTyTZiknU9LLHuByg5GKTW+u3FqM7q7myequAXxKOpAFiOfXpY710FuMY+gjzSapyRbDXJlsTQtCyiTo5w==}
+    engines: {node: '>= 10'}
+    cpu: [x64]
+    os: [freebsd]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@tailwindcss/oxide-linux-arm-gnueabihf@4.0.7:
+    resolution: {integrity: sha512-PBgvULgeSswjd8cbZ91gdIcIDMdc3TUHV5XemEpxlqt9M8KoydJzkuB/Dt910jYdofOIaTWRL6adG9nJICvU4A==}
+    engines: {node: '>= 10'}
+    cpu: [arm]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@tailwindcss/oxide-linux-arm64-gnu@4.0.7:
+    resolution: {integrity: sha512-By/a2yeh+e9b+C67F88ndSwVJl2A3tcUDb29FbedDi+DZ4Mr07Oqw9Y1DrDrtHIDhIZ3bmmiL1dkH2YxrtV+zw==}
+    engines: {node: '>= 10'}
+    cpu: [arm64]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@tailwindcss/oxide-linux-arm64-musl@4.0.7:
+    resolution: {integrity: sha512-WHYs3cpPEJb/ccyT20NOzopYQkl7JKncNBUbb77YFlwlXMVJLLV3nrXQKhr7DmZxz2ZXqjyUwsj2rdzd9stYdw==}
+    engines: {node: '>= 10'}
+    cpu: [arm64]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@tailwindcss/oxide-linux-x64-gnu@4.0.7:
+    resolution: {integrity: sha512-7bP1UyuX9kFxbOwkeIJhBZNevKYPXB6xZI37v09fqi6rqRJR8elybwjMUHm54GVP+UTtJ14ueB1K54Dy1tIO6w==}
+    engines: {node: '>= 10'}
+    cpu: [x64]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@tailwindcss/oxide-linux-x64-musl@4.0.7:
+    resolution: {integrity: sha512-gBQIV8nL/LuhARNGeroqzXymMzzW5wQzqlteVqOVoqwEfpHOP3GMird5pGFbnpY+NP0fOlsZGrxxOPQ4W/84bQ==}
+    engines: {node: '>= 10'}
+    cpu: [x64]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@tailwindcss/oxide-win32-arm64-msvc@4.0.7:
+    resolution: {integrity: sha512-aH530NFfx0kpQpvYMfWoeG03zGnRCMVlQG8do/5XeahYydz+6SIBxA1tl/cyITSJyWZHyVt6GVNkXeAD30v0Xg==}
+    engines: {node: '>= 10'}
+    cpu: [arm64]
+    os: [win32]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@tailwindcss/oxide-win32-x64-msvc@4.0.7:
+    resolution: {integrity: sha512-8Cva6bbJN7ZJx320k7vxGGdU0ewmpfS5A4PudyzUuofdi8MgeINuiiWiPQ0VZCda/GX88K6qp+6UpDZNVr8HMQ==}
+    engines: {node: '>= 10'}
+    cpu: [x64]
+    os: [win32]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@tailwindcss/oxide@4.0.7:
+    resolution: {integrity: sha512-yr6w5YMgjy+B+zkJiJtIYGXW+HNYOPfRPtSs+aqLnKwdEzNrGv4ZuJh9hYJ3mcA+HMq/K1rtFV+KsEr65S558g==}
+    engines: {node: '>= 10'}
+    optionalDependencies:
+      '@tailwindcss/oxide-android-arm64': 4.0.7
+      '@tailwindcss/oxide-darwin-arm64': 4.0.7
+      '@tailwindcss/oxide-darwin-x64': 4.0.7
+      '@tailwindcss/oxide-freebsd-x64': 4.0.7
+      '@tailwindcss/oxide-linux-arm-gnueabihf': 4.0.7
+      '@tailwindcss/oxide-linux-arm64-gnu': 4.0.7
+      '@tailwindcss/oxide-linux-arm64-musl': 4.0.7
+      '@tailwindcss/oxide-linux-x64-gnu': 4.0.7
+      '@tailwindcss/oxide-linux-x64-musl': 4.0.7
+      '@tailwindcss/oxide-win32-arm64-msvc': 4.0.7
+      '@tailwindcss/oxide-win32-x64-msvc': 4.0.7
+    dev: false
+
+  /@tailwindcss/vite@4.0.7(vite@6.3.5):
+    resolution: {integrity: sha512-GYx5sxArfIMtdZCsxfya3S/efMmf4RvfqdiLUozkhmSFBNUFnYVodatpoO/en4/BsOIGvq/RB6HwcTLn9prFnQ==}
+    peerDependencies:
+      vite: ^5.2.0 || ^6
+    dependencies:
+      '@tailwindcss/node': 4.0.7
+      '@tailwindcss/oxide': 4.0.7
+      lightningcss: 1.30.1
+      tailwindcss: 4.0.7
+      vite: 6.3.5(@types/node@24.3.0)
+    dev: false
+
+  /@types/babel__core@7.20.5:
+    resolution: {integrity: sha512-qoQprZvz5wQFJwMDqeseRXWv3rqMvhgpbXFfVyWhbx9X47POIA6i/+dXefEmZKoAgOaTdaIgNSMqMIU61yRyzA==}
+    dependencies:
+      '@babel/parser': 7.28.3
+      '@babel/types': 7.28.2
+      '@types/babel__generator': 7.27.0
+      '@types/babel__template': 7.4.4
+      '@types/babel__traverse': 7.28.0
+    dev: false
+
+  /@types/babel__generator@7.27.0:
+    resolution: {integrity: sha512-ufFd2Xi92OAVPYsy+P4n7/U7e68fex0+Ee8gSG9KX7eo084CWiQ4sdxktvdl0bOPupXtVJPY19zk6EwWqUQ8lg==}
+    dependencies:
+      '@babel/types': 7.28.2
+    dev: false
+
+  /@types/babel__template@7.4.4:
+    resolution: {integrity: sha512-h/NUaSyG5EyxBIp8YRxo4RMe2/qQgvyowRwVMzhYhBCONbW8PUsg4lkFMrhgZhUe5z3L3MiLDuvyJ/CaPa2A8A==}
+    dependencies:
+      '@babel/parser': 7.28.3
+      '@babel/types': 7.28.2
+    dev: false
+
+  /@types/babel__traverse@7.28.0:
+    resolution: {integrity: sha512-8PvcXf70gTDZBgt9ptxJ8elBeBjcLOAcOtoO/mPJjtji1+CdGbHgm77om1GrsPxsiE+uXIpNSK64UYaIwQXd4Q==}
+    dependencies:
+      '@babel/types': 7.28.2
+    dev: false
+
+  /@types/d3-array@3.2.1:
+    resolution: {integrity: sha512-Y2Jn2idRrLzUfAKV2LyRImR+y4oa2AntrgID95SHJxuMUrkNXmanDSed71sRNZysveJVt1hLLemQZIady0FpEg==}
+    dev: false
+
+  /@types/d3-axis@3.0.6:
+    resolution: {integrity: sha512-pYeijfZuBd87T0hGn0FO1vQ/cgLk6E1ALJjfkC0oJ8cbwkZl3TpgS8bVBLZN+2jjGgg38epgxb2zmoGtSfvgMw==}
+    dependencies:
+      '@types/d3-selection': 3.0.11
+    dev: false
+
+  /@types/d3-brush@3.0.6:
+    resolution: {integrity: sha512-nH60IZNNxEcrh6L1ZSMNA28rj27ut/2ZmI3r96Zd+1jrZD++zD3LsMIjWlvg4AYrHn/Pqz4CF3veCxGjtbqt7A==}
+    dependencies:
+      '@types/d3-selection': 3.0.11
+    dev: false
+
+  /@types/d3-chord@3.0.6:
+    resolution: {integrity: sha512-LFYWWd8nwfwEmTZG9PfQxd17HbNPksHBiJHaKuY1XeqscXacsS2tyoo6OdRsjf+NQYeB6XrNL3a25E3gH69lcg==}
+    dev: false
+
+  /@types/d3-color@3.1.3:
+    resolution: {integrity: sha512-iO90scth9WAbmgv7ogoq57O9YpKmFBbmoEoCHDB2xMBY0+/KVrqAaCDyCE16dUspeOvIxFFRI+0sEtqDqy2b4A==}
+    dev: false
+
+  /@types/d3-contour@3.0.6:
+    resolution: {integrity: sha512-BjzLgXGnCWjUSYGfH1cpdo41/hgdWETu4YxpezoztawmqsvCeep+8QGfiY6YbDvfgHz/DkjeIkkZVJavB4a3rg==}
+    dependencies:
+      '@types/d3-array': 3.2.1
+      '@types/geojson': 7946.0.16
+    dev: false
+
+  /@types/d3-delaunay@6.0.4:
+    resolution: {integrity: sha512-ZMaSKu4THYCU6sV64Lhg6qjf1orxBthaC161plr5KuPHo3CNm8DTHiLw/5Eq2b6TsNP0W0iJrUOFscY6Q450Hw==}
+    dev: false
+
+  /@types/d3-dispatch@3.0.7:
+    resolution: {integrity: sha512-5o9OIAdKkhN1QItV2oqaE5KMIiXAvDWBDPrD85e58Qlz1c1kI/J0NcqbEG88CoTwJrYe7ntUCVfeUl2UJKbWgA==}
+    dev: false
+
+  /@types/d3-drag@3.0.7:
+    resolution: {integrity: sha512-HE3jVKlzU9AaMazNufooRJ5ZpWmLIoc90A37WU2JMmeq28w1FQqCZswHZ3xR+SuxYftzHq6WU6KJHvqxKzTxxQ==}
+    dependencies:
+      '@types/d3-selection': 3.0.11
+    dev: false
+
+  /@types/d3-dsv@3.0.7:
+    resolution: {integrity: sha512-n6QBF9/+XASqcKK6waudgL0pf/S5XHPPI8APyMLLUHd8NqouBGLsU8MgtO7NINGtPBtk9Kko/W4ea0oAspwh9g==}
+    dev: false
+
+  /@types/d3-ease@3.0.2:
+    resolution: {integrity: sha512-NcV1JjO5oDzoK26oMzbILE6HW7uVXOHLQvHshBUW4UMdZGfiY6v5BeQwh9a9tCzv+CeefZQHJt5SRgK154RtiA==}
+    dev: false
+
+  /@types/d3-fetch@3.0.7:
+    resolution: {integrity: sha512-fTAfNmxSb9SOWNB9IoG5c8Hg6R+AzUHDRlsXsDZsNp6sxAEOP0tkP3gKkNSO/qmHPoBFTxNrjDprVHDQDvo5aA==}
+    dependencies:
+      '@types/d3-dsv': 3.0.7
+    dev: false
+
+  /@types/d3-force@3.0.10:
+    resolution: {integrity: sha512-ZYeSaCF3p73RdOKcjj+swRlZfnYpK1EbaDiYICEEp5Q6sUiqFaFQ9qgoshp5CzIyyb/yD09kD9o2zEltCexlgw==}
+    dev: false
+
+  /@types/d3-format@3.0.4:
+    resolution: {integrity: sha512-fALi2aI6shfg7vM5KiR1wNJnZ7r6UuggVqtDA+xiEdPZQwy/trcQaHnwShLuLdta2rTymCNpxYTiMZX/e09F4g==}
+    dev: false
+
+  /@types/d3-geo@3.1.0:
+    resolution: {integrity: sha512-856sckF0oP/diXtS4jNsiQw/UuK5fQG8l/a9VVLeSouf1/PPbBE1i1W852zVwKwYCBkFJJB7nCFTbk6UMEXBOQ==}
+    dependencies:
+      '@types/geojson': 7946.0.16
+    dev: false
+
+  /@types/d3-hierarchy@3.1.7:
+    resolution: {integrity: sha512-tJFtNoYBtRtkNysX1Xq4sxtjK8YgoWUNpIiUee0/jHGRwqvzYxkq0hGVbbOGSz+JgFxxRu4K8nb3YpG3CMARtg==}
+    dev: false
+
+  /@types/d3-interpolate@3.0.4:
+    resolution: {integrity: sha512-mgLPETlrpVV1YRJIglr4Ez47g7Yxjl1lj7YKsiMCb27VJH9W8NVM6Bb9d8kkpG/uAQS5AmbA48q2IAolKKo1MA==}
+    dependencies:
+      '@types/d3-color': 3.1.3
+    dev: false
+
+  /@types/d3-path@3.1.1:
+    resolution: {integrity: sha512-VMZBYyQvbGmWyWVea0EHs/BwLgxc+MKi1zLDCONksozI4YJMcTt8ZEuIR4Sb1MMTE8MMW49v0IwI5+b7RmfWlg==}
+    dev: false
+
+  /@types/d3-polygon@3.0.2:
+    resolution: {integrity: sha512-ZuWOtMaHCkN9xoeEMr1ubW2nGWsp4nIql+OPQRstu4ypeZ+zk3YKqQT0CXVe/PYqrKpZAi+J9mTs05TKwjXSRA==}
+    dev: false
+
+  /@types/d3-quadtree@3.0.6:
+    resolution: {integrity: sha512-oUzyO1/Zm6rsxKRHA1vH0NEDG58HrT5icx/azi9MF1TWdtttWl0UIUsjEQBBh+SIkrpd21ZjEv7ptxWys1ncsg==}
+    dev: false
+
+  /@types/d3-random@3.0.3:
+    resolution: {integrity: sha512-Imagg1vJ3y76Y2ea0871wpabqp613+8/r0mCLEBfdtqC7xMSfj9idOnmBYyMoULfHePJyxMAw3nWhJxzc+LFwQ==}
+    dev: false
+
+  /@types/d3-scale-chromatic@3.1.0:
+    resolution: {integrity: sha512-iWMJgwkK7yTRmWqRB5plb1kadXyQ5Sj8V/zYlFGMUBbIPKQScw+Dku9cAAMgJG+z5GYDoMjWGLVOvjghDEFnKQ==}
+    dev: false
+
+  /@types/d3-scale@4.0.9:
+    resolution: {integrity: sha512-dLmtwB8zkAeO/juAMfnV+sItKjlsw2lKdZVVy6LRr0cBmegxSABiLEpGVmSJJ8O08i4+sGR6qQtb6WtuwJdvVw==}
+    dependencies:
+      '@types/d3-time': 3.0.4
+    dev: false
+
+  /@types/d3-selection@3.0.11:
+    resolution: {integrity: sha512-bhAXu23DJWsrI45xafYpkQ4NtcKMwWnAC/vKrd2l+nxMFuvOT3XMYTIj2opv8vq8AO5Yh7Qac/nSeP/3zjTK0w==}
+    dev: false
+
+  /@types/d3-shape@3.1.7:
+    resolution: {integrity: sha512-VLvUQ33C+3J+8p+Daf+nYSOsjB4GXp19/S/aGo60m9h1v6XaxjiT82lKVWJCfzhtuZ3yD7i/TPeC/fuKLLOSmg==}
+    dependencies:
+      '@types/d3-path': 3.1.1
+    dev: false
+
+  /@types/d3-time-format@4.0.3:
+    resolution: {integrity: sha512-5xg9rC+wWL8kdDj153qZcsJ0FWiFt0J5RB6LYUNZjwSnesfblqrI/bJ1wBdJ8OQfncgbJG5+2F+qfqnqyzYxyg==}
+    dev: false
+
+  /@types/d3-time@3.0.4:
+    resolution: {integrity: sha512-yuzZug1nkAAaBlBBikKZTgzCeA+k1uy4ZFwWANOfKw5z5LRhV0gNA7gNkKm7HoK+HRN0wX3EkxGk0fpbWhmB7g==}
+    dev: false
+
+  /@types/d3-timer@3.0.2:
+    resolution: {integrity: sha512-Ps3T8E8dZDam6fUyNiMkekK3XUsaUEik+idO9/YjPtfj2qruF8tFBXS7XhtE4iIXBLxhmLjP3SXpLhVf21I9Lw==}
+    dev: false
+
+  /@types/d3-transition@3.0.9:
+    resolution: {integrity: sha512-uZS5shfxzO3rGlu0cC3bjmMFKsXv+SmZZcgp0KD22ts4uGXp5EVYGzu/0YdwZeKmddhcAccYtREJKkPfXkZuCg==}
+    dependencies:
+      '@types/d3-selection': 3.0.11
+    dev: false
+
+  /@types/d3-zoom@3.0.8:
+    resolution: {integrity: sha512-iqMC4/YlFCSlO8+2Ii1GGGliCAY4XdeG748w5vQUbevlbDu0zSjH/+jojorQVBK/se0j6DUFNPBGSqD3YWYnDw==}
+    dependencies:
+      '@types/d3-interpolate': 3.0.4
+      '@types/d3-selection': 3.0.11
+    dev: false
+
+  /@types/d3@7.4.3:
+    resolution: {integrity: sha512-lZXZ9ckh5R8uiFVt8ogUNf+pIrK4EsWrx2Np75WvF/eTpJ0FMHNhjXk8CKEx/+gpHbNQyJWehbFaTvqmHWB3ww==}
+    dependencies:
+      '@types/d3-array': 3.2.1
+      '@types/d3-axis': 3.0.6
+      '@types/d3-brush': 3.0.6
+      '@types/d3-chord': 3.0.6
+      '@types/d3-color': 3.1.3
+      '@types/d3-contour': 3.0.6
+      '@types/d3-delaunay': 6.0.4
+      '@types/d3-dispatch': 3.0.7
+      '@types/d3-drag': 3.0.7
+      '@types/d3-dsv': 3.0.7
+      '@types/d3-ease': 3.0.2
+      '@types/d3-fetch': 3.0.7
+      '@types/d3-force': 3.0.10
+      '@types/d3-format': 3.0.4
+      '@types/d3-geo': 3.1.0
+      '@types/d3-hierarchy': 3.1.7
+      '@types/d3-interpolate': 3.0.4
+      '@types/d3-path': 3.1.1
+      '@types/d3-polygon': 3.0.2
+      '@types/d3-quadtree': 3.0.6
+      '@types/d3-random': 3.0.3
+      '@types/d3-scale': 4.0.9
+      '@types/d3-scale-chromatic': 3.1.0
+      '@types/d3-selection': 3.0.11
+      '@types/d3-shape': 3.1.7
+      '@types/d3-time': 3.0.4
+      '@types/d3-time-format': 4.0.3
+      '@types/d3-timer': 3.0.2
+      '@types/d3-transition': 3.0.9
+      '@types/d3-zoom': 3.0.8
+    dev: false
+
+  /@types/debug@4.1.12:
+    resolution: {integrity: sha512-vIChWdVG3LG1SMxEvI/AK+FWJthlrqlTu7fbrlywTkkaONwk/UAGaULXRlf8vkzFBLVm0zkMdCquhL5aOjhXPQ==}
+    dependencies:
+      '@types/ms': 2.1.0
+    dev: false
+
+  /@types/estree-jsx@1.0.5:
+    resolution: {integrity: sha512-52CcUVNFyfb1A2ALocQw/Dd1BQFNmSdkuC3BkZ6iqhdMfQz7JWOFRuJFloOzjk+6WijU56m9oKXFAXc7o3Towg==}
+    dependencies:
+      '@types/estree': 1.0.8
+    dev: false
+
+  /@types/estree@1.0.8:
+    resolution: {integrity: sha512-dWHzHa2WqEXI/O1E9OjrocMTKJl2mSrEolh1Iomrv6U+JuNwaHXsXx9bLu5gG7BUWFIN0skIQJQ/L1rIex4X6w==}
+    dev: false
+
+  /@types/geojson@7946.0.16:
+    resolution: {integrity: sha512-6C8nqWur3j98U6+lXDfTUWIfgvZU+EumvpHKcYjujKH7woYyLj2sUmff0tRhrqM7BohUw7Pz3ZB1jj2gW9Fvmg==}
+    dev: false
+
+  /@types/hast@2.3.10:
+    resolution: {integrity: sha512-McWspRw8xx8J9HurkVBfYj0xKoE25tOFlHGdx4MJ5xORQrMGZNqJhVQWaIbm6Oyla5kYOXtDiopzKRJzEOkwJw==}
+    dependencies:
+      '@types/unist': 2.0.11
+    dev: false
+
+  /@types/hast@3.0.4:
+    resolution: {integrity: sha512-WPs+bbQw5aCj+x6laNGWLH3wviHtoCv/P3+otBhbOhJgG8qtpdAMlTCxLtsTWA7LH1Oh/bFCHsBn0TPS5m30EQ==}
+    dependencies:
+      '@types/unist': 3.0.3
+    dev: false
+
+  /@types/katex@0.16.7:
+    resolution: {integrity: sha512-HMwFiRujE5PjrgwHQ25+bsLJgowjGjm5Z8FVSf0N6PwgJrwxH0QxzHYDcKsTfV3wva0vzrpqMTJS2jXPr5BMEQ==}
+    dev: false
+
+  /@types/mdast@4.0.4:
+    resolution: {integrity: sha512-kGaNbPh1k7AFzgpud/gMdvIm5xuECykRR+JnWKQno9TAXVa6WIVCGTPvYGekIDL4uwCZQSYbUxNBSb1aUo79oA==}
+    dependencies:
+      '@types/unist': 3.0.3
+    dev: false
+
+  /@types/mdx@2.0.13:
+    resolution: {integrity: sha512-+OWZQfAYyio6YkJb3HLxDrvnx6SWWDbC0zVPfBRzUk0/nqoDyf6dNxQi3eArPe8rJ473nobTMQ/8Zk+LxJ+Yuw==}
+    dev: false
+
+  /@types/ms@2.1.0:
+    resolution: {integrity: sha512-GsCCIZDE/p3i96vtEqx+7dBUGXrc7zeSK3wwPHIaRThS+9OhWIXRqzs4d6k1SVU8g91DrNRWxWUGhp5KXQb2VA==}
+    dev: false
+
+  /@types/node@24.3.0:
+    resolution: {integrity: sha512-aPTXCrfwnDLj4VvXrm+UUCQjNEvJgNA8s5F1cvwQU+3KNltTOkBm1j30uNLyqqPNe7gE3KFzImYoZEfLhp4Yow==}
+    dependencies:
+      undici-types: 7.10.0
+
+  /@types/react@19.1.10:
+    resolution: {integrity: sha512-EhBeSYX0Y6ye8pNebpKrwFJq7BoQ8J5SO6NlvNwwHjSj6adXJViPQrKlsyPw7hLBLvckEMO1yxeGdR82YBBlDg==}
+    dependencies:
+      csstype: 3.1.3
+
+  /@types/trusted-types@2.0.7:
+    resolution: {integrity: sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw==}
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /@types/unist@2.0.11:
+    resolution: {integrity: sha512-CmBKiL6NNo/OqgmMn95Fk9Whlp2mtvIv+KNpQKN2F4SjvrEesubTRWGYSg+BnWZOnlCaSTU1sMpsBOzgbYhnsA==}
+    dev: false
+
+  /@types/unist@3.0.3:
+    resolution: {integrity: sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q==}
+    dev: false
+
+  /@typescript/vfs@1.6.1(typescript@5.9.2):
+    resolution: {integrity: sha512-JwoxboBh7Oz1v38tPbkrZ62ZXNHAk9bJ7c9x0eI5zBfBnBYGhURdbnh7Z4smN/MV48Y5OCcZb58n972UtbazsA==}
+    peerDependencies:
+      typescript: '*'
+    dependencies:
+      debug: 4.4.1
+      typescript: 5.9.2
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /@ungap/structured-clone@1.3.0:
+    resolution: {integrity: sha512-WmoN8qaIAo7WTYWbAZuG8PYEhn5fkz7dZrqTBZ7dtt//lL2Gwms1IcnQ5yHqjDfX8Ft5j4YzDM23f87zBfDe9g==}
+    dev: false
+
+  /@vanilla-extract/babel-plugin-debug-ids@1.2.2:
+    resolution: {integrity: sha512-MeDWGICAF9zA/OZLOKwhoRlsUW+fiMwnfuOAqFVohL31Agj7Q/RBWAYweqjHLgFBCsdnr6XIfwjJnmb2znEWxw==}
+    dependencies:
+      '@babel/core': 7.28.3
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /@vanilla-extract/compiler@0.3.1(@types/node@24.3.0):
+    resolution: {integrity: sha512-KZ67DZQu58dMo7Jv4PNMPG5TbMOXB68xxVYV2cRJvUdPeiOmX0FOaPgEsYBAZUgd/oLEx4IyV0AvlvsxJ1akfQ==}
+    dependencies:
+      '@vanilla-extract/css': 1.17.4
+      '@vanilla-extract/integration': 8.0.4
+      vite: 6.3.5(@types/node@24.3.0)
+      vite-node: 3.2.4(@types/node@24.3.0)
+    transitivePeerDependencies:
+      - '@types/node'
+      - babel-plugin-macros
+      - jiti
+      - less
+      - lightningcss
+      - sass
+      - sass-embedded
+      - stylus
+      - sugarss
+      - supports-color
+      - terser
+      - tsx
+      - yaml
+    dev: false
+
+  /@vanilla-extract/css@1.17.4:
+    resolution: {integrity: sha512-m3g9nQDWPtL+sTFdtCGRMI1Vrp86Ay4PBYq1Bo7Bnchj5ElNtAJpOqD+zg+apthVA4fB7oVpMWNjwpa6ElDWFQ==}
+    dependencies:
+      '@emotion/hash': 0.9.2
+      '@vanilla-extract/private': 1.0.9
+      css-what: 6.2.2
+      cssesc: 3.0.0
+      csstype: 3.1.3
+      dedent: 1.6.0
+      deep-object-diff: 1.1.9
+      deepmerge: 4.3.1
+      lru-cache: 10.4.3
+      media-query-parser: 2.0.2
+      modern-ahocorasick: 1.1.0
+      picocolors: 1.1.1
+    transitivePeerDependencies:
+      - babel-plugin-macros
+    dev: false
+
+  /@vanilla-extract/dynamic@2.1.5:
+    resolution: {integrity: sha512-QGIFGb1qyXQkbzx6X6i3+3LMc/iv/ZMBttMBL+Wm/DetQd36KsKsFg5CtH3qy+1hCA/5w93mEIIAiL4fkM8ycw==}
+    dependencies:
+      '@vanilla-extract/private': 1.0.9
+    dev: false
+
+  /@vanilla-extract/integration@8.0.4:
+    resolution: {integrity: sha512-cmOb7tR+g3ulKvFtSbmdw3YUyIS1d7MQqN+FcbwNhdieyno5xzUyfDCMjeWJhmCSMvZ6WlinkrOkgs6SHB+FRg==}
+    dependencies:
+      '@babel/core': 7.28.3
+      '@babel/plugin-syntax-typescript': 7.27.1(@babel/core@7.28.3)
+      '@vanilla-extract/babel-plugin-debug-ids': 1.2.2
+      '@vanilla-extract/css': 1.17.4
+      dedent: 1.6.0
+      esbuild: 0.25.9
+      eval: 0.1.8
+      find-up: 5.0.0
+      javascript-stringify: 2.1.0
+      mlly: 1.7.4
+    transitivePeerDependencies:
+      - babel-plugin-macros
+      - supports-color
+    dev: false
+
+  /@vanilla-extract/private@1.0.9:
+    resolution: {integrity: sha512-gT2jbfZuaaCLrAxwXbRgIhGhcXbRZCG3v4TTUnjw0EJ7ArdBRxkq4msNJkbuRkCgfIK5ATmprB5t9ljvLeFDEA==}
+    dev: false
+
+  /@vanilla-extract/vite-plugin@5.1.1(@types/node@24.3.0)(vite@6.3.5):
+    resolution: {integrity: sha512-Nd1worqkHrd8XED4ZAA7Wmkd3pCqCwpmzCBVF8v6T1BSLHGXQE5HYflVgygw0CsIAbFRMS6zQBIk4F4/r/YKIw==}
+    peerDependencies:
+      vite: ^5.0.0 || ^6.0.0 || ^7.0.0
+    dependencies:
+      '@vanilla-extract/compiler': 0.3.1(@types/node@24.3.0)
+      '@vanilla-extract/integration': 8.0.4
+      vite: 6.3.5(@types/node@24.3.0)
+    transitivePeerDependencies:
+      - '@types/node'
+      - babel-plugin-macros
+      - jiti
+      - less
+      - lightningcss
+      - sass
+      - sass-embedded
+      - stylus
+      - sugarss
+      - supports-color
+      - terser
+      - tsx
+      - yaml
+    dev: false
+
+  /@vitejs/plugin-react@4.7.0(vite@6.3.5):
+    resolution: {integrity: sha512-gUu9hwfWvvEDBBmgtAowQCojwZmJ5mcLn3aufeCsitijs3+f2NsrPtlAWIR6OPiqljl96GVCUbLe0HyqIpVaoA==}
+    engines: {node: ^14.18.0 || >=16.0.0}
+    peerDependencies:
+      vite: ^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0
+    dependencies:
+      '@babel/core': 7.28.3
+      '@babel/plugin-transform-react-jsx-self': 7.27.1(@babel/core@7.28.3)
+      '@babel/plugin-transform-react-jsx-source': 7.27.1(@babel/core@7.28.3)
+      '@rolldown/pluginutils': 1.0.0-beta.27
+      '@types/babel__core': 7.20.5
+      react-refresh: 0.17.0
+      vite: 6.3.5(@types/node@24.3.0)
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /@yarnpkg/lockfile@1.1.0:
+    resolution: {integrity: sha512-GpSwvyXOcOOlV70vbnzjj4fW5xW/FdUF6nQEt1ENy7m4ZCczi1+/buVUPAqmGfqznsORNFzUMjctTIp8a9tuCQ==}
+    dev: true
+
+  /acorn-jsx@5.3.2(acorn@8.15.0):
+    resolution: {integrity: sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==}
+    peerDependencies:
+      acorn: ^6.0.0 || ^7.0.0 || ^8.0.0
+    dependencies:
+      acorn: 8.15.0
+    dev: false
+
+  /acorn@8.15.0:
+    resolution: {integrity: sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg==}
+    engines: {node: '>=0.4.0'}
+    hasBin: true
+    dev: false
+
+  /ansi-regex@5.0.1:
+    resolution: {integrity: sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==}
+    engines: {node: '>=8'}
+    dev: true
+
+  /ansi-regex@6.1.0:
+    resolution: {integrity: sha512-7HSX4QQb4CspciLpVFwyRe79O3xsIZDDLER21kERQ71oaPodF8jL725AgJMFAYbooIqolJoRLuM81SpeUkpkvA==}
+    engines: {node: '>=12'}
+
+  /ansi-styles@4.3.0:
+    resolution: {integrity: sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==}
+    engines: {node: '>=8'}
+    dependencies:
+      color-convert: 2.0.1
+    dev: true
+
+  /ansi-styles@6.2.1:
+    resolution: {integrity: sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==}
+    engines: {node: '>=12'}
+    dev: true
+
+  /aria-hidden@1.2.6:
+    resolution: {integrity: sha512-ik3ZgC9dY/lYVVM++OISsaYDeg1tb0VtP5uL3ouh1koGOaUMDPpbFIei4JkFimWUFPn90sbMNMXQAIVOlnYKJA==}
+    engines: {node: '>=10'}
+    dependencies:
+      tslib: 2.8.1
+    dev: false
+
+  /astring@1.9.0:
+    resolution: {integrity: sha512-LElXdjswlqjWrPpJFg1Fx4wpkOCxj1TDHlSV4PlaRxHGWko024xICaa97ZkMfs6DRKlCguiAI+rbXv5GWwXIkg==}
+    hasBin: true
+    dev: false
+
+  /at-least-node@1.0.0:
+    resolution: {integrity: sha512-+q/t7Ekv1EDY2l6Gda6LLiX14rU9TV20Wa3ofeQmwPFZbOMo9DXrLbOjFaaclkXKWidIaopwAObQDqwWtGUjqg==}
+    engines: {node: '>= 4.0.0'}
+    dev: true
+
+  /autoprefixer@10.4.21(postcss@8.5.6):
+    resolution: {integrity: sha512-O+A6LWV5LDHSJD3LjHYoNi4VLsj/Whi7k6zG12xTYaU4cQ8oxQGckXNX8cRHK5yOZ/ppVHe0ZBXGzSV9jXdVbQ==}
+    engines: {node: ^10 || ^12 || >=14}
+    hasBin: true
+    peerDependencies:
+      postcss: ^8.1.0
+    dependencies:
+      browserslist: 4.25.2
+      caniuse-lite: 1.0.30001735
+      fraction.js: 4.3.7
+      normalize-range: 0.1.2
+      picocolors: 1.1.1
+      postcss: 8.5.6
+      postcss-value-parser: 4.2.0
+    dev: false
+
+  /bail@2.0.2:
+    resolution: {integrity: sha512-0xO6mYd7JB2YesxDKplafRpsiOzPt9V02ddPCLbY1xYGPOX24NTyN50qnUxgCPcSoYMhKpAuBTjQoRZCAkUDRw==}
+    dev: false
+
+  /balanced-match@1.0.2:
+    resolution: {integrity: sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==}
+
+  /base64-js@1.5.1:
+    resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==}
+    dev: false
+
+  /bcp-47-match@2.0.3:
+    resolution: {integrity: sha512-JtTezzbAibu8G0R9op9zb3vcWZd9JF6M0xOYGPn0fNCd7wOpRB1mU2mH9T8gaBGbAAyIIVgB2G7xG0GP98zMAQ==}
+    dev: false
+
+  /bl@5.1.0:
+    resolution: {integrity: sha512-tv1ZJHLfTDnXE6tMHv73YgSJaWR2AFuPwMntBe7XL/GBFHnT0CLnsHMogfk5+GzCDC5ZWarSCYaIGATZt9dNsQ==}
+    dependencies:
+      buffer: 6.0.3
+      inherits: 2.0.4
+      readable-stream: 3.6.2
+    dev: false
+
+  /boolbase@1.0.0:
+    resolution: {integrity: sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==}
+    dev: false
+
+  /brace-expansion@1.1.12:
+    resolution: {integrity: sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==}
+    dependencies:
+      balanced-match: 1.0.2
+      concat-map: 0.0.1
+    dev: true
+
+  /brace-expansion@2.0.2:
+    resolution: {integrity: sha512-Jt0vHyM+jmUBqojB7E1NIYadt0vI0Qxjxd2TErW94wDz+E2LAm5vKMXXwg6ZZBTHPuUlDgQHKXvjGBdfcF1ZDQ==}
+    dependencies:
+      balanced-match: 1.0.2
+    dev: false
+
+  /braces@3.0.3:
+    resolution: {integrity: sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==}
+    engines: {node: '>=8'}
+    dependencies:
+      fill-range: 7.1.1
+
+  /browserslist@4.25.2:
+    resolution: {integrity: sha512-0si2SJK3ooGzIawRu61ZdPCO1IncZwS8IzuX73sPZsXW6EQ/w/DAfPyKI8l1ETTCr2MnvqWitmlCUxgdul45jA==}
+    engines: {node: ^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7}
+    hasBin: true
+    dependencies:
+      caniuse-lite: 1.0.30001735
+      electron-to-chromium: 1.5.203
+      node-releases: 2.0.19
+      update-browserslist-db: 1.1.3(browserslist@4.25.2)
+    dev: false
+
+  /buffer@6.0.3:
+    resolution: {integrity: sha512-FTiCpNxtwiZZHEZbcbTIcZjERVICn9yq/pDFkTl95/AxzD1naBctN7YO68riM/gLSDY7sdrMby8hofADYuuqOA==}
+    dependencies:
+      base64-js: 1.5.1
+      ieee754: 1.2.1
+    dev: false
+
+  /bytes@3.1.2:
+    resolution: {integrity: sha512-/Nf7TyzTx6S3yRJObOAV7956r8cr2+Oj8AC5dt8wSP3BQAoeX58NoHyCU8P8zGkNXStjTSi6fzO6F0pBdcYbEg==}
+    engines: {node: '>= 0.8'}
+    dev: false
+
+  /cac@6.7.14:
+    resolution: {integrity: sha512-b6Ilus+c3RrdDk+JhLKUAQfzzgLEPy6wcXqS7f/xe1EETvsDP6GORG7SFuOs6cID5YkqchW/LXZbX5bc8j7ZcQ==}
+    engines: {node: '>=8'}
+    dev: false
+
+  /call-bind-apply-helpers@1.0.2:
+    resolution: {integrity: sha512-Sp1ablJ0ivDkSzjcaJdxEunN5/XvksFJ2sMBFfq6x0ryhQV/2b/KwFe21cMpmHtPOSij8K99/wSfoEuTObmuMQ==}
+    engines: {node: '>= 0.4'}
+    dependencies:
+      es-errors: 1.3.0
+      function-bind: 1.1.2
+    dev: true
+
+  /call-bind@1.0.8:
+    resolution: {integrity: sha512-oKlSFMcMwpUg2ednkhQ454wfWiU/ul3CkJe/PEHcTKuiX6RpbehUiFMXu13HalGZxfUwCQzZG747YXBn1im9ww==}
+    engines: {node: '>= 0.4'}
+    dependencies:
+      call-bind-apply-helpers: 1.0.2
+      es-define-property: 1.0.1
+      get-intrinsic: 1.3.0
+      set-function-length: 1.2.2
+    dev: true
+
+  /call-bound@1.0.4:
+    resolution: {integrity: sha512-+ys997U96po4Kx/ABpBCqhA9EuxJaQWDQg7295H4hBphv3IZg0boBKuwYpt4YXp6MZ5AmZQnU/tyMTlRpaSejg==}
+    engines: {node: '>= 0.4'}
+    dependencies:
+      call-bind-apply-helpers: 1.0.2
+      get-intrinsic: 1.3.0
+    dev: true
+
+  /caniuse-lite@1.0.30001735:
+    resolution: {integrity: sha512-EV/laoX7Wq2J9TQlyIXRxTJqIw4sxfXS4OYgudGxBYRuTv0q7AM6yMEpU/Vo1I94thg9U6EZ2NfZx9GJq83u7w==}
+    dev: false
+
+  /ccount@2.0.1:
+    resolution: {integrity: sha512-eyrF0jiFpY+3drT6383f1qhkbGsLSifNAjA61IUjZjmLCWjItY6LB9ft9YhoDgwfmclB2zhu51Lc7+95b8NRAg==}
+    dev: false
+
+  /chalk@4.1.2:
+    resolution: {integrity: sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA==}
+    engines: {node: '>=10'}
+    dependencies:
+      ansi-styles: 4.3.0
+      supports-color: 7.2.0
+    dev: true
+
+  /chalk@5.5.0:
+    resolution: {integrity: sha512-1tm8DTaJhPBG3bIkVeZt1iZM9GfSX2lzOeDVZH9R9ffRHpmHvxZ/QhgQH/aDTkswQVt+YHdXAdS/In/30OjCbg==}
+    engines: {node: ^12.17.0 || ^14.13 || >=16.0.0}
+    dev: false
+
+  /character-entities-html4@2.1.0:
+    resolution: {integrity: sha512-1v7fgQRj6hnSwFpq1Eu0ynr/CDEw0rXo2B61qXrLNdHZmPKgb7fqS1a2JwF0rISo9q77jDI8VMEHoApn8qDoZA==}
+    dev: false
+
+  /character-entities-legacy@3.0.0:
+    resolution: {integrity: sha512-RpPp0asT/6ufRm//AJVwpViZbGM/MkjQFxJccQRHmISF/22NBtsHqAWmL+/pmkPWoIUJdWyeVleTl1wydHATVQ==}
+    dev: false
+
+  /character-entities@2.0.2:
+    resolution: {integrity: sha512-shx7oQ0Awen/BRIdkjkvz54PnEEI/EjwXDSIZp86/KKdbafHh1Df/RYGBhn4hbe2+uKC9FnT5UCEdyPz3ai9hQ==}
+    dev: false
+
+  /character-reference-invalid@2.0.1:
+    resolution: {integrity: sha512-iBZ4F4wRbyORVsu0jPV7gXkOsGYjGHPmAyv+HiHG8gi5PtC9KI2j1+v8/tlibRvjoWX027ypmG/n0HtO5t7unw==}
+    dev: false
+
+  /chevrotain-allstar@0.3.1(chevrotain@11.0.3):
+    resolution: {integrity: sha512-b7g+y9A0v4mxCW1qUhf3BSVPg+/NvGErk/dOkrDaHA0nQIQGAtrOjlX//9OQtRlSCy+x9rfB5N8yC71lH1nvMw==}
+    peerDependencies:
+      chevrotain: ^11.0.0
+    dependencies:
+      chevrotain: 11.0.3
+      lodash-es: 4.17.21
+    dev: false
+
+  /chevrotain@11.0.3:
+    resolution: {integrity: sha512-ci2iJH6LeIkvP9eJW6gpueU8cnZhv85ELY8w8WiFtNjMHA5ad6pQLaJo9mEly/9qUyCpvqX8/POVUTf18/HFdw==}
+    dependencies:
+      '@chevrotain/cst-dts-gen': 11.0.3
+      '@chevrotain/gast': 11.0.3
+      '@chevrotain/regexp-to-ast': 11.0.3
+      '@chevrotain/types': 11.0.3
+      '@chevrotain/utils': 11.0.3
+      lodash-es: 4.17.21
+    dev: false
+
+  /chroma-js@3.1.2:
+    resolution: {integrity: sha512-IJnETTalXbsLx1eKEgx19d5L6SRM7cH4vINw/99p/M11HCuXGRWL+6YmCm7FWFGIo6dtWuQoQi1dc5yQ7ESIHg==}
+    dev: false
+
+  /ci-info@3.9.0:
+    resolution: {integrity: sha512-NIxF55hv4nSqQswkAeiOi1r83xy8JldOFDTWiug55KBu9Jnblncd2U6ViHmYgHf01TPZS77NJBhBMKdWj9HQMQ==}
+    engines: {node: '>=8'}
+    dev: true
+
+  /cli-cursor@4.0.0:
+    resolution: {integrity: sha512-VGtlMu3x/4DOtIUwEkRezxUZ2lBacNJCHash0N0WeZDBS+7Ux1dm3XWAgWYxLJFMMdOeXMHXorshEFhbMSGelg==}
+    engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
+    dependencies:
+      restore-cursor: 4.0.0
+    dev: false
+
+  /cli-spinners@2.9.2:
+    resolution: {integrity: sha512-ywqV+5MmyL4E7ybXgKys4DugZbX0FC6LnwrhjuykIjnK9k8OQacQ7axGKnjDXWNhns0xot3bZI5h55H8yo9cJg==}
+    engines: {node: '>=6'}
+    dev: false
+
+  /clsx@2.1.1:
+    resolution: {integrity: sha512-eYm0QWBtUrBWZWG0d386OGAw16Z995PiOVo2B7bjWSbHedGl5e0ZWaq65kOGgUSNesEIDkB9ISbTg/JK9dhCZA==}
+    engines: {node: '>=6'}
+    dev: false
+
+  /collapse-white-space@2.1.0:
+    resolution: {integrity: sha512-loKTxY1zCOuG4j9f6EPnuyyYkf58RnhhWTvRoZEokgB+WbdXehfjFviyOVYkqzEWz1Q5kRiZdBYS5SwxbQYwzw==}
+    dev: false
+
+  /color-convert@2.0.1:
+    resolution: {integrity: sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==}
+    engines: {node: '>=7.0.0'}
+    dependencies:
+      color-name: 1.1.4
+    dev: true
+
+  /color-name@1.1.4:
+    resolution: {integrity: sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==}
+    dev: true
+
+  /comma-separated-tokens@2.0.3:
+    resolution: {integrity: sha512-Fu4hJdvzeylCfQPp9SGWidpzrMs7tTrlu6Vb8XGaRGck8QSNZJJp538Wrb60Lax4fPwR64ViY468OIUTbRlGZg==}
+    dev: false
+
+  /commander@7.2.0:
+    resolution: {integrity: sha512-QrWXB+ZQSVPmIWIhtEO9H+gwHaMGYiF5ChvoJ+K9ZGHG/sVsa6yiesAD1GC/x46sET00Xlwo1u49RVVVzvcSkw==}
+    engines: {node: '>= 10'}
+    dev: false
+
+  /commander@8.3.0:
+    resolution: {integrity: sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww==}
+    engines: {node: '>= 12'}
+    dev: false
+
+  /compressible@2.0.18:
+    resolution: {integrity: sha512-AF3r7P5dWxL8MxyITRMlORQNaOA2IkAFaTr4k7BUumjPtRpGDTZpl0Pb1XCO6JeDCBdp126Cgs9sMxqSjgYyRg==}
+    engines: {node: '>= 0.6'}
+    dependencies:
+      mime-db: 1.54.0
+    dev: false
+
+  /compression@1.8.1:
+    resolution: {integrity: sha512-9mAqGPHLakhCLeNyxPkK4xVo746zQ/czLH1Ky+vkitMnWfWZps8r0qXuwhwizagCRttsL4lfG4pIOvaWLpAP0w==}
+    engines: {node: '>= 0.8.0'}
+    dependencies:
+      bytes: 3.1.2
+      compressible: 2.0.18
+      debug: 2.6.9
+      negotiator: 0.6.4
+      on-headers: 1.1.0
+      safe-buffer: 5.2.1
+      vary: 1.1.2
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /concat-map@0.0.1:
+    resolution: {integrity: sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==}
+    dev: true
+
+  /confbox@0.1.8:
+    resolution: {integrity: sha512-RMtmw0iFkeR4YV+fUOSucriAQNb9g8zFR52MWCtl+cCZOFRNL6zeB395vPzFhEjjn4fMxXudmELnl/KF/WrK6w==}
+    dev: false
+
+  /confbox@0.2.2:
+    resolution: {integrity: sha512-1NB+BKqhtNipMsov4xI/NnhCKp9XG9NamYp5PVm9klAT0fsrNPjaFICsCFhNhwZJKNh7zB/3q8qXz0E9oaMNtQ==}
+    dev: false
+
+  /convert-source-map@2.0.0:
+    resolution: {integrity: sha512-Kvp459HrV2FEJ1CAsi1Ku+MY3kasH19TFykTz2xWmMeq6bk2NU3XXvfJ+Q61m0xktWwt+1HSYf3JZsTms3aRJg==}
+    dev: false
+
+  /cookie@1.0.2:
+    resolution: {integrity: sha512-9Kr/j4O16ISv8zBBhJoi4bXOYNTkFLOqSL3UDB0njXxCXNezjeyVrJyGOWtgfs/q2km1gwBcfH8q1yEGoMYunA==}
+    engines: {node: '>=18'}
+    dev: false
+
+  /cose-base@1.0.3:
+    resolution: {integrity: sha512-s9whTXInMSgAp/NVXVNuVxVKzGH2qck3aQlVHxDCdAEPgtMKwc4Wq6/QKhgdEdgbLSi9rBTAcPoRa6JpiG4ksg==}
+    dependencies:
+      layout-base: 1.0.2
+    dev: false
+
+  /cose-base@2.2.0:
+    resolution: {integrity: sha512-AzlgcsCbUMymkADOJtQm3wO9S3ltPfYOFD5033keQn9NJzIbtnZj+UdBJe7DYml/8TdbtHJW3j58SOnKhWY/5g==}
+    dependencies:
+      layout-base: 2.0.1
+    dev: false
+
+  /create-vocs@1.0.0:
+    resolution: {integrity: sha512-Lv1Bd3WZEgwG4nrogkM54m8viW+TWPlGivLyEi7aNb3cuKPsEfMDZ/kTbo87fzOGtsZ2yh7scO54ZmVhhgBgTw==}
+    hasBin: true
+    dependencies:
+      '@clack/prompts': 0.7.0
+      cac: 6.7.14
+      detect-package-manager: 3.0.2
+      fs-extra: 11.3.1
+      picocolors: 1.1.1
+    dev: false
+
+  /cross-spawn@7.0.6:
+    resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==}
+    engines: {node: '>= 8'}
+    dependencies:
+      path-key: 3.1.1
+      shebang-command: 2.0.0
+      which: 2.0.2
+
+  /css-selector-parser@3.1.3:
+    resolution: {integrity: sha512-gJMigczVZqYAk0hPVzx/M4Hm1D9QOtqkdQk9005TNzDIUGzo5cnHEDiKUT7jGPximL/oYb+LIitcHFQ4aKupxg==}
+    dev: false
+
+  /css-what@6.2.2:
+    resolution: {integrity: sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==}
+    engines: {node: '>= 6'}
+    dev: false
+
+  /cssesc@3.0.0:
+    resolution: {integrity: sha512-/Tb/JcjK111nNScGob5MNtsntNM1aCNUDipB/TkwZFhyDrrE47SOx/18wF2bbjgc3ZzCSKW1T5nt5EbFoAz/Vg==}
+    engines: {node: '>=4'}
+    hasBin: true
+    dev: false
+
+  /csstype@3.1.3:
+    resolution: {integrity: sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==}
+
+  /cytoscape-cose-bilkent@4.1.0(cytoscape@3.33.1):
+    resolution: {integrity: sha512-wgQlVIUJF13Quxiv5e1gstZ08rnZj2XaLHGoFMYXz7SkNfCDOOteKBE6SYRfA9WxxI/iBc3ajfDoc6hb/MRAHQ==}
+    peerDependencies:
+      cytoscape: ^3.2.0
+    dependencies:
+      cose-base: 1.0.3
+      cytoscape: 3.33.1
+    dev: false
+
+  /cytoscape-fcose@2.2.0(cytoscape@3.33.1):
+    resolution: {integrity: sha512-ki1/VuRIHFCzxWNrsshHYPs6L7TvLu3DL+TyIGEsRcvVERmxokbf5Gdk7mFxZnTdiGtnA4cfSmjZJMviqSuZrQ==}
+    peerDependencies:
+      cytoscape: ^3.2.0
+    dependencies:
+      cose-base: 2.2.0
+      cytoscape: 3.33.1
+    dev: false
+
+  /cytoscape@3.33.1:
+    resolution: {integrity: sha512-iJc4TwyANnOGR1OmWhsS9ayRS3s+XQ185FmuHObThD+5AeJCakAAbWv8KimMTt08xCCLNgneQwFp+JRJOr9qGQ==}
+    engines: {node: '>=0.10'}
+    dev: false
+
+  /d3-array@2.12.1:
+    resolution: {integrity: sha512-B0ErZK/66mHtEsR1TkPEEkwdy+WDesimkM5gpZr5Dsg54BiTA5RXtYW5qTLIAcekaS9xfZrzBLF/OAkB3Qn1YQ==}
+    dependencies:
+      internmap: 1.0.1
+    dev: false
+
+  /d3-array@3.2.4:
+    resolution: {integrity: sha512-tdQAmyA18i4J7wprpYq8ClcxZy3SC31QMeByyCFyRt7BVHdREQZ5lpzoe5mFEYZUWe+oq8HBvk9JjpibyEV4Jg==}
+    engines: {node: '>=12'}
+    dependencies:
+      internmap: 2.0.3
+    dev: false
+
+  /d3-axis@3.0.0:
+    resolution: {integrity: sha512-IH5tgjV4jE/GhHkRV0HiVYPDtvfjHQlQfJHs0usq7M30XcSBvOotpmH1IgkcXsO/5gEQZD43B//fc7SRT5S+xw==}
+    engines: {node: '>=12'}
+    dev: false
+
+  /d3-brush@3.0.0:
+    resolution: {integrity: sha512-ALnjWlVYkXsVIGlOsuWH1+3udkYFI48Ljihfnh8FZPF2QS9o+PzGLBslO0PjzVoHLZ2KCVgAM8NVkXPJB2aNnQ==}
+    engines: {node: '>=12'}
+    dependencies:
+      d3-dispatch: 3.0.1
+      d3-drag: 3.0.0
+      d3-interpolate: 3.0.1
+      d3-selection: 3.0.0
+      d3-transition: 3.0.1(d3-selection@3.0.0)
+    dev: false
+
+  /d3-chord@3.0.1:
+    resolution: {integrity: sha512-VE5S6TNa+j8msksl7HwjxMHDM2yNK3XCkusIlpX5kwauBfXuyLAtNg9jCp/iHH61tgI4sb6R/EIMWCqEIdjT/g==}
+    engines: {node: '>=12'}
+    dependencies:
+      d3-path: 3.1.0
+    dev: false
+
+  /d3-color@3.1.0:
+    resolution: {integrity: sha512-zg/chbXyeBtMQ1LbD/WSoW2DpC3I0mpmPdW+ynRTj/x2DAWYrIY7qeZIHidozwV24m4iavr15lNwIwLxRmOxhA==}
+    engines: {node: '>=12'}
+    dev: false
+
+  /d3-contour@4.0.2:
+    resolution: {integrity: sha512-4EzFTRIikzs47RGmdxbeUvLWtGedDUNkTcmzoeyg4sP/dvCexO47AaQL7VKy/gul85TOxw+IBgA8US2xwbToNA==}
+    engines: {node: '>=12'}
+    dependencies:
+      d3-array: 3.2.4
+    dev: false
+
+  /d3-delaunay@6.0.4:
+    resolution: {integrity: sha512-mdjtIZ1XLAM8bm/hx3WwjfHt6Sggek7qH043O8KEjDXN40xi3vx/6pYSVTwLjEgiXQTbvaouWKynLBiUZ6SK6A==}
+    engines: {node: '>=12'}
+    dependencies:
+      delaunator: 5.0.1
+    dev: false
+
+  /d3-dispatch@3.0.1:
+    resolution: {integrity: sha512-rzUyPU/S7rwUflMyLc1ETDeBj0NRuHKKAcvukozwhshr6g6c5d8zh4c2gQjY2bZ0dXeGLWc1PF174P2tVvKhfg==}
+    engines: {node: '>=12'}
+    dev: false
+
+  /d3-drag@3.0.0:
+    resolution: {integrity: sha512-pWbUJLdETVA8lQNJecMxoXfH6x+mO2UQo8rSmZ+QqxcbyA3hfeprFgIT//HW2nlHChWeIIMwS2Fq+gEARkhTkg==}
+    engines: {node: '>=12'}
+    dependencies:
+      d3-dispatch: 3.0.1
+      d3-selection: 3.0.0
+    dev: false
+
+  /d3-dsv@3.0.1:
+    resolution: {integrity: sha512-UG6OvdI5afDIFP9w4G0mNq50dSOsXHJaRE8arAS5o9ApWnIElp8GZw1Dun8vP8OyHOZ/QJUKUJwxiiCCnUwm+Q==}
+    engines: {node: '>=12'}
+    hasBin: true
+    dependencies:
+      commander: 7.2.0
+      iconv-lite: 0.6.3
+      rw: 1.3.3
+    dev: false
+
+  /d3-ease@3.0.1:
+    resolution: {integrity: sha512-wR/XK3D3XcLIZwpbvQwQ5fK+8Ykds1ip7A2Txe0yxncXSdq1L9skcG7blcedkOX+ZcgxGAmLX1FrRGbADwzi0w==}
+    engines: {node: '>=12'}
+    dev: false
+
+  /d3-fetch@3.0.1:
+    resolution: {integrity: sha512-kpkQIM20n3oLVBKGg6oHrUchHM3xODkTzjMoj7aWQFq5QEM+R6E4WkzT5+tojDY7yjez8KgCBRoj4aEr99Fdqw==}
+    engines: {node: '>=12'}
+    dependencies:
+      d3-dsv: 3.0.1
+    dev: false
+
+  /d3-force@3.0.0:
+    resolution: {integrity: sha512-zxV/SsA+U4yte8051P4ECydjD/S+qeYtnaIyAs9tgHCqfguma/aAQDjo85A9Z6EKhBirHRJHXIgJUlffT4wdLg==}
+    engines: {node: '>=12'}
+    dependencies:
+      d3-dispatch: 3.0.1
+      d3-quadtree: 3.0.1
+      d3-timer: 3.0.1
+    dev: false
+
+  /d3-format@3.1.0:
+    resolution: {integrity: sha512-YyUI6AEuY/Wpt8KWLgZHsIU86atmikuoOmCfommt0LYHiQSPjvX2AcFc38PX0CBpr2RCyZhjex+NS/LPOv6YqA==}
+    engines: {node: '>=12'}
+    dev: false
+
+  /d3-geo@3.1.1:
+    resolution: {integrity: sha512-637ln3gXKXOwhalDzinUgY83KzNWZRKbYubaG+fGVuc/dxO64RRljtCTnf5ecMyE1RIdtqpkVcq0IbtU2S8j2Q==}
+    engines: {node: '>=12'}
+    dependencies:
+      d3-array: 3.2.4
+    dev: false
+
+  /d3-hierarchy@3.1.2:
+    resolution: {integrity: sha512-FX/9frcub54beBdugHjDCdikxThEqjnR93Qt7PvQTOHxyiNCAlvMrHhclk3cD5VeAaq9fxmfRp+CnWw9rEMBuA==}
+    engines: {node: '>=12'}
+    dev: false
+
+  /d3-interpolate@3.0.1:
+    resolution: {integrity: sha512-3bYs1rOD33uo8aqJfKP3JWPAibgw8Zm2+L9vBKEHJ2Rg+viTR7o5Mmv5mZcieN+FRYaAOWX5SJATX6k1PWz72g==}
+    engines: {node: '>=12'}
+    dependencies:
+      d3-color: 3.1.0
+    dev: false
+
+  /d3-path@1.0.9:
+    resolution: {integrity: sha512-VLaYcn81dtHVTjEHd8B+pbe9yHWpXKZUC87PzoFmsFrJqgFwDe/qxfp5MlfsfM1V5E/iVt0MmEbWQ7FVIXh/bg==}
+    dev: false
+
+  /d3-path@3.1.0:
+    resolution: {integrity: sha512-p3KP5HCf/bvjBSSKuXid6Zqijx7wIfNW+J/maPs+iwR35at5JCbLUT0LzF1cnjbCHWhqzQTIN2Jpe8pRebIEFQ==}
+    engines: {node: '>=12'}
+    dev: false
+
+  /d3-polygon@3.0.1:
+    resolution: {integrity: sha512-3vbA7vXYwfe1SYhED++fPUQlWSYTTGmFmQiany/gdbiWgU/iEyQzyymwL9SkJjFFuCS4902BSzewVGsHHmHtXg==}
+    engines: {node: '>=12'}
+    dev: false
+
+  /d3-quadtree@3.0.1:
+    resolution: {integrity: sha512-04xDrxQTDTCFwP5H6hRhsRcb9xxv2RzkcsygFzmkSIOJy3PeRJP7sNk3VRIbKXcog561P9oU0/rVH6vDROAgUw==}
+    engines: {node: '>=12'}
+    dev: false
+
+  /d3-random@3.0.1:
+    resolution: {integrity: sha512-FXMe9GfxTxqd5D6jFsQ+DJ8BJS4E/fT5mqqdjovykEB2oFbTMDVdg1MGFxfQW+FBOGoB++k8swBrgwSHT1cUXQ==}
+    engines: {node: '>=12'}
+    dev: false
+
+  /d3-sankey@0.12.3:
+    resolution: {integrity: sha512-nQhsBRmM19Ax5xEIPLMY9ZmJ/cDvd1BG3UVvt5h3WRxKg5zGRbvnteTyWAbzeSvlh3tW7ZEmq4VwR5mB3tutmQ==}
+    dependencies:
+      d3-array: 2.12.1
+      d3-shape: 1.3.7
+    dev: false
+
+  /d3-scale-chromatic@3.1.0:
+    resolution: {integrity: sha512-A3s5PWiZ9YCXFye1o246KoscMWqf8BsD9eRiJ3He7C9OBaxKhAd5TFCdEx/7VbKtxxTsu//1mMJFrEt572cEyQ==}
+    engines: {node: '>=12'}
+    dependencies:
+      d3-color: 3.1.0
+      d3-interpolate: 3.0.1
+    dev: false
+
+  /d3-scale@4.0.2:
+    resolution: {integrity: sha512-GZW464g1SH7ag3Y7hXjf8RoUuAFIqklOAq3MRl4OaWabTFJY9PN/E1YklhXLh+OQ3fM9yS2nOkCoS+WLZ6kvxQ==}
+    engines: {node: '>=12'}
+    dependencies:
+      d3-array: 3.2.4
+      d3-format: 3.1.0
+      d3-interpolate: 3.0.1
+      d3-time: 3.1.0
+      d3-time-format: 4.1.0
+    dev: false
+
+  /d3-selection@3.0.0:
+    resolution: {integrity: sha512-fmTRWbNMmsmWq6xJV8D19U/gw/bwrHfNXxrIN+HfZgnzqTHp9jOmKMhsTUjXOJnZOdZY9Q28y4yebKzqDKlxlQ==}
+    engines: {node: '>=12'}
+    dev: false
+
+  /d3-shape@1.3.7:
+    resolution: {integrity: sha512-EUkvKjqPFUAZyOlhY5gzCxCeI0Aep04LwIRpsZ/mLFelJiUfnK56jo5JMDSE7yyP2kLSb6LtF+S5chMk7uqPqw==}
+    dependencies:
+      d3-path: 1.0.9
+    dev: false
+
+  /d3-shape@3.2.0:
+    resolution: {integrity: sha512-SaLBuwGm3MOViRq2ABk3eLoxwZELpH6zhl3FbAoJ7Vm1gofKx6El1Ib5z23NUEhF9AsGl7y+dzLe5Cw2AArGTA==}
+    engines: {node: '>=12'}
+    dependencies:
+      d3-path: 3.1.0
+    dev: false
+
+  /d3-time-format@4.1.0:
+    resolution: {integrity: sha512-dJxPBlzC7NugB2PDLwo9Q8JiTR3M3e4/XANkreKSUxF8vvXKqm1Yfq4Q5dl8budlunRVlUUaDUgFt7eA8D6NLg==}
+    engines: {node: '>=12'}
+    dependencies:
+      d3-time: 3.1.0
+    dev: false
+
+  /d3-time@3.1.0:
+    resolution: {integrity: sha512-VqKjzBLejbSMT4IgbmVgDjpkYrNWUYJnbCGo874u7MMKIWsILRX+OpX/gTk8MqjpT1A/c6HY2dCA77ZN0lkQ2Q==}
+    engines: {node: '>=12'}
+    dependencies:
+      d3-array: 3.2.4
+    dev: false
+
+  /d3-timer@3.0.1:
+    resolution: {integrity: sha512-ndfJ/JxxMd3nw31uyKoY2naivF+r29V+Lc0svZxe1JvvIRmi8hUsrMvdOwgS1o6uBHmiz91geQ0ylPP0aj1VUA==}
+    engines: {node: '>=12'}
+    dev: false
+
+  /d3-transition@3.0.1(d3-selection@3.0.0):
+    resolution: {integrity: sha512-ApKvfjsSR6tg06xrL434C0WydLr7JewBB3V+/39RMHsaXTOG0zmt/OAXeng5M5LBm0ojmxJrpomQVZ1aPvBL4w==}
+    engines: {node: '>=12'}
+    peerDependencies:
+      d3-selection: 2 - 3
+    dependencies:
+      d3-color: 3.1.0
+      d3-dispatch: 3.0.1
+      d3-ease: 3.0.1
+      d3-interpolate: 3.0.1
+      d3-selection: 3.0.0
+      d3-timer: 3.0.1
+    dev: false
+
+  /d3-zoom@3.0.0:
+    resolution: {integrity: sha512-b8AmV3kfQaqWAuacbPuNbL6vahnOJflOhexLzMMNLga62+/nh0JzvJ0aO/5a5MVgUFGS7Hu1P9P03o3fJkDCyw==}
+    engines: {node: '>=12'}
+    dependencies:
+      d3-dispatch: 3.0.1
+      d3-drag: 3.0.0
+      d3-interpolate: 3.0.1
+      d3-selection: 3.0.0
+      d3-transition: 3.0.1(d3-selection@3.0.0)
+    dev: false
+
+  /d3@7.9.0:
+    resolution: {integrity: sha512-e1U46jVP+w7Iut8Jt8ri1YsPOvFpg46k+K8TpCb0P+zjCkjkPnV7WzfDJzMHy1LnA+wj5pLT1wjO901gLXeEhA==}
+    engines: {node: '>=12'}
+    dependencies:
+      d3-array: 3.2.4
+      d3-axis: 3.0.0
+      d3-brush: 3.0.0
+      d3-chord: 3.0.1
+      d3-color: 3.1.0
+      d3-contour: 4.0.2
+      d3-delaunay: 6.0.4
+      d3-dispatch: 3.0.1
+      d3-drag: 3.0.0
+      d3-dsv: 3.0.1
+      d3-ease: 3.0.1
+      d3-fetch: 3.0.1
+      d3-force: 3.0.0
+      d3-format: 3.1.0
+      d3-geo: 3.1.1
+      d3-hierarchy: 3.1.2
+      d3-interpolate: 3.0.1
+      d3-path: 3.1.0
+      d3-polygon: 3.0.1
+      d3-quadtree: 3.0.1
+      d3-random: 3.0.1
+      d3-scale: 4.0.2
+      d3-scale-chromatic: 3.1.0
+      d3-selection: 3.0.0
+      d3-shape: 3.2.0
+      d3-time: 3.1.0
+      d3-time-format: 4.1.0
+      d3-timer: 3.0.1
+      d3-transition: 3.0.1(d3-selection@3.0.0)
+      d3-zoom: 3.0.0
+    dev: false
+
+  /dagre-d3-es@7.0.11:
+    resolution: {integrity: sha512-tvlJLyQf834SylNKax8Wkzco/1ias1OPw8DcUMDE7oUIoSEW25riQVuiu/0OWEFqT0cxHT3Pa9/D82Jr47IONw==}
+    dependencies:
+      d3: 7.9.0
+      lodash-es: 4.17.21
+    dev: false
+
+  /dayjs@1.11.13:
+    resolution: {integrity: sha512-oaMBel6gjolK862uaPQOVTA7q3TZhuSvuMQAAglQDOWYO9A91IrAOUJEyKVlqJlHE0vq5p5UXxzdPfMH/x6xNg==}
+    dev: false
+
+  /debug@2.6.9:
+    resolution: {integrity: sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA==}
+    peerDependencies:
+      supports-color: '*'
+    peerDependenciesMeta:
+      supports-color:
+        optional: true
+    dependencies:
+      ms: 2.0.0
+    dev: false
+
+  /debug@4.4.1:
+    resolution: {integrity: sha512-KcKCqiftBJcZr++7ykoDIEwSa3XWowTfNPo92BYxjXiyYEVrUQh2aLyhxBCwww+heortUFxEJYcRzosstTEBYQ==}
+    engines: {node: '>=6.0'}
+    peerDependencies:
+      supports-color: '*'
+    peerDependenciesMeta:
+      supports-color:
+        optional: true
+    dependencies:
+      ms: 2.1.3
+    dev: false
+
+  /decode-named-character-reference@1.2.0:
+    resolution: {integrity: sha512-c6fcElNV6ShtZXmsgNgFFV5tVX2PaV4g+MOAkb8eXHvn6sryJBrZa9r0zV6+dtTyoCKxtDy5tyQ5ZwQuidtd+Q==}
+    dependencies:
+      character-entities: 2.0.2
+    dev: false
+
+  /dedent@1.6.0:
+    resolution: {integrity: sha512-F1Z+5UCFpmQUzJa11agbyPVMbpgT/qA3/SKyJ1jyBgm7dUcUEa8v9JwDkerSQXfakBwFljIxhOJqGkjUwZ9FSA==}
+    peerDependencies:
+      babel-plugin-macros: ^3.1.0
+    peerDependenciesMeta:
+      babel-plugin-macros:
+        optional: true
+    dev: false
+
+  /deep-object-diff@1.1.9:
+    resolution: {integrity: sha512-Rn+RuwkmkDwCi2/oXOFS9Gsr5lJZu/yTGpK7wAaAIE75CC+LCGEZHpY6VQJa/RoJcrmaA/docWJZvYohlNkWPA==}
+    dev: false
+
+  /deepmerge@4.3.1:
+    resolution: {integrity: sha512-3sUqbMEc77XqpdNO7FRyRog+eW3ph+GYCbj+rK+uYyRMuwsVy0rMiVtPn+QJlKFvWP/1PYpapqYn0Me2knFn+A==}
+    engines: {node: '>=0.10.0'}
+    dev: false
+
+  /define-data-property@1.1.4:
+    resolution: {integrity: sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==}
+    engines: {node: '>= 0.4'}
+    dependencies:
+      es-define-property: 1.0.1
+      es-errors: 1.3.0
+      gopd: 1.2.0
+    dev: true
+
+  /delaunator@5.0.1:
+    resolution: {integrity: sha512-8nvh+XBe96aCESrGOqMp/84b13H9cdKbG5P2ejQCh4d4sK9RL4371qou9drQjMhvnPmhWl5hnmqbEE0fXr9Xnw==}
+    dependencies:
+      robust-predicates: 3.0.2
+    dev: false
+
+  /depd@2.0.0:
+    resolution: {integrity: sha512-g7nH6P6dyDioJogAAGprGpCtVImJhpPk/roCzdb3fIh61/s/nPsfR6onyMwkCAR/OlC3yBC0lESvUoQEAssIrw==}
+    engines: {node: '>= 0.8'}
+    dev: false
+
+  /dequal@2.0.3:
+    resolution: {integrity: sha512-0je+qPKHEMohvfRTCEo3CrPG6cAzAYgmzKyxRiYSSDkS6eGJdyVJm7WaYA5ECaAD9wLB2T4EEeymA5aFVcYXCA==}
+    engines: {node: '>=6'}
+    dev: false
+
+  /destroy@1.2.0:
+    resolution: {integrity: sha512-2sJGJTaXIIaR1w4iJSNoN0hnMY7Gpc/n8D4qSCJw8QqFWXf7cuAgnEHxBpweaVcPevC2l3KpjYCx3NypQQgaJg==}
+    engines: {node: '>= 0.8', npm: 1.2.8000 || >= 1.4.16}
+    dev: false
+
+  /detect-libc@2.0.4:
+    resolution: {integrity: sha512-3UDv+G9CsCKO1WKMGw9fwq/SWJYbI0c5Y7LU1AXYoDdbhE2AHQ6N6Nb34sG8Fj7T5APy8qXDCKuuIHd1BR0tVA==}
+    engines: {node: '>=8'}
+    dev: false
+
+  /detect-node-es@1.1.0:
+    resolution: {integrity: sha512-ypdmJU/TbBby2Dxibuv7ZLW3Bs1QEmM7nHjEANfohJLvE0XVujisn1qPJcZxg+qDucsr+bP6fLD1rPS3AhJ7EQ==}
+    dev: false
+
+  /detect-package-manager@3.0.2:
+    resolution: {integrity: sha512-8JFjJHutStYrfWwzfretQoyNGoZVW1Fsrp4JO9spa7h/fBfwgTMEIy4/LBzRDGsxwVPHU0q+T9YvwLDJoOApLQ==}
+    engines: {node: '>=12'}
+    dependencies:
+      execa: 5.1.1
+    dev: false
+
+  /devlop@1.1.0:
+    resolution: {integrity: sha512-RWmIqhcFf1lRYBvNmr7qTNuyCt/7/ns2jbpp1+PalgE/rDQcBT0fioSMUpJ93irlUhC5hrg4cYqe6U+0ImW0rA==}
+    dependencies:
+      dequal: 2.0.3
+    dev: false
+
+  /direction@2.0.1:
+    resolution: {integrity: sha512-9S6m9Sukh1cZNknO1CWAr2QAWsbKLafQiyM5gZ7VgXHeuaoUwffKN4q6NC4A/Mf9iiPlOXQEKW/Mv/mh9/3YFA==}
+    hasBin: true
+    dev: false
+
+  /dompurify@3.2.6:
+    resolution: {integrity: sha512-/2GogDQlohXPZe6D6NOgQvXLPSYBqIWMnZ8zzOhn09REE4eyAzb+Hed3jhoM9OkuaJ8P6ZGTTVWQKAi8ieIzfQ==}
+    optionalDependencies:
+      '@types/trusted-types': 2.0.7
+    dev: false
+
+  /dunder-proto@1.0.1:
+    resolution: {integrity: sha512-KIN/nDJBQRcXw0MLVhZE9iQHmG68qAVIBg9CqmUYjmQIhgij9U5MFvrqkUL5FbtyyzZuOeOt0zdeRe4UY7ct+A==}
+    engines: {node: '>= 0.4'}
+    dependencies:
+      call-bind-apply-helpers: 1.0.2
+      es-errors: 1.3.0
+      gopd: 1.2.0
+    dev: true
+
+  /eastasianwidth@0.2.0:
+    resolution: {integrity: sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==}
+
+  /ee-first@1.1.1:
+    resolution: {integrity: sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==}
+    dev: false
+
+  /electron-to-chromium@1.5.203:
+    resolution: {integrity: sha512-uz4i0vLhfm6dLZWbz/iH88KNDV+ivj5+2SA+utpgjKaj9Q0iDLuwk6Idhe9BTxciHudyx6IvTvijhkPvFGUQ0g==}
+    dev: false
+
+  /emoji-regex-xs@1.0.0:
+    resolution: {integrity: sha512-LRlerrMYoIDrT6jgpeZ2YYl/L8EulRTt5hQcYjy5AInh7HWXKimpqx68aknBFpGL2+/IcogTcaydJEgaTmOpDg==}
+    dev: false
+
+  /emoji-regex@10.4.0:
+    resolution: {integrity: sha512-EC+0oUMY1Rqm4O6LLrgjtYDvcVYTy7chDnM4Q7030tP4Kwj3u/pR6gP9ygnp2CJMK5Gq+9Q2oqmrFJAz01DXjw==}
+    dev: false
+
+  /emoji-regex@8.0.0:
+    resolution: {integrity: sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==}
+    dev: true
+
+  /emoji-regex@9.2.2:
+    resolution: {integrity: sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==}
+    dev: true
+
+  /encodeurl@1.0.2:
+    resolution: {integrity: sha512-TPJXq8JqFaVYm2CWmPvnP2Iyo4ZSM7/QKcSmuMLDObfpH5fi7RUGmd/rTDf+rut/saiDiQEeVTNgAmJEdAOx0w==}
+    engines: {node: '>= 0.8'}
+    dev: false
+
+  /encodeurl@2.0.0:
+    resolution: {integrity: sha512-Q0n9HRi4m6JuGIV1eFlmvJB7ZEVxu93IrMyiMsGC0lrMJMWzRgx6WGquyfQgZVb31vhGgXnfmPNNXmxnOkRBrg==}
+    engines: {node: '>= 0.8'}
+    dev: false
+
+  /enhanced-resolve@5.18.3:
+    resolution: {integrity: sha512-d4lC8xfavMeBjzGr2vECC3fsGXziXZQyJxD868h2M/mBI3PwAuODxAkLkq5HYuvrPYcUtiLzsTo8U3PgX3Ocww==}
+    engines: {node: '>=10.13.0'}
+    dependencies:
+      graceful-fs: 4.2.11
+      tapable: 2.2.2
+    dev: false
+
+  /entities@6.0.1:
+    resolution: {integrity: sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g==}
+    engines: {node: '>=0.12'}
+    dev: false
+
+  /es-define-property@1.0.1:
+    resolution: {integrity: sha512-e3nRfgfUZ4rNGL232gUgX06QNyyez04KdjFrF+LTRoOXmrOgFKDg4BCdsjW8EnT69eqdYGmRpJwiPVYNrCaW3g==}
+    engines: {node: '>= 0.4'}
+    dev: true
+
+  /es-errors@1.3.0:
+    resolution: {integrity: sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==}
+    engines: {node: '>= 0.4'}
+    dev: true
+
+  /es-module-lexer@1.7.0:
+    resolution: {integrity: sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA==}
+    dev: false
+
+  /es-object-atoms@1.1.1:
+    resolution: {integrity: sha512-FGgH2h8zKNim9ljj7dankFPcICIK9Cp5bm+c2gQSYePhpaG5+esrLODihIorn+Pe6FGJzWhXQotPv73jTaldXA==}
+    engines: {node: '>= 0.4'}
+    dependencies:
+      es-errors: 1.3.0
+    dev: true
+
+  /esast-util-from-estree@2.0.0:
+    resolution: {integrity: sha512-4CyanoAudUSBAn5K13H4JhsMH6L9ZP7XbLVe/dKybkxMO7eDyLsT8UHl9TRNrU2Gr9nz+FovfSIjuXWJ81uVwQ==}
+    dependencies:
+      '@types/estree-jsx': 1.0.5
+      devlop: 1.1.0
+      estree-util-visit: 2.0.0
+      unist-util-position-from-estree: 2.0.0
+    dev: false
+
+  /esast-util-from-js@2.0.1:
+    resolution: {integrity: sha512-8Ja+rNJ0Lt56Pcf3TAmpBZjmx8ZcK5Ts4cAzIOjsjevg9oSXJnl6SUQ2EevU8tv3h6ZLWmoKL5H4fgWvdvfETw==}
+    dependencies:
+      '@types/estree-jsx': 1.0.5
+      acorn: 8.15.0
+      esast-util-from-estree: 2.0.0
+      vfile-message: 4.0.3
+    dev: false
+
+  /esbuild@0.25.9:
+    resolution: {integrity: sha512-CRbODhYyQx3qp7ZEwzxOk4JBqmD/seJrzPa/cGjY1VtIn5E09Oi9/dB4JwctnfZ8Q8iT7rioVv5k/FNT/uf54g==}
+    engines: {node: '>=18'}
+    hasBin: true
+    requiresBuild: true
+    optionalDependencies:
+      '@esbuild/aix-ppc64': 0.25.9
+      '@esbuild/android-arm': 0.25.9
+      '@esbuild/android-arm64': 0.25.9
+      '@esbuild/android-x64': 0.25.9
+      '@esbuild/darwin-arm64': 0.25.9
+      '@esbuild/darwin-x64': 0.25.9
+      '@esbuild/freebsd-arm64': 0.25.9
+      '@esbuild/freebsd-x64': 0.25.9
+      '@esbuild/linux-arm': 0.25.9
+      '@esbuild/linux-arm64': 0.25.9
+      '@esbuild/linux-ia32': 0.25.9
+      '@esbuild/linux-loong64': 0.25.9
+      '@esbuild/linux-mips64el': 0.25.9
+      '@esbuild/linux-ppc64': 0.25.9
+      '@esbuild/linux-riscv64': 0.25.9
+      '@esbuild/linux-s390x': 0.25.9
+      '@esbuild/linux-x64': 0.25.9
+      '@esbuild/netbsd-arm64': 0.25.9
+      '@esbuild/netbsd-x64': 0.25.9
+      '@esbuild/openbsd-arm64': 0.25.9
+      '@esbuild/openbsd-x64': 0.25.9
+      '@esbuild/openharmony-arm64': 0.25.9
+      '@esbuild/sunos-x64': 0.25.9
+      '@esbuild/win32-arm64': 0.25.9
+      '@esbuild/win32-ia32': 0.25.9
+      '@esbuild/win32-x64': 0.25.9
+    dev: false
+
+  /escalade@3.2.0:
+    resolution: {integrity: sha512-WUj2qlxaQtO4g6Pq5c29GTcWGDyd8itL8zTlipgECz3JesAiiOKotd8JU6otB3PACgG6xkJUyVhboMS+bje/jA==}
+    engines: {node: '>=6'}
+    dev: false
+
+  /escape-html@1.0.3:
+    resolution: {integrity: sha512-NiSupZ4OeuGwr68lGIeym/ksIZMJodUGOSCZ/FSnTxcrekbvqrgdUxlJOMpijaKZVjAJrWrGs/6Jy8OMuyj9ow==}
+    dev: false
+
+  /escape-string-regexp@5.0.0:
+    resolution: {integrity: sha512-/veY75JbMK4j1yjvuUxuVsiS/hr/4iHs9FTT6cgTexxdE0Ly/glccBAkloH/DofkjRbZU3bnoj38mOmhkZ0lHw==}
+    engines: {node: '>=12'}
+    dev: false
+
+  /estree-util-attach-comments@3.0.0:
+    resolution: {integrity: sha512-cKUwm/HUcTDsYh/9FgnuFqpfquUbwIqwKM26BVCGDPVgvaCl/nDCCjUfiLlx6lsEZ3Z4RFxNbOQ60pkaEwFxGw==}
+    dependencies:
+      '@types/estree': 1.0.8
+    dev: false
+
+  /estree-util-build-jsx@3.0.1:
+    resolution: {integrity: sha512-8U5eiL6BTrPxp/CHbs2yMgP8ftMhR5ww1eIKoWRMlqvltHF8fZn5LRDvTKuxD3DUn+shRbLGqXemcP51oFCsGQ==}
+    dependencies:
+      '@types/estree-jsx': 1.0.5
+      devlop: 1.1.0
+      estree-util-is-identifier-name: 3.0.0
+      estree-walker: 3.0.3
+    dev: false
+
+  /estree-util-is-identifier-name@3.0.0:
+    resolution: {integrity: sha512-hFtqIDZTIUZ9BXLb8y4pYGyk6+wekIivNVTcmvk8NoOh+VeRn5y6cEHzbURrWbfp1fIqdVipilzj+lfaadNZmg==}
+    dev: false
+
+  /estree-util-scope@1.0.0:
+    resolution: {integrity: sha512-2CAASclonf+JFWBNJPndcOpA8EMJwa0Q8LUFJEKqXLW6+qBvbFZuF5gItbQOs/umBUkjviCSDCbBwU2cXbmrhQ==}
+    dependencies:
+      '@types/estree': 1.0.8
+      devlop: 1.1.0
+    dev: false
+
+  /estree-util-to-js@2.0.0:
+    resolution: {integrity: sha512-WDF+xj5rRWmD5tj6bIqRi6CkLIXbbNQUcxQHzGysQzvHmdYG2G7p/Tf0J0gpxGgkeMZNTIjT/AoSvC9Xehcgdg==}
+    dependencies:
+      '@types/estree-jsx': 1.0.5
+      astring: 1.9.0
+      source-map: 0.7.6
+    dev: false
+
+  /estree-util-value-to-estree@3.4.0:
+    resolution: {integrity: sha512-Zlp+gxis+gCfK12d3Srl2PdX2ybsEA8ZYy6vQGVQTNNYLEGRQQ56XB64bjemN8kxIKXP1nC9ip4Z+ILy9LGzvQ==}
+    dependencies:
+      '@types/estree': 1.0.8
+    dev: false
+
+  /estree-util-visit@2.0.0:
+    resolution: {integrity: sha512-m5KgiH85xAhhW8Wta0vShLcUvOsh3LLPI2YVwcbio1l7E09NTLL1EyMZFM1OyWowoH0skScNbhOPl4kcBgzTww==}
+    dependencies:
+      '@types/estree-jsx': 1.0.5
+      '@types/unist': 3.0.3
+    dev: false
+
+  /estree-walker@2.0.2:
+    resolution: {integrity: sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w==}
+    dev: false
+
+  /estree-walker@3.0.3:
+    resolution: {integrity: sha512-7RUKfXgSMMkzt6ZuXmqapOurLGPPfgj6l9uRZ7lRGolvk0y2yocc35LdcxKC5PQZdn2DMqioAQ2NoWcrTKmm6g==}
+    dependencies:
+      '@types/estree': 1.0.8
+    dev: false
+
+  /etag@1.8.1:
+    resolution: {integrity: sha512-aIL5Fx7mawVa300al2BnEE4iNvo1qETxLrPI/o05L7z6go7fCw1J6EQmbK4FmJ2AS7kgVF/KEZWufBfdClMcPg==}
+    engines: {node: '>= 0.6'}
+    dev: false
+
+  /eval@0.1.8:
+    resolution: {integrity: sha512-EzV94NYKoO09GLXGjXj9JIlXijVck4ONSr5wiCWDvhsvj5jxSrzTmRU/9C1DyB6uToszLs8aifA6NQ7lEQdvFw==}
+    engines: {node: '>= 0.8'}
+    dependencies:
+      '@types/node': 24.3.0
+      require-like: 0.1.2
+    dev: false
+
+  /execa@5.1.1:
+    resolution: {integrity: sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg==}
+    engines: {node: '>=10'}
+    dependencies:
+      cross-spawn: 7.0.6
+      get-stream: 6.0.1
+      human-signals: 2.1.0
+      is-stream: 2.0.1
+      merge-stream: 2.0.0
+      npm-run-path: 4.0.1
+      onetime: 5.1.2
+      signal-exit: 3.0.7
+      strip-final-newline: 2.0.0
+    dev: false
+
+  /exsolve@1.0.7:
+    resolution: {integrity: sha512-VO5fQUzZtI6C+vx4w/4BWJpg3s/5l+6pRQEHzFRM8WFi4XffSP1Z+4qi7GbjWbvRQEbdIco5mIMq+zX4rPuLrw==}
+    dev: false
+
+  /extend@3.0.2:
+    resolution: {integrity: sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==}
+    dev: false
+
+  /fast-glob@3.3.3:
+    resolution: {integrity: sha512-7MptL8U0cqcFdzIzwOTHoilX9x5BrNqye7Z/LuC7kCMRio1EMSyqRK3BEAUD7sXRq4iT4AzTVuZdhgQ2TCvYLg==}
+    engines: {node: '>=8.6.0'}
+    dependencies:
+      '@nodelib/fs.stat': 2.0.5
+      '@nodelib/fs.walk': 1.2.8
+      glob-parent: 5.1.2
+      merge2: 1.4.1
+      micromatch: 4.0.8
+    dev: false
+
+  /fastq@1.19.1:
+    resolution: {integrity: sha512-GwLTyxkCXjXbxqIhTsMI2Nui8huMPtnxg7krajPJAjnEG/iiOS7i+zCtWGZR9G0NBKbXKh6X9m9UIsYX/N6vvQ==}
+    dependencies:
+      reusify: 1.1.0
+    dev: false
+
+  /fault@2.0.1:
+    resolution: {integrity: sha512-WtySTkS4OKev5JtpHXnib4Gxiurzh5NCGvWrFaZ34m6JehfTUhKZvn9njTfw48t6JumVQOmrKqpmGcdwxnhqBQ==}
+    dependencies:
+      format: 0.2.2
+    dev: false
+
+  /fdir@6.5.0(picomatch@4.0.3):
+    resolution: {integrity: sha512-tIbYtZbucOs0BRGqPJkshJUYdL+SDH7dVM8gjy+ERp3WAUjLEFJE+02kanyHtwjWOnwrKYBiwAmM0p4kLJAnXg==}
+    engines: {node: '>=12.0.0'}
+    peerDependencies:
+      picomatch: ^3 || ^4
+    peerDependenciesMeta:
+      picomatch:
+        optional: true
+    dependencies:
+      picomatch: 4.0.3
+    dev: false
+
+  /fill-range@7.1.1:
+    resolution: {integrity: sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==}
+    engines: {node: '>=8'}
+    dependencies:
+      to-regex-range: 5.0.1
+
+  /find-up@5.0.0:
+    resolution: {integrity: sha512-78/PXT1wlLLDgTzDs7sjq9hzz0vXD+zn+7wypEe4fXQxCmdmqfGsEPQxmiCSQI3ajFV91bVSsvNtrJRiW6nGng==}
+    engines: {node: '>=10'}
+    dependencies:
+      locate-path: 6.0.0
+      path-exists: 4.0.0
+    dev: false
+
+  /find-yarn-workspace-root@2.0.0:
+    resolution: {integrity: sha512-1IMnbjt4KzsQfnhnzNd8wUEgXZ44IzZaZmnLYx7D5FZlaHt2gW20Cri8Q+E/t5tIj4+epTBub+2Zxu/vNILzqQ==}
+    dependencies:
+      micromatch: 4.0.8
+    dev: true
+
+  /foreground-child@3.3.1:
+    resolution: {integrity: sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw==}
+    engines: {node: '>=14'}
+    dependencies:
+      cross-spawn: 7.0.6
+      signal-exit: 4.1.0
+    dev: true
+
+  /format@0.2.2:
+    resolution: {integrity: sha512-wzsgA6WOq+09wrU1tsJ09udeR/YZRaeArL9e1wPbFg3GG2yDnC2ldKpxs4xunpFF9DgqCqOIra3bc1HWrJ37Ww==}
+    engines: {node: '>=0.4.x'}
+    dev: false
+
+  /fraction.js@4.3.7:
+    resolution: {integrity: sha512-ZsDfxO51wGAXREY55a7la9LScWpwv9RxIrYABrlvOFBlH/ShPnrtsXeuUIfXKKOVicNxQ+o8JTbJvjS4M89yew==}
+    dev: false
+
+  /fresh@0.5.2:
+    resolution: {integrity: sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q==}
+    engines: {node: '>= 0.6'}
+    dev: false
+
+  /fs-extra@11.3.1:
+    resolution: {integrity: sha512-eXvGGwZ5CL17ZSwHWd3bbgk7UUpF6IFHtP57NYYakPvHOs8GDgDe5KJI36jIJzDkJ6eJjuzRA8eBQb6SkKue0g==}
+    engines: {node: '>=14.14'}
+    dependencies:
+      graceful-fs: 4.2.11
+      jsonfile: 6.2.0
+      universalify: 2.0.1
+    dev: false
+
+  /fs-extra@9.1.0:
+    resolution: {integrity: sha512-hcg3ZmepS30/7BSFqRvoo3DOMQu7IjqxO5nCDt+zM9XWjb33Wg7ziNT+Qvqbuc3+gWpzO02JubVyk2G4Zvo1OQ==}
+    engines: {node: '>=10'}
+    dependencies:
+      at-least-node: 1.0.0
+      graceful-fs: 4.2.11
+      jsonfile: 6.2.0
+      universalify: 2.0.1
+    dev: true
+
+  /fs.realpath@1.0.0:
+    resolution: {integrity: sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==}
+    dev: true
+
+  /fsevents@2.3.2:
+    resolution: {integrity: sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==}
+    engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0}
+    os: [darwin]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /fsevents@2.3.3:
+    resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==}
+    engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0}
+    os: [darwin]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /function-bind@1.1.2:
+    resolution: {integrity: sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==}
+    dev: true
+
+  /gensync@1.0.0-beta.2:
+    resolution: {integrity: sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg==}
+    engines: {node: '>=6.9.0'}
+    dev: false
+
+  /get-intrinsic@1.3.0:
+    resolution: {integrity: sha512-9fSjSaos/fRIVIp+xSJlE6lfwhES7LNtKaCBIamHsjr2na1BiABJPo0mOjjz8GJDURarmCPGqaiVg5mfjb98CQ==}
+    engines: {node: '>= 0.4'}
+    dependencies:
+      call-bind-apply-helpers: 1.0.2
+      es-define-property: 1.0.1
+      es-errors: 1.3.0
+      es-object-atoms: 1.1.1
+      function-bind: 1.1.2
+      get-proto: 1.0.1
+      gopd: 1.2.0
+      has-symbols: 1.1.0
+      hasown: 2.0.2
+      math-intrinsics: 1.1.0
+    dev: true
+
+  /get-nonce@1.0.1:
+    resolution: {integrity: sha512-FJhYRoDaiatfEkUK8HKlicmu/3SGFD51q3itKDGoSTysQJBnfOcxU5GxnhE1E6soB76MbT0MBtnKJuXyAx+96Q==}
+    engines: {node: '>=6'}
+    dev: false
+
+  /get-proto@1.0.1:
+    resolution: {integrity: sha512-sTSfBjoXBp89JvIKIefqw7U2CCebsc74kiY6awiGogKtoSGbgjYE/G/+l9sF3MWFPNc9IcoOC4ODfKHfxFmp0g==}
+    engines: {node: '>= 0.4'}
+    dependencies:
+      dunder-proto: 1.0.1
+      es-object-atoms: 1.1.1
+    dev: true
+
+  /get-stream@6.0.1:
+    resolution: {integrity: sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==}
+    engines: {node: '>=10'}
+    dev: false
+
+  /github-slugger@2.0.0:
+    resolution: {integrity: sha512-IaOQ9puYtjrkq7Y0Ygl9KDZnrf/aiUJYUpVf89y8kyaxbRG7Y1SrX/jaumrv81vc61+kiMempujsM3Yw7w5qcw==}
+    dev: false
+
+  /glob-parent@5.1.2:
+    resolution: {integrity: sha512-AOIgSQCepiJYwP3ARnGx+5VnTu2HBYdzbGP45eLw1vr3zB3vZLeyed1sC9hnbcOc9/SrMyM5RPQrkGz4aS9Zow==}
+    engines: {node: '>= 6'}
+    dependencies:
+      is-glob: 4.0.3
+    dev: false
+
+  /glob@11.0.3:
+    resolution: {integrity: sha512-2Nim7dha1KVkaiF4q6Dj+ngPPMdfvLJEOpZk/jKiUAkqKebpGAWQXAq9z1xu9HKu5lWfqw/FASuccEjyznjPaA==}
+    engines: {node: 20 || >=22}
+    hasBin: true
+    dependencies:
+      foreground-child: 3.3.1
+      jackspeak: 4.1.1
+      minimatch: 10.0.3
+      minipass: 7.1.2
+      package-json-from-dist: 1.0.1
+      path-scurry: 2.0.0
+    dev: true
+
+  /glob@7.2.3:
+    resolution: {integrity: sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==}
+    deprecated: Glob versions prior to v9 are no longer supported
+    dependencies:
+      fs.realpath: 1.0.0
+      inflight: 1.0.6
+      inherits: 2.0.4
+      minimatch: 3.1.2
+      once: 1.4.0
+      path-is-absolute: 1.0.1
+    dev: true
+
+  /globals@15.15.0:
+    resolution: {integrity: sha512-7ACyT3wmyp3I61S4fG682L0VA2RGD9otkqGJIwNUMF1SWUombIIk+af1unuDYgMm082aHYwD+mzJvv9Iu8dsgg==}
+    engines: {node: '>=18'}
+    dev: false
+
+  /globby@14.1.0:
+    resolution: {integrity: sha512-0Ia46fDOaT7k4og1PDW4YbodWWr3scS2vAr2lTbsplOt2WkKp0vQbkI9wKis/T5LV/dqPjO3bpS/z6GTJB82LA==}
+    engines: {node: '>=18'}
+    dependencies:
+      '@sindresorhus/merge-streams': 2.3.0
+      fast-glob: 3.3.3
+      ignore: 7.0.5
+      path-type: 6.0.0
+      slash: 5.1.0
+      unicorn-magic: 0.3.0
+    dev: false
+
+  /gopd@1.2.0:
+    resolution: {integrity: sha512-ZUKRh6/kUFoAiTAtTYPZJ3hw9wNxx+BIBOijnlG9PnrJsCcSjs1wyyD6vJpaYtgnzDrKYRSqf3OO6Rfa93xsRg==}
+    engines: {node: '>= 0.4'}
+    dev: true
+
+  /graceful-fs@4.2.11:
+    resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==}
+
+  /hachure-fill@0.5.2:
+    resolution: {integrity: sha512-3GKBOn+m2LX9iq+JC1064cSFprJY4jL1jCXTcpnfER5HYE2l/4EfWSGzkPa/ZDBmYI0ZOEj5VHV/eKnPGkHuOg==}
+    dev: false
+
+  /has-flag@4.0.0:
+    resolution: {integrity: sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==}
+    engines: {node: '>=8'}
+    dev: true
+
+  /has-property-descriptors@1.0.2:
+    resolution: {integrity: sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==}
+    dependencies:
+      es-define-property: 1.0.1
+    dev: true
+
+  /has-symbols@1.1.0:
+    resolution: {integrity: sha512-1cDNdwJ2Jaohmb3sg4OmKaMBwuC48sYni5HUw2DvsC8LjGTLK9h+eb1X6RyuOHe4hT0ULCW68iomhjUoKUqlPQ==}
+    engines: {node: '>= 0.4'}
+    dev: true
+
+  /hasown@2.0.2:
+    resolution: {integrity: sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==}
+    engines: {node: '>= 0.4'}
+    dependencies:
+      function-bind: 1.1.2
+    dev: true
+
+  /hast-util-classnames@3.0.0:
+    resolution: {integrity: sha512-tI3JjoGDEBVorMAWK4jNRsfLMYmih1BUOG3VV36pH36njs1IEl7xkNrVTD2mD2yYHmQCa5R/fj61a8IAF4bRaQ==}
+    dependencies:
+      '@types/hast': 3.0.4
+      space-separated-tokens: 2.0.2
+    dev: false
+
+  /hast-util-from-dom@5.0.1:
+    resolution: {integrity: sha512-N+LqofjR2zuzTjCPzyDUdSshy4Ma6li7p/c3pA78uTwzFgENbgbUrm2ugwsOdcjI1muO+o6Dgzp9p8WHtn/39Q==}
+    dependencies:
+      '@types/hast': 3.0.4
+      hastscript: 9.0.1
+      web-namespaces: 2.0.1
+    dev: false
+
+  /hast-util-from-html-isomorphic@2.0.0:
+    resolution: {integrity: sha512-zJfpXq44yff2hmE0XmwEOzdWin5xwH+QIhMLOScpX91e/NSGPsAzNCvLQDIEPyO2TXi+lBmU6hjLIhV8MwP2kw==}
+    dependencies:
+      '@types/hast': 3.0.4
+      hast-util-from-dom: 5.0.1
+      hast-util-from-html: 2.0.3
+      unist-util-remove-position: 5.0.0
+    dev: false
+
+  /hast-util-from-html@2.0.3:
+    resolution: {integrity: sha512-CUSRHXyKjzHov8yKsQjGOElXy/3EKpyX56ELnkHH34vDVw1N1XSQ1ZcAvTyAPtGqLTuKP/uxM+aLkSPqF/EtMw==}
+    dependencies:
+      '@types/hast': 3.0.4
+      devlop: 1.1.0
+      hast-util-from-parse5: 8.0.3
+      parse5: 7.3.0
+      vfile: 6.0.3
+      vfile-message: 4.0.3
+    dev: false
+
+  /hast-util-from-parse5@8.0.3:
+    resolution: {integrity: sha512-3kxEVkEKt0zvcZ3hCRYI8rqrgwtlIOFMWkbclACvjlDw8Li9S2hk/d51OI0nr/gIpdMHNepwgOKqZ/sy0Clpyg==}
+    dependencies:
+      '@types/hast': 3.0.4
+      '@types/unist': 3.0.3
+      devlop: 1.1.0
+      hastscript: 9.0.1
+      property-information: 7.1.0
+      vfile: 6.0.3
+      vfile-location: 5.0.3
+      web-namespaces: 2.0.1
+    dev: false
+
+  /hast-util-has-property@3.0.0:
+    resolution: {integrity: sha512-MNilsvEKLFpV604hwfhVStK0usFY/QmM5zX16bo7EjnAEGofr5YyI37kzopBlZJkHD4t887i+q/C8/tr5Q94cA==}
+    dependencies:
+      '@types/hast': 3.0.4
+    dev: false
+
+  /hast-util-heading-rank@3.0.0:
+    resolution: {integrity: sha512-EJKb8oMUXVHcWZTDepnr+WNbfnXKFNf9duMesmr4S8SXTJBJ9M4Yok08pu9vxdJwdlGRhVumk9mEhkEvKGifwA==}
+    dependencies:
+      '@types/hast': 3.0.4
+    dev: false
+
+  /hast-util-is-element@3.0.0:
+    resolution: {integrity: sha512-Val9mnv2IWpLbNPqc/pUem+a7Ipj2aHacCwgNfTiK0vJKl0LF+4Ba4+v1oPHFpf3bLYmreq0/l3Gud9S5OH42g==}
+    dependencies:
+      '@types/hast': 3.0.4
+    dev: false
+
+  /hast-util-parse-selector@4.0.0:
+    resolution: {integrity: sha512-wkQCkSYoOGCRKERFWcxMVMOcYE2K1AaNLU8DXS9arxnLOUEWbOXKXiJUNzEpqZ3JOKpnha3jkFrumEjVliDe7A==}
+    dependencies:
+      '@types/hast': 3.0.4
+    dev: false
+
+  /hast-util-select@6.0.4:
+    resolution: {integrity: sha512-RqGS1ZgI0MwxLaKLDxjprynNzINEkRHY2i8ln4DDjgv9ZhcYVIHN9rlpiYsqtFwrgpYU361SyWDQcGNIBVu3lw==}
+    dependencies:
+      '@types/hast': 3.0.4
+      '@types/unist': 3.0.3
+      bcp-47-match: 2.0.3
+      comma-separated-tokens: 2.0.3
+      css-selector-parser: 3.1.3
+      devlop: 1.1.0
+      direction: 2.0.1
+      hast-util-has-property: 3.0.0
+      hast-util-to-string: 3.0.1
+      hast-util-whitespace: 3.0.0
+      nth-check: 2.1.1
+      property-information: 7.1.0
+      space-separated-tokens: 2.0.2
+      unist-util-visit: 5.0.0
+      zwitch: 2.0.4
+    dev: false
+
+  /hast-util-to-estree@3.1.3:
+    resolution: {integrity: sha512-48+B/rJWAp0jamNbAAf9M7Uf//UVqAoMmgXhBdxTDJLGKY+LRnZ99qcG+Qjl5HfMpYNzS5v4EAwVEF34LeAj7w==}
+    dependencies:
+      '@types/estree': 1.0.8
+      '@types/estree-jsx': 1.0.5
+      '@types/hast': 3.0.4
+      comma-separated-tokens: 2.0.3
+      devlop: 1.1.0
+      estree-util-attach-comments: 3.0.0
+      estree-util-is-identifier-name: 3.0.0
+      hast-util-whitespace: 3.0.0
+      mdast-util-mdx-expression: 2.0.1
+      mdast-util-mdx-jsx: 3.2.0
+      mdast-util-mdxjs-esm: 2.0.1
+      property-information: 7.1.0
+      space-separated-tokens: 2.0.2
+      style-to-js: 1.1.17
+      unist-util-position: 5.0.0
+      zwitch: 2.0.4
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /hast-util-to-html@9.0.5:
+    resolution: {integrity: sha512-OguPdidb+fbHQSU4Q4ZiLKnzWo8Wwsf5bZfbvu7//a9oTYoqD/fWpe96NuHkoS9h0ccGOTe0C4NGXdtS0iObOw==}
+    dependencies:
+      '@types/hast': 3.0.4
+      '@types/unist': 3.0.3
+      ccount: 2.0.1
+      comma-separated-tokens: 2.0.3
+      hast-util-whitespace: 3.0.0
+      html-void-elements: 3.0.0
+      mdast-util-to-hast: 13.2.0
+      property-information: 7.1.0
+      space-separated-tokens: 2.0.2
+      stringify-entities: 4.0.4
+      zwitch: 2.0.4
+    dev: false
+
+  /hast-util-to-jsx-runtime@2.3.6:
+    resolution: {integrity: sha512-zl6s8LwNyo1P9uw+XJGvZtdFF1GdAkOg8ujOw+4Pyb76874fLps4ueHXDhXWdk6YHQ6OgUtinliG7RsYvCbbBg==}
+    dependencies:
+      '@types/estree': 1.0.8
+      '@types/hast': 3.0.4
+      '@types/unist': 3.0.3
+      comma-separated-tokens: 2.0.3
+      devlop: 1.1.0
+      estree-util-is-identifier-name: 3.0.0
+      hast-util-whitespace: 3.0.0
+      mdast-util-mdx-expression: 2.0.1
+      mdast-util-mdx-jsx: 3.2.0
+      mdast-util-mdxjs-esm: 2.0.1
+      property-information: 7.1.0
+      space-separated-tokens: 2.0.2
+      style-to-js: 1.1.17
+      unist-util-position: 5.0.0
+      vfile-message: 4.0.3
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /hast-util-to-string@3.0.1:
+    resolution: {integrity: sha512-XelQVTDWvqcl3axRfI0xSeoVKzyIFPwsAGSLIsKdJKQMXDYJS4WYrBNF/8J7RdhIcFI2BOHgAifggsvsxp/3+A==}
+    dependencies:
+      '@types/hast': 3.0.4
+    dev: false
+
+  /hast-util-to-text@4.0.2:
+    resolution: {integrity: sha512-KK6y/BN8lbaq654j7JgBydev7wuNMcID54lkRav1P0CaE1e47P72AWWPiGKXTJU271ooYzcvTAn/Zt0REnvc7A==}
+    dependencies:
+      '@types/hast': 3.0.4
+      '@types/unist': 3.0.3
+      hast-util-is-element: 3.0.0
+      unist-util-find-after: 5.0.0
+    dev: false
+
+  /hast-util-whitespace@3.0.0:
+    resolution: {integrity: sha512-88JUN06ipLwsnv+dVn+OIYOvAuvBMy/Qoi6O7mQHxdPXpjy+Cd6xRkWwux7DKO+4sYILtLBRIKgsdpS2gQc7qw==}
+    dependencies:
+      '@types/hast': 3.0.4
+    dev: false
+
+  /hastscript@8.0.0:
+    resolution: {integrity: sha512-dMOtzCEd3ABUeSIISmrETiKuyydk1w0pa+gE/uormcTpSYuaNJPbX1NU3JLyscSLjwAQM8bWMhhIlnCqnRvDTw==}
+    dependencies:
+      '@types/hast': 3.0.4
+      comma-separated-tokens: 2.0.3
+      hast-util-parse-selector: 4.0.0
+      property-information: 6.5.0
+      space-separated-tokens: 2.0.2
+    dev: false
+
+  /hastscript@9.0.1:
+    resolution: {integrity: sha512-g7df9rMFX/SPi34tyGCyUBREQoKkapwdY/T04Qn9TDWfHhAYt4/I0gMVirzK5wEzeUqIjEB+LXC/ypb7Aqno5w==}
+    dependencies:
+      '@types/hast': 3.0.4
+      comma-separated-tokens: 2.0.3
+      hast-util-parse-selector: 4.0.0
+      property-information: 7.1.0
+      space-separated-tokens: 2.0.2
+    dev: false
+
+  /hono@4.9.2:
+    resolution: {integrity: sha512-UG2jXGS/gkLH42l/1uROnwXpkjvvxkl3kpopL3LBo27NuaDPI6xHNfuUSilIHcrBkPfl4y0z6y2ByI455TjNRw==}
+    engines: {node: '>=16.9.0'}
+    dev: false
+
+  /html-void-elements@3.0.0:
+    resolution: {integrity: sha512-bEqo66MRXsUGxWHV5IP0PUiAWwoEjba4VCzg0LjFJBpchPaTfyfCKTG6bc5F8ucKec3q5y6qOdGyYTSBEvhCrg==}
+    dev: false
+
+  /http-errors@2.0.0:
+    resolution: {integrity: sha512-FtwrG/euBzaEjYeRqOgly7G0qviiXoJWnvEH2Z1plBdXgbyjv34pHTSb9zoeHMyDy33+DWy5Wt9Wo+TURtOYSQ==}
+    engines: {node: '>= 0.8'}
+    dependencies:
+      depd: 2.0.0
+      inherits: 2.0.4
+      setprototypeof: 1.2.0
+      statuses: 2.0.1
+      toidentifier: 1.0.1
+    dev: false
+
+  /human-signals@2.1.0:
+    resolution: {integrity: sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==}
+    engines: {node: '>=10.17.0'}
+    dev: false
+
+  /iconv-lite@0.6.3:
+    resolution: {integrity: sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==}
+    engines: {node: '>=0.10.0'}
+    dependencies:
+      safer-buffer: 2.1.2
+    dev: false
+
+  /ieee754@1.2.1:
+    resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==}
+    dev: false
+
+  /ignore@7.0.5:
+    resolution: {integrity: sha512-Hs59xBNfUIunMFgWAbGX5cq6893IbWg4KnrjbYwX3tx0ztorVgTDA6B2sxf8ejHJ4wz8BqGUMYlnzNBer5NvGg==}
+    engines: {node: '>= 4'}
+    dev: false
+
+  /inflight@1.0.6:
+    resolution: {integrity: sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==}
+    deprecated: This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.
+    dependencies:
+      once: 1.4.0
+      wrappy: 1.0.2
+    dev: true
+
+  /inherits@2.0.4:
+    resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==}
+
+  /inline-style-parser@0.2.4:
+    resolution: {integrity: sha512-0aO8FkhNZlj/ZIbNi7Lxxr12obT7cL1moPfE4tg1LkX7LlLfC6DeX4l2ZEud1ukP9jNQyNnfzQVqwbwmAATY4Q==}
+    dev: false
+
+  /internmap@1.0.1:
+    resolution: {integrity: sha512-lDB5YccMydFBtasVtxnZ3MRBHuaoE8GKsppq+EchKL2U4nK/DmEpPHNH8MZe5HkMtpSiTSOZwfN0tzYjO/lJEw==}
+    dev: false
+
+  /internmap@2.0.3:
+    resolution: {integrity: sha512-5Hh7Y1wQbvY5ooGgPbDaL5iYLAPzMTUrjMulskHLH6wnv/A+1q5rgEaiuqEjB+oxGXIVZs1FF+R/KPN3ZSQYYg==}
+    engines: {node: '>=12'}
+    dev: false
+
+  /is-alphabetical@2.0.1:
+    resolution: {integrity: sha512-FWyyY60MeTNyeSRpkM2Iry0G9hpr7/9kD40mD/cGQEuilcZYS4okz8SN2Q6rLCJ8gbCt6fN+rC+6tMGS99LaxQ==}
+    dev: false
+
+  /is-alphanumerical@2.0.1:
+    resolution: {integrity: sha512-hmbYhX/9MUMF5uh7tOXyK/n0ZvWpad5caBA17GsC6vyuCqaWliRG5K1qS9inmUhEMaOBIW7/whAnSwveW/LtZw==}
+    dependencies:
+      is-alphabetical: 2.0.1
+      is-decimal: 2.0.1
+    dev: false
+
+  /is-buffer@2.0.5:
+    resolution: {integrity: sha512-i2R6zNFDwgEHJyQUtJEk0XFi1i0dPFn/oqjK3/vPCcDeJvW5NQ83V8QbicfF1SupOaB0h8ntgBC2YiE7dfyctQ==}
+    engines: {node: '>=4'}
+    dev: false
+
+  /is-decimal@2.0.1:
+    resolution: {integrity: sha512-AAB9hiomQs5DXWcRB1rqsxGUstbRroFOPPVAomNk/3XHR5JyEZChOyTWe2oayKnsSsr/kcGqF+z6yuH6HHpN0A==}
+    dev: false
+
+  /is-docker@2.2.1:
+    resolution: {integrity: sha512-F+i2BKsFrH66iaUFc0woD8sLy8getkwTwtOBjvs56Cx4CgJDeKQeqfz8wAYiSb8JOprWhHH5p77PbmYCvvUuXQ==}
+    engines: {node: '>=8'}
+    hasBin: true
+    dev: true
+
+  /is-extglob@2.1.1:
+    resolution: {integrity: sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==}
+    engines: {node: '>=0.10.0'}
+    dev: false
+
+  /is-fullwidth-code-point@3.0.0:
+    resolution: {integrity: sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==}
+    engines: {node: '>=8'}
+    dev: true
+
+  /is-glob@4.0.3:
+    resolution: {integrity: sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==}
+    engines: {node: '>=0.10.0'}
+    dependencies:
+      is-extglob: 2.1.1
+    dev: false
+
+  /is-hexadecimal@2.0.1:
+    resolution: {integrity: sha512-DgZQp241c8oO6cA1SbTEWiXeoxV42vlcJxgH+B3hi1AiqqKruZR3ZGF8In3fj4+/y/7rHvlOZLZtgJ/4ttYGZg==}
+    dev: false
+
+  /is-interactive@2.0.0:
+    resolution: {integrity: sha512-qP1vozQRI+BMOPcjFzrjXuQvdak2pHNUMZoeG2eRbiSqyvbEf/wQtEOTOX1guk6E3t36RkaqiSt8A/6YElNxLQ==}
+    engines: {node: '>=12'}
+    dev: false
+
+  /is-number@7.0.0:
+    resolution: {integrity: sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==}
+    engines: {node: '>=0.12.0'}
+
+  /is-plain-obj@4.1.0:
+    resolution: {integrity: sha512-+Pgi+vMuUNkJyExiMBt5IlFoMyKnr5zhJ4Uspz58WOhBF5QoIZkFyNHIbBAtHwzVAgk5RtndVNsDRN61/mmDqg==}
+    engines: {node: '>=12'}
+    dev: false
+
+  /is-stream@2.0.1:
+    resolution: {integrity: sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==}
+    engines: {node: '>=8'}
+    dev: false
+
+  /is-unicode-supported@1.3.0:
+    resolution: {integrity: sha512-43r2mRvz+8JRIKnWJ+3j8JtjRKZ6GmjzfaE/qiBJnikNnYv/6bagRJ1kUhNk8R5EX/GkobD+r+sfxCPJsiKBLQ==}
+    engines: {node: '>=12'}
+    dev: false
+
+  /is-wsl@2.2.0:
+    resolution: {integrity: sha512-fKzAra0rGJUUBwGBgNkHZuToZcn+TtXHpeCgmkMJMMYx1sQDYaCSyjJBSCa2nH1DGm7s3n1oBnohoVTBaN7Lww==}
+    engines: {node: '>=8'}
+    dependencies:
+      is-docker: 2.2.1
+    dev: true
+
+  /isarray@2.0.5:
+    resolution: {integrity: sha512-xHjhDr3cNBK0BzdUJSPXZntQUx/mwMS5Rw4A7lPJ90XGAO6ISP/ePDNuo0vhqOZU+UD5JoodwCAAoZQd3FeAKw==}
+    dev: true
+
+  /isexe@2.0.0:
+    resolution: {integrity: sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==}
+
+  /jackspeak@4.1.1:
+    resolution: {integrity: sha512-zptv57P3GpL+O0I7VdMJNBZCu+BPHVQUk55Ft8/QCJjTVxrnJHuVuX/0Bl2A6/+2oyR/ZMEuFKwmzqqZ/U5nPQ==}
+    engines: {node: 20 || >=22}
+    dependencies:
+      '@isaacs/cliui': 8.0.2
+    dev: true
+
+  /javascript-stringify@2.1.0:
+    resolution: {integrity: sha512-JVAfqNPTvNq3sB/VHQJAFxN/sPgKnsKrCwyRt15zwNCdrMMJDdcEOdubuy+DuJYYdm0ox1J4uzEuYKkN+9yhVg==}
+    dev: false
+
+  /jiti@2.5.1:
+    resolution: {integrity: sha512-twQoecYPiVA5K/h6SxtORw/Bs3ar+mLUtoPSc7iMXzQzK8d7eJ/R09wmTwAjiamETn1cXYPGfNnu7DMoHgu12w==}
+    hasBin: true
+    dev: false
+
+  /js-tokens@4.0.0:
+    resolution: {integrity: sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==}
+    dev: false
+
+  /jsesc@3.1.0:
+    resolution: {integrity: sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA==}
+    engines: {node: '>=6'}
+    hasBin: true
+    dev: false
+
+  /json-stable-stringify@1.3.0:
+    resolution: {integrity: sha512-qtYiSSFlwot9XHtF9bD9c7rwKjr+RecWT//ZnPvSmEjpV5mmPOCN4j8UjY5hbjNkOwZ/jQv3J6R1/pL7RwgMsg==}
+    engines: {node: '>= 0.4'}
+    dependencies:
+      call-bind: 1.0.8
+      call-bound: 1.0.4
+      isarray: 2.0.5
+      jsonify: 0.0.1
+      object-keys: 1.1.1
+    dev: true
+
+  /json5@2.2.3:
+    resolution: {integrity: sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg==}
+    engines: {node: '>=6'}
+    hasBin: true
+    dev: false
+
+  /jsonfile@6.2.0:
+    resolution: {integrity: sha512-FGuPw30AdOIUTRMC2OMRtQV+jkVj2cfPqSeWXv1NEAJ1qZ5zb1X6z1mFhbfOB/iy3ssJCD+3KuZ8r8C3uVFlAg==}
+    dependencies:
+      universalify: 2.0.1
+    optionalDependencies:
+      graceful-fs: 4.2.11
+
+  /jsonify@0.0.1:
+    resolution: {integrity: sha512-2/Ki0GcmuqSrgFyelQq9M05y7PS0mEwuIzrf3f1fPqkVDVRvZrPZtVSMHxdgo8Aq0sxAOb/cr2aqqA3LeWHVPg==}
+    dev: true
+
+  /katex@0.16.22:
+    resolution: {integrity: sha512-XCHRdUw4lf3SKBaJe4EvgqIuWwkPSo9XoeO8GjQW94Bp7TWv9hNhzZjZ+OH9yf1UmLygb7DIT5GSFQiyt16zYg==}
+    hasBin: true
+    dependencies:
+      commander: 8.3.0
+    dev: false
+
+  /khroma@2.1.0:
+    resolution: {integrity: sha512-Ls993zuzfayK269Svk9hzpeGUKob/sIgZzyHYdjQoAdQetRKpOLj+k/QQQ/6Qi0Yz65mlROrfd+Ev+1+7dz9Kw==}
+    dev: false
+
+  /klaw-sync@6.0.0:
+    resolution: {integrity: sha512-nIeuVSzdCCs6TDPTqI8w1Yre34sSq7AkZ4B3sfOBbI2CgVSB4Du4aLQijFU2+lhAFCwt9+42Hel6lQNIv6AntQ==}
+    dependencies:
+      graceful-fs: 4.2.11
+    dev: true
+
+  /kolorist@1.8.0:
+    resolution: {integrity: sha512-Y+60/zizpJ3HRH8DCss+q95yr6145JXZo46OTpFvDZWLfRCE4qChOyk1b26nMaNpfHHgxagk9dXT5OP0Tfe+dQ==}
+    dev: false
+
+  /langium@3.3.1:
+    resolution: {integrity: sha512-QJv/h939gDpvT+9SiLVlY7tZC3xB2qK57v0J04Sh9wpMb6MP1q8gB21L3WIo8T5P1MSMg3Ep14L7KkDCFG3y4w==}
+    engines: {node: '>=16.0.0'}
+    dependencies:
+      chevrotain: 11.0.3
+      chevrotain-allstar: 0.3.1(chevrotain@11.0.3)
+      vscode-languageserver: 9.0.1
+      vscode-languageserver-textdocument: 1.0.12
+      vscode-uri: 3.0.8
+    dev: false
+
+  /layout-base@1.0.2:
+    resolution: {integrity: sha512-8h2oVEZNktL4BH2JCOI90iD1yXwL6iNW7KcCKT2QZgQJR2vbqDsldCTPRU9NifTCqHZci57XvQQ15YTu+sTYPg==}
+    dev: false
+
+  /layout-base@2.0.1:
+    resolution: {integrity: sha512-dp3s92+uNI1hWIpPGH3jK2kxE2lMjdXdr+DH8ynZHpd6PUlH6x6cbuXnoMmiNumznqaNO31xu9e79F0uuZ0JFg==}
+    dev: false
+
+  /lightningcss-darwin-arm64@1.30.1:
+    resolution: {integrity: sha512-c8JK7hyE65X1MHMN+Viq9n11RRC7hgin3HhYKhrMyaXflk5GVplZ60IxyoVtzILeKr+xAJwg6zK6sjTBJ0FKYQ==}
+    engines: {node: '>= 12.0.0'}
+    cpu: [arm64]
+    os: [darwin]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /lightningcss-darwin-x64@1.30.1:
+    resolution: {integrity: sha512-k1EvjakfumAQoTfcXUcHQZhSpLlkAuEkdMBsI/ivWw9hL+7FtilQc0Cy3hrx0AAQrVtQAbMI7YjCgYgvn37PzA==}
+    engines: {node: '>= 12.0.0'}
+    cpu: [x64]
+    os: [darwin]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /lightningcss-freebsd-x64@1.30.1:
+    resolution: {integrity: sha512-kmW6UGCGg2PcyUE59K5r0kWfKPAVy4SltVeut+umLCFoJ53RdCUWxcRDzO1eTaxf/7Q2H7LTquFHPL5R+Gjyig==}
+    engines: {node: '>= 12.0.0'}
+    cpu: [x64]
+    os: [freebsd]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /lightningcss-linux-arm-gnueabihf@1.30.1:
+    resolution: {integrity: sha512-MjxUShl1v8pit+6D/zSPq9S9dQ2NPFSQwGvxBCYaBYLPlCWuPh9/t1MRS8iUaR8i+a6w7aps+B4N0S1TYP/R+Q==}
+    engines: {node: '>= 12.0.0'}
+    cpu: [arm]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /lightningcss-linux-arm64-gnu@1.30.1:
+    resolution: {integrity: sha512-gB72maP8rmrKsnKYy8XUuXi/4OctJiuQjcuqWNlJQ6jZiWqtPvqFziskH3hnajfvKB27ynbVCucKSm2rkQp4Bw==}
+    engines: {node: '>= 12.0.0'}
+    cpu: [arm64]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /lightningcss-linux-arm64-musl@1.30.1:
+    resolution: {integrity: sha512-jmUQVx4331m6LIX+0wUhBbmMX7TCfjF5FoOH6SD1CttzuYlGNVpA7QnrmLxrsub43ClTINfGSYyHe2HWeLl5CQ==}
+    engines: {node: '>= 12.0.0'}
+    cpu: [arm64]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /lightningcss-linux-x64-gnu@1.30.1:
+    resolution: {integrity: sha512-piWx3z4wN8J8z3+O5kO74+yr6ze/dKmPnI7vLqfSqI8bccaTGY5xiSGVIJBDd5K5BHlvVLpUB3S2YCfelyJ1bw==}
+    engines: {node: '>= 12.0.0'}
+    cpu: [x64]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /lightningcss-linux-x64-musl@1.30.1:
+    resolution: {integrity: sha512-rRomAK7eIkL+tHY0YPxbc5Dra2gXlI63HL+v1Pdi1a3sC+tJTcFrHX+E86sulgAXeI7rSzDYhPSeHHjqFhqfeQ==}
+    engines: {node: '>= 12.0.0'}
+    cpu: [x64]
+    os: [linux]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /lightningcss-win32-arm64-msvc@1.30.1:
+    resolution: {integrity: sha512-mSL4rqPi4iXq5YVqzSsJgMVFENoa4nGTT/GjO2c0Yl9OuQfPsIfncvLrEW6RbbB24WtZ3xP/2CCmI3tNkNV4oA==}
+    engines: {node: '>= 12.0.0'}
+    cpu: [arm64]
+    os: [win32]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /lightningcss-win32-x64-msvc@1.30.1:
+    resolution: {integrity: sha512-PVqXh48wh4T53F/1CCu8PIPCxLzWyCnn/9T5W1Jpmdy5h9Cwd+0YQS6/LwhHXSafuc61/xg9Lv5OrCby6a++jg==}
+    engines: {node: '>= 12.0.0'}
+    cpu: [x64]
+    os: [win32]
+    requiresBuild: true
+    dev: false
+    optional: true
+
+  /lightningcss@1.30.1:
+    resolution: {integrity: sha512-xi6IyHML+c9+Q3W0S4fCQJOym42pyurFiJUHEcEyHS0CeKzia4yZDEsLlqOFykxOdHpNy0NmvVO31vcSqAxJCg==}
+    engines: {node: '>= 12.0.0'}
+    dependencies:
+      detect-libc: 2.0.4
+    optionalDependencies:
+      lightningcss-darwin-arm64: 1.30.1
+      lightningcss-darwin-x64: 1.30.1
+      lightningcss-freebsd-x64: 1.30.1
+      lightningcss-linux-arm-gnueabihf: 1.30.1
+      lightningcss-linux-arm64-gnu: 1.30.1
+      lightningcss-linux-arm64-musl: 1.30.1
+      lightningcss-linux-x64-gnu: 1.30.1
+      lightningcss-linux-x64-musl: 1.30.1
+      lightningcss-win32-arm64-msvc: 1.30.1
+      lightningcss-win32-x64-msvc: 1.30.1
+    dev: false
+
+  /local-pkg@1.1.1:
+    resolution: {integrity: sha512-WunYko2W1NcdfAFpuLUoucsgULmgDBRkdxHxWQ7mK0cQqwPiy8E1enjuRBrhLtZkB5iScJ1XIPdhVEFK8aOLSg==}
+    engines: {node: '>=14'}
+    dependencies:
+      mlly: 1.7.4
+      pkg-types: 2.2.0
+      quansync: 0.2.11
+    dev: false
+
+  /locate-path@6.0.0:
+    resolution: {integrity: sha512-iPZK6eYjbxRu3uB4/WZ3EsEIMJFMqAoopl3R+zuq0UjcAm/MO6KCweDgPfP3elTztoKP3KtnVHxTn2NHBSDVUw==}
+    engines: {node: '>=10'}
+    dependencies:
+      p-locate: 5.0.0
+    dev: false
+
+  /lodash-es@4.17.21:
+    resolution: {integrity: sha512-mKnC+QJ9pWVzv+C4/U3rRsHapFfHvQFoFB92e52xeyGMcX6/OlIl78je1u8vePzYZSkkogMPJ2yjxxsb89cxyw==}
+    dev: false
+
+  /log-symbols@5.1.0:
+    resolution: {integrity: sha512-l0x2DvrW294C9uDCoQe1VSU4gf529FkSZ6leBl4TiqZH/e+0R7hSfHQBNut2mNygDgHwvYHfFLn6Oxb3VWj2rA==}
+    engines: {node: '>=12'}
+    dependencies:
+      chalk: 5.5.0
+      is-unicode-supported: 1.3.0
+    dev: false
+
+  /longest-streak@3.1.0:
+    resolution: {integrity: sha512-9Ri+o0JYgehTaVBBDoMqIl8GXtbWg711O3srftcHhZ0dqnETqLaoIK0x17fUw9rFSlK/0NlsKe0Ahhyl5pXE2g==}
+    dev: false
+
+  /lru-cache@10.4.3:
+    resolution: {integrity: sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==}
+    dev: false
+
+  /lru-cache@11.1.0:
+    resolution: {integrity: sha512-QIXZUBJUx+2zHUdQujWejBkcD9+cs94tLn0+YL8UrCh+D5sCXZ4c7LaEH48pNwRY3MLDgqUFyhlCyjJPf1WP0A==}
+    engines: {node: 20 || >=22}
+    dev: true
+
+  /lru-cache@5.1.1:
+    resolution: {integrity: sha512-KpNARQA3Iwv+jTA0utUVVbrh+Jlrr1Fv0e56GGzAFOXN7dk/FviaDW8LHmK52DlcH4WP2n6gI8vN1aesBFgo9w==}
+    dependencies:
+      yallist: 3.1.1
+    dev: false
+
+  /mark.js@8.11.1:
+    resolution: {integrity: sha512-1I+1qpDt4idfgLQG+BNWmrqku+7/2bi5nLf4YwF8y8zXvmfiTBY3PV3ZibfrjBueCByROpuBjLLFCajqkgYoLQ==}
+    dev: false
+
+  /markdown-extensions@2.0.0:
+    resolution: {integrity: sha512-o5vL7aDWatOTX8LzaS1WMoaoxIiLRQJuIKKe2wAw6IeULDHaqbiqiggmx+pKvZDb1Sj+pE46Sn1T7lCqfFtg1Q==}
+    engines: {node: '>=16'}
+    dev: false
+
+  /markdown-table@3.0.4:
+    resolution: {integrity: sha512-wiYz4+JrLyb/DqW2hkFJxP7Vd7JuTDm77fvbM8VfEQdmSMqcImWeeRbHwZjBjIFki/VaMK2BhFi7oUUZeM5bqw==}
+    dev: false
+
+  /marked@16.1.2:
+    resolution: {integrity: sha512-rNQt5EvRinalby7zJZu/mB+BvaAY2oz3wCuCjt1RDrWNpS1Pdf9xqMOeC9Hm5adBdcV/3XZPJpG58eT+WBc0XQ==}
+    engines: {node: '>= 20'}
+    hasBin: true
+    dev: false
+
+  /math-intrinsics@1.1.0:
+    resolution: {integrity: sha512-/IXtbwEk5HTPyEwyKX6hGkYXxM9nbj64B+ilVJnC/R6B0pH5G4V3b0pVbL7DBj4tkhBAppbQUlf6F6Xl9LHu1g==}
+    engines: {node: '>= 0.4'}
+    dev: true
+
+  /mdast-util-directive@3.1.0:
+    resolution: {integrity: sha512-I3fNFt+DHmpWCYAT7quoM6lHf9wuqtI+oCOfvILnoicNIqjh5E3dEJWiXuYME2gNe8vl1iMQwyUHa7bgFmak6Q==}
+    dependencies:
+      '@types/mdast': 4.0.4
+      '@types/unist': 3.0.3
+      ccount: 2.0.1
+      devlop: 1.1.0
+      mdast-util-from-markdown: 2.0.2
+      mdast-util-to-markdown: 2.1.2
+      parse-entities: 4.0.2
+      stringify-entities: 4.0.4
+      unist-util-visit-parents: 6.0.1
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /mdast-util-find-and-replace@3.0.2:
+    resolution: {integrity: sha512-Tmd1Vg/m3Xz43afeNxDIhWRtFZgM2VLyaf4vSTYwudTyeuTneoL3qtWMA5jeLyz/O1vDJmmV4QuScFCA2tBPwg==}
+    dependencies:
+      '@types/mdast': 4.0.4
+      escape-string-regexp: 5.0.0
+      unist-util-is: 6.0.0
+      unist-util-visit-parents: 6.0.1
+    dev: false
+
+  /mdast-util-from-markdown@2.0.2:
+    resolution: {integrity: sha512-uZhTV/8NBuw0WHkPTrCqDOl0zVe1BIng5ZtHoDk49ME1qqcjYmmLmOf0gELgcRMxN4w2iuIeVso5/6QymSrgmA==}
+    dependencies:
+      '@types/mdast': 4.0.4
+      '@types/unist': 3.0.3
+      decode-named-character-reference: 1.2.0
+      devlop: 1.1.0
+      mdast-util-to-string: 4.0.0
+      micromark: 4.0.2
+      micromark-util-decode-numeric-character-reference: 2.0.2
+      micromark-util-decode-string: 2.0.1
+      micromark-util-normalize-identifier: 2.0.1
+      micromark-util-symbol: 2.0.1
+      micromark-util-types: 2.0.2
+      unist-util-stringify-position: 4.0.0
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /mdast-util-frontmatter@2.0.1:
+    resolution: {integrity: sha512-LRqI9+wdgC25P0URIJY9vwocIzCcksduHQ9OF2joxQoyTNVduwLAFUzjoopuRJbJAReaKrNQKAZKL3uCMugWJA==}
+    dependencies:
+      '@types/mdast': 4.0.4
+      devlop: 1.1.0
+      escape-string-regexp: 5.0.0
+      mdast-util-from-markdown: 2.0.2
+      mdast-util-to-markdown: 2.1.2
+      micromark-extension-frontmatter: 2.0.0
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /mdast-util-gfm-autolink-literal@2.0.1:
+    resolution: {integrity: sha512-5HVP2MKaP6L+G6YaxPNjuL0BPrq9orG3TsrZ9YXbA3vDw/ACI4MEsnoDpn6ZNm7GnZgtAcONJyPhOP8tNJQavQ==}
+    dependencies:
+      '@types/mdast': 4.0.4
+      ccount: 2.0.1
+      devlop: 1.1.0
+      mdast-util-find-and-replace: 3.0.2
+      micromark-util-character: 2.1.1
+    dev: false
+
+  /mdast-util-gfm-footnote@2.1.0:
+    resolution: {integrity: sha512-sqpDWlsHn7Ac9GNZQMeUzPQSMzR6Wv0WKRNvQRg0KqHh02fpTz69Qc1QSseNX29bhz1ROIyNyxExfawVKTm1GQ==}
+    dependencies:
+      '@types/mdast': 4.0.4
+      devlop: 1.1.0
+      mdast-util-from-markdown: 2.0.2
+      mdast-util-to-markdown: 2.1.2
+      micromark-util-normalize-identifier: 2.0.1
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /mdast-util-gfm-strikethrough@2.0.0:
+    resolution: {integrity: sha512-mKKb915TF+OC5ptj5bJ7WFRPdYtuHv0yTRxK2tJvi+BDqbkiG7h7u/9SI89nRAYcmap2xHQL9D+QG/6wSrTtXg==}
+    dependencies:
+      '@types/mdast': 4.0.4
+      mdast-util-from-markdown: 2.0.2
+      mdast-util-to-markdown: 2.1.2
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /mdast-util-gfm-table@2.0.0:
+    resolution: {integrity: sha512-78UEvebzz/rJIxLvE7ZtDd/vIQ0RHv+3Mh5DR96p7cS7HsBhYIICDBCu8csTNWNO6tBWfqXPWekRuj2FNOGOZg==}
+    dependencies:
+      '@types/mdast': 4.0.4
+      devlop: 1.1.0
+      markdown-table: 3.0.4
+      mdast-util-from-markdown: 2.0.2
+      mdast-util-to-markdown: 2.1.2
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /mdast-util-gfm-task-list-item@2.0.0:
+    resolution: {integrity: sha512-IrtvNvjxC1o06taBAVJznEnkiHxLFTzgonUdy8hzFVeDun0uTjxxrRGVaNFqkU1wJR3RBPEfsxmU6jDWPofrTQ==}
+    dependencies:
+      '@types/mdast': 4.0.4
+      devlop: 1.1.0
+      mdast-util-from-markdown: 2.0.2
+      mdast-util-to-markdown: 2.1.2
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /mdast-util-gfm@3.1.0:
+    resolution: {integrity: sha512-0ulfdQOM3ysHhCJ1p06l0b0VKlhU0wuQs3thxZQagjcjPrlFRqY215uZGHHJan9GEAXd9MbfPjFJz+qMkVR6zQ==}
+    dependencies:
+      mdast-util-from-markdown: 2.0.2
+      mdast-util-gfm-autolink-literal: 2.0.1
+      mdast-util-gfm-footnote: 2.1.0
+      mdast-util-gfm-strikethrough: 2.0.0
+      mdast-util-gfm-table: 2.0.0
+      mdast-util-gfm-task-list-item: 2.0.0
+      mdast-util-to-markdown: 2.1.2
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /mdast-util-math@3.0.0:
+    resolution: {integrity: sha512-Tl9GBNeG/AhJnQM221bJR2HPvLOSnLE/T9cJI9tlc6zwQk2nPk/4f0cHkOdEixQPC/j8UtKDdITswvLAy1OZ1w==}
+    dependencies:
+      '@types/hast': 3.0.4
+      '@types/mdast': 4.0.4
+      devlop: 1.1.0
+      longest-streak: 3.1.0
+      mdast-util-from-markdown: 2.0.2
+      mdast-util-to-markdown: 2.1.2
+      unist-util-remove-position: 5.0.0
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /mdast-util-mdx-expression@2.0.1:
+    resolution: {integrity: sha512-J6f+9hUp+ldTZqKRSg7Vw5V6MqjATc+3E4gf3CFNcuZNWD8XdyI6zQ8GqH7f8169MM6P7hMBRDVGnn7oHB9kXQ==}
+    dependencies:
+      '@types/estree-jsx': 1.0.5
+      '@types/hast': 3.0.4
+      '@types/mdast': 4.0.4
+      devlop: 1.1.0
+      mdast-util-from-markdown: 2.0.2
+      mdast-util-to-markdown: 2.1.2
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /mdast-util-mdx-jsx@3.2.0:
+    resolution: {integrity: sha512-lj/z8v0r6ZtsN/cGNNtemmmfoLAFZnjMbNyLzBafjzikOM+glrjNHPlf6lQDOTccj9n5b0PPihEBbhneMyGs1Q==}
+    dependencies:
+      '@types/estree-jsx': 1.0.5
+      '@types/hast': 3.0.4
+      '@types/mdast': 4.0.4
+      '@types/unist': 3.0.3
+      ccount: 2.0.1
+      devlop: 1.1.0
+      mdast-util-from-markdown: 2.0.2
+      mdast-util-to-markdown: 2.1.2
+      parse-entities: 4.0.2
+      stringify-entities: 4.0.4
+      unist-util-stringify-position: 4.0.0
+      vfile-message: 4.0.3
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /mdast-util-mdx@3.0.0:
+    resolution: {integrity: sha512-JfbYLAW7XnYTTbUsmpu0kdBUVe+yKVJZBItEjwyYJiDJuZ9w4eeaqks4HQO+R7objWgS2ymV60GYpI14Ug554w==}
+    dependencies:
+      mdast-util-from-markdown: 2.0.2
+      mdast-util-mdx-expression: 2.0.1
+      mdast-util-mdx-jsx: 3.2.0
+      mdast-util-mdxjs-esm: 2.0.1
+      mdast-util-to-markdown: 2.1.2
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /mdast-util-mdxjs-esm@2.0.1:
+    resolution: {integrity: sha512-EcmOpxsZ96CvlP03NghtH1EsLtr0n9Tm4lPUJUBccV9RwUOneqSycg19n5HGzCf+10LozMRSObtVr3ee1WoHtg==}
+    dependencies:
+      '@types/estree-jsx': 1.0.5
+      '@types/hast': 3.0.4
+      '@types/mdast': 4.0.4
+      devlop: 1.1.0
+      mdast-util-from-markdown: 2.0.2
+      mdast-util-to-markdown: 2.1.2
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /mdast-util-phrasing@4.1.0:
+    resolution: {integrity: sha512-TqICwyvJJpBwvGAMZjj4J2n0X8QWp21b9l0o7eXyVJ25YNWYbJDVIyD1bZXE6WtV6RmKJVYmQAKWa0zWOABz2w==}
+    dependencies:
+      '@types/mdast': 4.0.4
+      unist-util-is: 6.0.0
+    dev: false
+
+  /mdast-util-to-hast@13.2.0:
+    resolution: {integrity: sha512-QGYKEuUsYT9ykKBCMOEDLsU5JRObWQusAolFMeko/tYPufNkRffBAQjIE+99jbA87xv6FgmjLtwjh9wBWajwAA==}
+    dependencies:
+      '@types/hast': 3.0.4
+      '@types/mdast': 4.0.4
+      '@ungap/structured-clone': 1.3.0
+      devlop: 1.1.0
+      micromark-util-sanitize-uri: 2.0.1
+      trim-lines: 3.0.1
+      unist-util-position: 5.0.0
+      unist-util-visit: 5.0.0
+      vfile: 6.0.3
+    dev: false
+
+  /mdast-util-to-markdown@2.1.2:
+    resolution: {integrity: sha512-xj68wMTvGXVOKonmog6LwyJKrYXZPvlwabaryTjLh9LuvovB/KAH+kvi8Gjj+7rJjsFi23nkUxRQv1KqSroMqA==}
+    dependencies:
+      '@types/mdast': 4.0.4
+      '@types/unist': 3.0.3
+      longest-streak: 3.1.0
+      mdast-util-phrasing: 4.1.0
+      mdast-util-to-string: 4.0.0
+      micromark-util-classify-character: 2.0.1
+      micromark-util-decode-string: 2.0.1
+      unist-util-visit: 5.0.0
+      zwitch: 2.0.4
+    dev: false
+
+  /mdast-util-to-string@4.0.0:
+    resolution: {integrity: sha512-0H44vDimn51F0YwvxSJSm0eCDOJTRlmN0R1yBh4HLj9wiV1Dn0QoXGbvFAWj2hSItVTlCmBF1hqKlIyUBVFLPg==}
+    dependencies:
+      '@types/mdast': 4.0.4
+    dev: false
+
+  /media-query-parser@2.0.2:
+    resolution: {integrity: sha512-1N4qp+jE0pL5Xv4uEcwVUhIkwdUO3S/9gML90nqKA7v7FcOS5vUtatfzok9S9U1EJU8dHWlcv95WLnKmmxZI9w==}
+    dependencies:
+      '@babel/runtime': 7.28.3
+    dev: false
+
+  /merge-stream@2.0.0:
+    resolution: {integrity: sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==}
+    dev: false
+
+  /merge2@1.4.1:
+    resolution: {integrity: sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==}
+    engines: {node: '>= 8'}
+    dev: false
+
+  /mermaid-isomorphic@3.0.4(playwright@1.54.2):
+    resolution: {integrity: sha512-XQTy7H1XwHK3DPEHf+ZNWiqUEd9BwX3Xws38R9Fj2gx718srmgjlZoUzHr+Tca+O+dqJOJsAJaKzCoP65QDfDg==}
+    peerDependencies:
+      playwright: '1'
+    peerDependenciesMeta:
+      playwright:
+        optional: true
+    dependencies:
+      '@fortawesome/fontawesome-free': 6.7.2
+      mermaid: 11.9.0
+      playwright: 1.54.2
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /mermaid@11.9.0:
+    resolution: {integrity: sha512-YdPXn9slEwO0omQfQIsW6vS84weVQftIyyTGAZCwM//MGhPzL1+l6vO6bkf0wnP4tHigH1alZ5Ooy3HXI2gOag==}
+    dependencies:
+      '@braintree/sanitize-url': 7.1.1
+      '@iconify/utils': 2.3.0
+      '@mermaid-js/parser': 0.6.2
+      '@types/d3': 7.4.3
+      cytoscape: 3.33.1
+      cytoscape-cose-bilkent: 4.1.0(cytoscape@3.33.1)
+      cytoscape-fcose: 2.2.0(cytoscape@3.33.1)
+      d3: 7.9.0
+      d3-sankey: 0.12.3
+      dagre-d3-es: 7.0.11
+      dayjs: 1.11.13
+      dompurify: 3.2.6
+      katex: 0.16.22
+      khroma: 2.1.0
+      lodash-es: 4.17.21
+      marked: 16.1.2
+      roughjs: 4.6.6
+      stylis: 4.3.6
+      ts-dedent: 2.2.0
+      uuid: 11.1.0
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /micromark-core-commonmark@2.0.3:
+    resolution: {integrity: sha512-RDBrHEMSxVFLg6xvnXmb1Ayr2WzLAWjeSATAoxwKYJV94TeNavgoIdA0a9ytzDSVzBy2YKFK+emCPOEibLeCrg==}
+    dependencies:
+      decode-named-character-reference: 1.2.0
+      devlop: 1.1.0
+      micromark-factory-destination: 2.0.1
+      micromark-factory-label: 2.0.1
+      micromark-factory-space: 2.0.1
+      micromark-factory-title: 2.0.1
+      micromark-factory-whitespace: 2.0.1
+      micromark-util-character: 2.1.1
+      micromark-util-chunked: 2.0.1
+      micromark-util-classify-character: 2.0.1
+      micromark-util-html-tag-name: 2.0.1
+      micromark-util-normalize-identifier: 2.0.1
+      micromark-util-resolve-all: 2.0.1
+      micromark-util-subtokenize: 2.1.0
+      micromark-util-symbol: 2.0.1
+      micromark-util-types: 2.0.2
+    dev: false
+
+  /micromark-extension-directive@3.0.2:
+    resolution: {integrity: sha512-wjcXHgk+PPdmvR58Le9d7zQYWy+vKEU9Se44p2CrCDPiLr2FMyiT4Fyb5UFKFC66wGB3kPlgD7q3TnoqPS7SZA==}
+    dependencies:
+      devlop: 1.1.0
+      micromark-factory-space: 2.0.1
+      micromark-factory-whitespace: 2.0.1
+      micromark-util-character: 2.1.1
+      micromark-util-symbol: 2.0.1
+      micromark-util-types: 2.0.2
+      parse-entities: 4.0.2
+    dev: false
+
+  /micromark-extension-frontmatter@2.0.0:
+    resolution: {integrity: sha512-C4AkuM3dA58cgZha7zVnuVxBhDsbttIMiytjgsM2XbHAB2faRVaHRle40558FBN+DJcrLNCoqG5mlrpdU4cRtg==}
+    dependencies:
+      fault: 2.0.1
+      micromark-util-character: 2.1.1
+      micromark-util-symbol: 2.0.1
+      micromark-util-types: 2.0.2
+    dev: false
+
+  /micromark-extension-gfm-autolink-literal@2.1.0:
+    resolution: {integrity: sha512-oOg7knzhicgQ3t4QCjCWgTmfNhvQbDDnJeVu9v81r7NltNCVmhPy1fJRX27pISafdjL+SVc4d3l48Gb6pbRypw==}
+    dependencies:
+      micromark-util-character: 2.1.1
+      micromark-util-sanitize-uri: 2.0.1
+      micromark-util-symbol: 2.0.1
+      micromark-util-types: 2.0.2
+    dev: false
+
+  /micromark-extension-gfm-footnote@2.1.0:
+    resolution: {integrity: sha512-/yPhxI1ntnDNsiHtzLKYnE3vf9JZ6cAisqVDauhp4CEHxlb4uoOTxOCJ+9s51bIB8U1N1FJ1RXOKTIlD5B/gqw==}
+    dependencies:
+      devlop: 1.1.0
+      micromark-core-commonmark: 2.0.3
+      micromark-factory-space: 2.0.1
+      micromark-util-character: 2.1.1
+      micromark-util-normalize-identifier: 2.0.1
+      micromark-util-sanitize-uri: 2.0.1
+      micromark-util-symbol: 2.0.1
+      micromark-util-types: 2.0.2
+    dev: false
+
+  /micromark-extension-gfm-strikethrough@2.1.0:
+    resolution: {integrity: sha512-ADVjpOOkjz1hhkZLlBiYA9cR2Anf8F4HqZUO6e5eDcPQd0Txw5fxLzzxnEkSkfnD0wziSGiv7sYhk/ktvbf1uw==}
+    dependencies:
+      devlop: 1.1.0
+      micromark-util-chunked: 2.0.1
+      micromark-util-classify-character: 2.0.1
+      micromark-util-resolve-all: 2.0.1
+      micromark-util-symbol: 2.0.1
+      micromark-util-types: 2.0.2
+    dev: false
+
+  /micromark-extension-gfm-table@2.1.1:
+    resolution: {integrity: sha512-t2OU/dXXioARrC6yWfJ4hqB7rct14e8f7m0cbI5hUmDyyIlwv5vEtooptH8INkbLzOatzKuVbQmAYcbWoyz6Dg==}
+    dependencies:
+      devlop: 1.1.0
+      micromark-factory-space: 2.0.1
+      micromark-util-character: 2.1.1
+      micromark-util-symbol: 2.0.1
+      micromark-util-types: 2.0.2
+    dev: false
+
+  /micromark-extension-gfm-tagfilter@2.0.0:
+    resolution: {integrity: sha512-xHlTOmuCSotIA8TW1mDIM6X2O1SiX5P9IuDtqGonFhEK0qgRI4yeC6vMxEV2dgyr2TiD+2PQ10o+cOhdVAcwfg==}
+    dependencies:
+      micromark-util-types: 2.0.2
+    dev: false
+
+  /micromark-extension-gfm-task-list-item@2.1.0:
+    resolution: {integrity: sha512-qIBZhqxqI6fjLDYFTBIa4eivDMnP+OZqsNwmQ3xNLE4Cxwc+zfQEfbs6tzAo2Hjq+bh6q5F+Z8/cksrLFYWQQw==}
+    dependencies:
+      devlop: 1.1.0
+      micromark-factory-space: 2.0.1
+      micromark-util-character: 2.1.1
+      micromark-util-symbol: 2.0.1
+      micromark-util-types: 2.0.2
+    dev: false
+
+  /micromark-extension-gfm@3.0.0:
+    resolution: {integrity: sha512-vsKArQsicm7t0z2GugkCKtZehqUm31oeGBV/KVSorWSy8ZlNAv7ytjFhvaryUiCUJYqs+NoE6AFhpQvBTM6Q4w==}
+    dependencies:
+      micromark-extension-gfm-autolink-literal: 2.1.0
+      micromark-extension-gfm-footnote: 2.1.0
+      micromark-extension-gfm-strikethrough: 2.1.0
+      micromark-extension-gfm-table: 2.1.1
+      micromark-extension-gfm-tagfilter: 2.0.0
+      micromark-extension-gfm-task-list-item: 2.1.0
+      micromark-util-combine-extensions: 2.0.1
+      micromark-util-types: 2.0.2
+    dev: false
+
+  /micromark-extension-math@3.1.0:
+    resolution: {integrity: sha512-lvEqd+fHjATVs+2v/8kg9i5Q0AP2k85H0WUOwpIVvUML8BapsMvh1XAogmQjOCsLpoKRCVQqEkQBB3NhVBcsOg==}
+    dependencies:
+      '@types/katex': 0.16.7
+      devlop: 1.1.0
+      katex: 0.16.22
+      micromark-factory-space: 2.0.1
+      micromark-util-character: 2.1.1
+      micromark-util-symbol: 2.0.1
+      micromark-util-types: 2.0.2
+    dev: false
+
+  /micromark-extension-mdx-expression@3.0.1:
+    resolution: {integrity: sha512-dD/ADLJ1AeMvSAKBwO22zG22N4ybhe7kFIZ3LsDI0GlsNr2A3KYxb0LdC1u5rj4Nw+CHKY0RVdnHX8vj8ejm4Q==}
+    dependencies:
+      '@types/estree': 1.0.8
+      devlop: 1.1.0
+      micromark-factory-mdx-expression: 2.0.3
+      micromark-factory-space: 2.0.1
+      micromark-util-character: 2.1.1
+      micromark-util-events-to-acorn: 2.0.3
+      micromark-util-symbol: 2.0.1
+      micromark-util-types: 2.0.2
+    dev: false
+
+  /micromark-extension-mdx-jsx@3.0.2:
+    resolution: {integrity: sha512-e5+q1DjMh62LZAJOnDraSSbDMvGJ8x3cbjygy2qFEi7HCeUT4BDKCvMozPozcD6WmOt6sVvYDNBKhFSz3kjOVQ==}
+    dependencies:
+      '@types/estree': 1.0.8
+      devlop: 1.1.0
+      estree-util-is-identifier-name: 3.0.0
+      micromark-factory-mdx-expression: 2.0.3
+      micromark-factory-space: 2.0.1
+      micromark-util-character: 2.1.1
+      micromark-util-events-to-acorn: 2.0.3
+      micromark-util-symbol: 2.0.1
+      micromark-util-types: 2.0.2
+      vfile-message: 4.0.3
+    dev: false
+
+  /micromark-extension-mdx-md@2.0.0:
+    resolution: {integrity: sha512-EpAiszsB3blw4Rpba7xTOUptcFeBFi+6PY8VnJ2hhimH+vCQDirWgsMpz7w1XcZE7LVrSAUGb9VJpG9ghlYvYQ==}
+    dependencies:
+      micromark-util-types: 2.0.2
+    dev: false
+
+  /micromark-extension-mdxjs-esm@3.0.0:
+    resolution: {integrity: sha512-DJFl4ZqkErRpq/dAPyeWp15tGrcrrJho1hKK5uBS70BCtfrIFg81sqcTVu3Ta+KD1Tk5vAtBNElWxtAa+m8K9A==}
+    dependencies:
+      '@types/estree': 1.0.8
+      devlop: 1.1.0
+      micromark-core-commonmark: 2.0.3
+      micromark-util-character: 2.1.1
+      micromark-util-events-to-acorn: 2.0.3
+      micromark-util-symbol: 2.0.1
+      micromark-util-types: 2.0.2
+      unist-util-position-from-estree: 2.0.0
+      vfile-message: 4.0.3
+    dev: false
+
+  /micromark-extension-mdxjs@3.0.0:
+    resolution: {integrity: sha512-A873fJfhnJ2siZyUrJ31l34Uqwy4xIFmvPY1oj+Ean5PHcPBYzEsvqvWGaWcfEIr11O5Dlw3p2y0tZWpKHDejQ==}
+    dependencies:
+      acorn: 8.15.0
+      acorn-jsx: 5.3.2(acorn@8.15.0)
+      micromark-extension-mdx-expression: 3.0.1
+      micromark-extension-mdx-jsx: 3.0.2
+      micromark-extension-mdx-md: 2.0.0
+      micromark-extension-mdxjs-esm: 3.0.0
+      micromark-util-combine-extensions: 2.0.1
+      micromark-util-types: 2.0.2
+    dev: false
+
+  /micromark-factory-destination@2.0.1:
+    resolution: {integrity: sha512-Xe6rDdJlkmbFRExpTOmRj9N3MaWmbAgdpSrBQvCFqhezUn4AHqJHbaEnfbVYYiexVSs//tqOdY/DxhjdCiJnIA==}
+    dependencies:
+      micromark-util-character: 2.1.1
+      micromark-util-symbol: 2.0.1
+      micromark-util-types: 2.0.2
+    dev: false
+
+  /micromark-factory-label@2.0.1:
+    resolution: {integrity: sha512-VFMekyQExqIW7xIChcXn4ok29YE3rnuyveW3wZQWWqF4Nv9Wk5rgJ99KzPvHjkmPXF93FXIbBp6YdW3t71/7Vg==}
+    dependencies:
+      devlop: 1.1.0
+      micromark-util-character: 2.1.1
+      micromark-util-symbol: 2.0.1
+      micromark-util-types: 2.0.2
+    dev: false
+
+  /micromark-factory-mdx-expression@2.0.3:
+    resolution: {integrity: sha512-kQnEtA3vzucU2BkrIa8/VaSAsP+EJ3CKOvhMuJgOEGg9KDC6OAY6nSnNDVRiVNRqj7Y4SlSzcStaH/5jge8JdQ==}
+    dependencies:
+      '@types/estree': 1.0.8
+      devlop: 1.1.0
+      micromark-factory-space: 2.0.1
+      micromark-util-character: 2.1.1
+      micromark-util-events-to-acorn: 2.0.3
+      micromark-util-symbol: 2.0.1
+      micromark-util-types: 2.0.2
+      unist-util-position-from-estree: 2.0.0
+      vfile-message: 4.0.3
+    dev: false
+
+  /micromark-factory-space@2.0.1:
+    resolution: {integrity: sha512-zRkxjtBxxLd2Sc0d+fbnEunsTj46SWXgXciZmHq0kDYGnck/ZSGj9/wULTV95uoeYiK5hRXP2mJ98Uo4cq/LQg==}
+    dependencies:
+      micromark-util-character: 2.1.1
+      micromark-util-types: 2.0.2
+    dev: false
+
+  /micromark-factory-title@2.0.1:
+    resolution: {integrity: sha512-5bZ+3CjhAd9eChYTHsjy6TGxpOFSKgKKJPJxr293jTbfry2KDoWkhBb6TcPVB4NmzaPhMs1Frm9AZH7OD4Cjzw==}
+    dependencies:
+      micromark-factory-space: 2.0.1
+      micromark-util-character: 2.1.1
+      micromark-util-symbol: 2.0.1
+      micromark-util-types: 2.0.2
+    dev: false
+
+  /micromark-factory-whitespace@2.0.1:
+    resolution: {integrity: sha512-Ob0nuZ3PKt/n0hORHyvoD9uZhr+Za8sFoP+OnMcnWK5lngSzALgQYKMr9RJVOWLqQYuyn6ulqGWSXdwf6F80lQ==}
+    dependencies:
+      micromark-factory-space: 2.0.1
+      micromark-util-character: 2.1.1
+      micromark-util-symbol: 2.0.1
+      micromark-util-types: 2.0.2
+    dev: false
+
+  /micromark-util-character@2.1.1:
+    resolution: {integrity: sha512-wv8tdUTJ3thSFFFJKtpYKOYiGP2+v96Hvk4Tu8KpCAsTMs6yi+nVmGh1syvSCsaxz45J6Jbw+9DD6g97+NV67Q==}
+    dependencies:
+      micromark-util-symbol: 2.0.1
+      micromark-util-types: 2.0.2
+    dev: false
+
+  /micromark-util-chunked@2.0.1:
+    resolution: {integrity: sha512-QUNFEOPELfmvv+4xiNg2sRYeS/P84pTW0TCgP5zc9FpXetHY0ab7SxKyAQCNCc1eK0459uoLI1y5oO5Vc1dbhA==}
+    dependencies:
+      micromark-util-symbol: 2.0.1
+    dev: false
+
+  /micromark-util-classify-character@2.0.1:
+    resolution: {integrity: sha512-K0kHzM6afW/MbeWYWLjoHQv1sgg2Q9EccHEDzSkxiP/EaagNzCm7T/WMKZ3rjMbvIpvBiZgwR3dKMygtA4mG1Q==}
+    dependencies:
+      micromark-util-character: 2.1.1
+      micromark-util-symbol: 2.0.1
+      micromark-util-types: 2.0.2
+    dev: false
+
+  /micromark-util-combine-extensions@2.0.1:
+    resolution: {integrity: sha512-OnAnH8Ujmy59JcyZw8JSbK9cGpdVY44NKgSM7E9Eh7DiLS2E9RNQf0dONaGDzEG9yjEl5hcqeIsj4hfRkLH/Bg==}
+    dependencies:
+      micromark-util-chunked: 2.0.1
+      micromark-util-types: 2.0.2
+    dev: false
+
+  /micromark-util-decode-numeric-character-reference@2.0.2:
+    resolution: {integrity: sha512-ccUbYk6CwVdkmCQMyr64dXz42EfHGkPQlBj5p7YVGzq8I7CtjXZJrubAYezf7Rp+bjPseiROqe7G6foFd+lEuw==}
+    dependencies:
+      micromark-util-symbol: 2.0.1
+    dev: false
+
+  /micromark-util-decode-string@2.0.1:
+    resolution: {integrity: sha512-nDV/77Fj6eH1ynwscYTOsbK7rR//Uj0bZXBwJZRfaLEJ1iGBR6kIfNmlNqaqJf649EP0F3NWNdeJi03elllNUQ==}
+    dependencies:
+      decode-named-character-reference: 1.2.0
+      micromark-util-character: 2.1.1
+      micromark-util-decode-numeric-character-reference: 2.0.2
+      micromark-util-symbol: 2.0.1
+    dev: false
+
+  /micromark-util-encode@2.0.1:
+    resolution: {integrity: sha512-c3cVx2y4KqUnwopcO9b/SCdo2O67LwJJ/UyqGfbigahfegL9myoEFoDYZgkT7f36T0bLrM9hZTAaAyH+PCAXjw==}
+    dev: false
+
+  /micromark-util-events-to-acorn@2.0.3:
+    resolution: {integrity: sha512-jmsiEIiZ1n7X1Rr5k8wVExBQCg5jy4UXVADItHmNk1zkwEVhBuIUKRu3fqv+hs4nxLISi2DQGlqIOGiFxgbfHg==}
+    dependencies:
+      '@types/estree': 1.0.8
+      '@types/unist': 3.0.3
+      devlop: 1.1.0
+      estree-util-visit: 2.0.0
+      micromark-util-symbol: 2.0.1
+      micromark-util-types: 2.0.2
+      vfile-message: 4.0.3
+    dev: false
+
+  /micromark-util-html-tag-name@2.0.1:
+    resolution: {integrity: sha512-2cNEiYDhCWKI+Gs9T0Tiysk136SnR13hhO8yW6BGNyhOC4qYFnwF1nKfD3HFAIXA5c45RrIG1ub11GiXeYd1xA==}
+    dev: false
+
+  /micromark-util-normalize-identifier@2.0.1:
+    resolution: {integrity: sha512-sxPqmo70LyARJs0w2UclACPUUEqltCkJ6PhKdMIDuJ3gSf/Q+/GIe3WKl0Ijb/GyH9lOpUkRAO2wp0GVkLvS9Q==}
+    dependencies:
+      micromark-util-symbol: 2.0.1
+    dev: false
+
+  /micromark-util-resolve-all@2.0.1:
+    resolution: {integrity: sha512-VdQyxFWFT2/FGJgwQnJYbe1jjQoNTS4RjglmSjTUlpUMa95Htx9NHeYW4rGDJzbjvCsl9eLjMQwGeElsqmzcHg==}
+    dependencies:
+      micromark-util-types: 2.0.2
+    dev: false
+
+  /micromark-util-sanitize-uri@2.0.1:
+    resolution: {integrity: sha512-9N9IomZ/YuGGZZmQec1MbgxtlgougxTodVwDzzEouPKo3qFWvymFHWcnDi2vzV1ff6kas9ucW+o3yzJK9YB1AQ==}
+    dependencies:
+      micromark-util-character: 2.1.1
+      micromark-util-encode: 2.0.1
+      micromark-util-symbol: 2.0.1
+    dev: false
+
+  /micromark-util-subtokenize@2.1.0:
+    resolution: {integrity: sha512-XQLu552iSctvnEcgXw6+Sx75GflAPNED1qx7eBJ+wydBb2KCbRZe+NwvIEEMM83uml1+2WSXpBAcp9IUCgCYWA==}
+    dependencies:
+      devlop: 1.1.0
+      micromark-util-chunked: 2.0.1
+      micromark-util-symbol: 2.0.1
+      micromark-util-types: 2.0.2
+    dev: false
+
+  /micromark-util-symbol@2.0.1:
+    resolution: {integrity: sha512-vs5t8Apaud9N28kgCrRUdEed4UJ+wWNvicHLPxCa9ENlYuAY31M0ETy5y1vA33YoNPDFTghEbnh6efaE8h4x0Q==}
+    dev: false
+
+  /micromark-util-types@2.0.2:
+    resolution: {integrity: sha512-Yw0ECSpJoViF1qTU4DC6NwtC4aWGt1EkzaQB8KPPyCRR8z9TWeV0HbEFGTO+ZY1wB22zmxnJqhPyTpOVCpeHTA==}
+    dev: false
+
+  /micromark@4.0.2:
+    resolution: {integrity: sha512-zpe98Q6kvavpCr1NPVSCMebCKfD7CA2NqZ+rykeNhONIJBpc1tFKt9hucLGwha3jNTNI8lHpctWJWoimVF4PfA==}
+    dependencies:
+      '@types/debug': 4.1.12
+      debug: 4.4.1
+      decode-named-character-reference: 1.2.0
+      devlop: 1.1.0
+      micromark-core-commonmark: 2.0.3
+      micromark-factory-space: 2.0.1
+      micromark-util-character: 2.1.1
+      micromark-util-chunked: 2.0.1
+      micromark-util-combine-extensions: 2.0.1
+      micromark-util-decode-numeric-character-reference: 2.0.2
+      micromark-util-encode: 2.0.1
+      micromark-util-normalize-identifier: 2.0.1
+      micromark-util-resolve-all: 2.0.1
+      micromark-util-sanitize-uri: 2.0.1
+      micromark-util-subtokenize: 2.1.0
+      micromark-util-symbol: 2.0.1
+      micromark-util-types: 2.0.2
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /micromatch@4.0.8:
+    resolution: {integrity: sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA==}
+    engines: {node: '>=8.6'}
+    dependencies:
+      braces: 3.0.3
+      picomatch: 2.3.1
+
+  /mime-db@1.54.0:
+    resolution: {integrity: sha512-aU5EJuIN2WDemCcAp2vFBfp/m4EAhWJnUNSSw0ixs7/kXbd6Pg64EmwJkNdFhB8aWt1sH2CTXrLxo/iAGV3oPQ==}
+    engines: {node: '>= 0.6'}
+    dev: false
+
+  /mime@1.6.0:
+    resolution: {integrity: sha512-x0Vn8spI+wuJ1O6S7gnbaQg8Pxh4NNHb7KSINmEWKiPE4RKOplvijn+NkmYmmRgP68mc70j2EbeTFRsrswaQeg==}
+    engines: {node: '>=4'}
+    hasBin: true
+    dev: false
+
+  /mimic-fn@2.1.0:
+    resolution: {integrity: sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==}
+    engines: {node: '>=6'}
+    dev: false
+
+  /mini-svg-data-uri@1.4.4:
+    resolution: {integrity: sha512-r9deDe9p5FJUPZAk3A59wGH7Ii9YrjjWw0jmw/liSbHl2CHiyXj6FcDXDu2K3TjVAXqiJdaw3xxwlZZr9E6nHg==}
+    hasBin: true
+    dev: false
+
+  /minimatch@10.0.3:
+    resolution: {integrity: sha512-IPZ167aShDZZUMdRk66cyQAW3qr0WzbHkPdMYa8bzZhlHhO3jALbKdxcaak7W9FfT2rZNpQuUu4Od7ILEpXSaw==}
+    engines: {node: 20 || >=22}
+    dependencies:
+      '@isaacs/brace-expansion': 5.0.0
+    dev: true
+
+  /minimatch@3.1.2:
+    resolution: {integrity: sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==}
+    dependencies:
+      brace-expansion: 1.1.12
+    dev: true
+
+  /minimatch@9.0.5:
+    resolution: {integrity: sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==}
+    engines: {node: '>=16 || 14 >=14.17'}
+    dependencies:
+      brace-expansion: 2.0.2
+    dev: false
+
+  /minimist@1.2.8:
+    resolution: {integrity: sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==}
+    dev: true
+
+  /minipass@7.1.2:
+    resolution: {integrity: sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==}
+    engines: {node: '>=16 || 14 >=14.17'}
+    dev: true
+
+  /minisearch@6.3.0:
+    resolution: {integrity: sha512-ihFnidEeU8iXzcVHy74dhkxh/dn8Dc08ERl0xwoMMGqp4+LvRSCgicb+zGqWthVokQKvCSxITlh3P08OzdTYCQ==}
+    dev: false
+
+  /mlly@1.7.4:
+    resolution: {integrity: sha512-qmdSIPC4bDJXgZTCR7XosJiNKySV7O215tsPtDN9iEO/7q/76b/ijtgRu/+epFXSJhijtTCCGp3DWS549P3xKw==}
+    dependencies:
+      acorn: 8.15.0
+      pathe: 2.0.3
+      pkg-types: 1.3.1
+      ufo: 1.6.1
+    dev: false
+
+  /modern-ahocorasick@1.1.0:
+    resolution: {integrity: sha512-sEKPVl2rM+MNVkGQt3ChdmD8YsigmXdn5NifZn6jiwn9LRJpWm8F3guhaqrJT/JOat6pwpbXEk6kv+b9DMIjsQ==}
+    dev: false
+
+  /ms@2.0.0:
+    resolution: {integrity: sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A==}
+    dev: false
+
+  /ms@2.1.3:
+    resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==}
+    dev: false
+
+  /nanoid@3.3.11:
+    resolution: {integrity: sha512-N8SpfPUnUp1bK+PMYW8qSWdl9U+wwNWI4QKxOYDy9JAro3WMX7p2OeVRF9v+347pnakNevPmiHhNmZ2HbFA76w==}
+    engines: {node: ^10 || ^12 || ^13.7 || ^14 || >=15.0.1}
+    hasBin: true
+    dev: false
+
+  /negotiator@0.6.4:
+    resolution: {integrity: sha512-myRT3DiWPHqho5PrJaIRyaMv2kgYf0mUVgBNOYMuCH5Ki1yEiQaf/ZJuQ62nvpc44wL5WDbTX7yGJi1Neevw8w==}
+    engines: {node: '>= 0.6'}
+    dev: false
+
+  /node-releases@2.0.19:
+    resolution: {integrity: sha512-xxOWJsBKtzAq7DY0J+DTzuz58K8e7sJbdgwkbMWQe8UYB6ekmsQ45q0M/tJDsGaZmbC+l7n57UV8Hl5tHxO9uw==}
+    dev: false
+
+  /normalize-range@0.1.2:
+    resolution: {integrity: sha512-bdok/XvKII3nUpklnV6P2hxtMNrCboOjAcyBuQnWEhO665FwrSNRxU+AqpsyvO6LgGYPspN+lu5CLtw4jPRKNA==}
+    engines: {node: '>=0.10.0'}
+    dev: false
+
+  /npm-run-path@4.0.1:
+    resolution: {integrity: sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw==}
+    engines: {node: '>=8'}
+    dependencies:
+      path-key: 3.1.1
+    dev: false
+
+  /nth-check@2.1.1:
+    resolution: {integrity: sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==}
+    dependencies:
+      boolbase: 1.0.0
+    dev: false
+
+  /object-keys@1.1.1:
+    resolution: {integrity: sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==}
+    engines: {node: '>= 0.4'}
+    dev: true
+
+  /on-finished@2.4.1:
+    resolution: {integrity: sha512-oVlzkg3ENAhCk2zdv7IJwd/QUD4z2RxRwpkcGY8psCVcCYZNq4wYnVWALHM+brtuJjePWiYF/ClmuDr8Ch5+kg==}
+    engines: {node: '>= 0.8'}
+    dependencies:
+      ee-first: 1.1.1
+    dev: false
+
+  /on-headers@1.1.0:
+    resolution: {integrity: sha512-737ZY3yNnXy37FHkQxPzt4UZ2UWPWiCZWLvFZ4fu5cueciegX0zGPnrlY6bwRg4FdQOe9YU8MkmJwGhoMybl8A==}
+    engines: {node: '>= 0.8'}
+    dev: false
+
+  /once@1.4.0:
+    resolution: {integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==}
+    dependencies:
+      wrappy: 1.0.2
+    dev: true
+
+  /onetime@5.1.2:
+    resolution: {integrity: sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==}
+    engines: {node: '>=6'}
+    dependencies:
+      mimic-fn: 2.1.0
+    dev: false
+
+  /oniguruma-to-es@2.3.0:
+    resolution: {integrity: sha512-bwALDxriqfKGfUufKGGepCzu9x7nJQuoRoAFp4AnwehhC2crqrDIAP/uN2qdlsAvSMpeRC3+Yzhqc7hLmle5+g==}
+    dependencies:
+      emoji-regex-xs: 1.0.0
+      regex: 5.1.1
+      regex-recursion: 5.1.1
+    dev: false
+
+  /open@7.4.2:
+    resolution: {integrity: sha512-MVHddDVweXZF3awtlAS+6pgKLlm/JgxZ90+/NBurBoQctVOOB/zDdVjcyPzQ+0laDGbsWgrRkflI65sQeOgT9Q==}
+    engines: {node: '>=8'}
+    dependencies:
+      is-docker: 2.2.1
+      is-wsl: 2.2.0
+    dev: true
+
+  /ora@7.0.1:
+    resolution: {integrity: sha512-0TUxTiFJWv+JnjWm4o9yvuskpEJLXTcng8MJuKd+SzAzp2o+OP3HWqNhB4OdJRt1Vsd9/mR0oyaEYlOnL7XIRw==}
+    engines: {node: '>=16'}
+    dependencies:
+      chalk: 5.5.0
+      cli-cursor: 4.0.0
+      cli-spinners: 2.9.2
+      is-interactive: 2.0.0
+      is-unicode-supported: 1.3.0
+      log-symbols: 5.1.0
+      stdin-discarder: 0.1.0
+      string-width: 6.1.0
+      strip-ansi: 7.1.0
+    dev: false
+
+  /os-tmpdir@1.0.2:
+    resolution: {integrity: sha512-D2FR03Vir7FIu45XBY20mTb+/ZSWB00sjU9jdQXt83gDrI4Ztz5Fs7/yy74g2N5SVQY4xY1qDr4rNddwYRVX0g==}
+    engines: {node: '>=0.10.0'}
+    dev: true
+
+  /p-limit@3.1.0:
+    resolution: {integrity: sha512-TYOanM3wGwNGsZN2cVTYPArw454xnXj5qmWF1bEoAc4+cU/ol7GVh7odevjp1FNHduHc3KZMcFduxU5Xc6uJRQ==}
+    engines: {node: '>=10'}
+    dependencies:
+      yocto-queue: 0.1.0
+    dev: false
+
+  /p-limit@5.0.0:
+    resolution: {integrity: sha512-/Eaoq+QyLSiXQ4lyYV23f14mZRQcXnxfHrN0vCai+ak9G0pp9iEQukIIZq5NccEvwRB8PUnZT0KsOoDCINS1qQ==}
+    engines: {node: '>=18'}
+    dependencies:
+      yocto-queue: 1.2.1
+    dev: false
+
+  /p-locate@5.0.0:
+    resolution: {integrity: sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==}
+    engines: {node: '>=10'}
+    dependencies:
+      p-limit: 3.1.0
+    dev: false
+
+  /package-json-from-dist@1.0.1:
+    resolution: {integrity: sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==}
+    dev: true
+
+  /package-manager-detector@1.3.0:
+    resolution: {integrity: sha512-ZsEbbZORsyHuO00lY1kV3/t72yp6Ysay6Pd17ZAlNGuGwmWDLCJxFpRs0IzfXfj1o4icJOkUEioexFHzyPurSQ==}
+    dev: false
+
+  /parse-entities@4.0.2:
+    resolution: {integrity: sha512-GG2AQYWoLgL877gQIKeRPGO1xF9+eG1ujIb5soS5gPvLQ1y2o8FL90w2QWNdf9I361Mpp7726c+lj3U0qK1uGw==}
+    dependencies:
+      '@types/unist': 2.0.11
+      character-entities-legacy: 3.0.0
+      character-reference-invalid: 2.0.1
+      decode-named-character-reference: 1.2.0
+      is-alphanumerical: 2.0.1
+      is-decimal: 2.0.1
+      is-hexadecimal: 2.0.1
+    dev: false
+
+  /parse5@7.3.0:
+    resolution: {integrity: sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==}
+    dependencies:
+      entities: 6.0.1
+    dev: false
+
+  /parseurl@1.3.3:
+    resolution: {integrity: sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==}
+    engines: {node: '>= 0.8'}
+    dev: false
+
+  /patch-package@8.0.0:
+    resolution: {integrity: sha512-da8BVIhzjtgScwDJ2TtKsfT5JFWz1hYoBl9rUQ1f38MC2HwnEIkK8VN3dKMKcP7P7bvvgzNDbfNHtx3MsQb5vA==}
+    engines: {node: '>=14', npm: '>5'}
+    hasBin: true
+    dependencies:
+      '@yarnpkg/lockfile': 1.1.0
+      chalk: 4.1.2
+      ci-info: 3.9.0
+      cross-spawn: 7.0.6
+      find-yarn-workspace-root: 2.0.0
+      fs-extra: 9.1.0
+      json-stable-stringify: 1.3.0
+      klaw-sync: 6.0.0
+      minimist: 1.2.8
+      open: 7.4.2
+      rimraf: 2.7.1
+      semver: 7.7.2
+      slash: 2.0.0
+      tmp: 0.0.33
+      yaml: 2.8.1
+    dev: true
+
+  /path-data-parser@0.1.0:
+    resolution: {integrity: sha512-NOnmBpt5Y2RWbuv0LMzsayp3lVylAHLPUTut412ZA3l+C4uw4ZVkQbjShYCQ8TCpUMdPapr4YjUqLYD6v68j+w==}
+    dev: false
+
+  /path-exists@4.0.0:
+    resolution: {integrity: sha512-ak9Qy5Q7jYb2Wwcey5Fpvg2KoAc/ZIhLSLOSBmRmygPsGwkVVt0fZa0qrtMz+m6tJTAHfZQ8FnmB4MG4LWy7/w==}
+    engines: {node: '>=8'}
+    dev: false
+
+  /path-is-absolute@1.0.1:
+    resolution: {integrity: sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==}
+    engines: {node: '>=0.10.0'}
+    dev: true
+
+  /path-key@3.1.1:
+    resolution: {integrity: sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==}
+    engines: {node: '>=8'}
+
+  /path-scurry@2.0.0:
+    resolution: {integrity: sha512-ypGJsmGtdXUOeM5u93TyeIEfEhM6s+ljAhrk5vAvSx8uyY/02OvrZnA0YNGUrPXfpJMgI1ODd3nwz8Npx4O4cg==}
+    engines: {node: 20 || >=22}
+    dependencies:
+      lru-cache: 11.1.0
+      minipass: 7.1.2
+    dev: true
+
+  /path-type@6.0.0:
+    resolution: {integrity: sha512-Vj7sf++t5pBD637NSfkxpHSMfWaeig5+DKWLhcqIYx6mWQz5hdJTGDVMQiJcw1ZYkhs7AazKDGpRVji1LJCZUQ==}
+    engines: {node: '>=18'}
+    dev: false
+
+  /pathe@2.0.3:
+    resolution: {integrity: sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w==}
+    dev: false
+
+  /picocolors@1.1.1:
+    resolution: {integrity: sha512-xceH2snhtb5M9liqDsmEw56le376mTZkEX/jEb/RxNFyegNul7eNslCXP9FDj/Lcu0X8KEyMceP2ntpaHrDEVA==}
+    dev: false
+
+  /picomatch@2.3.1:
+    resolution: {integrity: sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==}
+    engines: {node: '>=8.6'}
+
+  /picomatch@4.0.3:
+    resolution: {integrity: sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q==}
+    engines: {node: '>=12'}
+    dev: false
+
+  /pkg-types@1.3.1:
+    resolution: {integrity: sha512-/Jm5M4RvtBFVkKWRu2BLUTNP8/M2a+UwuAX+ae4770q1qVGtfjG+WTCupoZixokjmHiry8uI+dlY8KXYV5HVVQ==}
+    dependencies:
+      confbox: 0.1.8
+      mlly: 1.7.4
+      pathe: 2.0.3
+    dev: false
+
+  /pkg-types@2.2.0:
+    resolution: {integrity: sha512-2SM/GZGAEkPp3KWORxQZns4M+WSeXbC2HEvmOIJe3Cmiv6ieAJvdVhDldtHqM5J1Y7MrR1XhkBT/rMlhh9FdqQ==}
+    dependencies:
+      confbox: 0.2.2
+      exsolve: 1.0.7
+      pathe: 2.0.3
+    dev: false
+
+  /playwright-core@1.54.2:
+    resolution: {integrity: sha512-n5r4HFbMmWsB4twG7tJLDN9gmBUeSPcsBZiWSE4DnYz9mJMAFqr2ID7+eGC9kpEnxExJ1epttwR59LEWCk8mtA==}
+    engines: {node: '>=18'}
+    hasBin: true
+    dev: false
+
+  /playwright@1.54.2:
+    resolution: {integrity: sha512-Hu/BMoA1NAdRUuulyvQC0pEqZ4vQbGfn8f7wPXcnqQmM+zct9UliKxsIkLNmz/ku7LElUNqmaiv1TG/aL5ACsw==}
+    engines: {node: '>=18'}
+    hasBin: true
+    dependencies:
+      playwright-core: 1.54.2
+    optionalDependencies:
+      fsevents: 2.3.2
+    dev: false
+
+  /points-on-curve@0.2.0:
+    resolution: {integrity: sha512-0mYKnYYe9ZcqMCWhUjItv/oHjvgEsfKvnUTg8sAtnHr3GVy7rGkXCb6d5cSyqrWqL4k81b9CPg3urd+T7aop3A==}
+    dev: false
+
+  /points-on-path@0.2.1:
+    resolution: {integrity: sha512-25ClnWWuw7JbWZcgqY/gJ4FQWadKxGWk+3kR/7kD0tCaDtPPMj7oHu2ToLaVhfpnHrZzYby2w6tUA0eOIuUg8g==}
+    dependencies:
+      path-data-parser: 0.1.0
+      points-on-curve: 0.2.0
+    dev: false
+
+  /postcss-value-parser@4.2.0:
+    resolution: {integrity: sha512-1NNCs6uurfkVbeXG4S8JFT9t19m45ICnif8zWLd5oPSZ50QnwMfK+H3jv408d4jw/7Bttv5axS5IiHoLaVNHeQ==}
+    dev: false
+
+  /postcss@8.5.6:
+    resolution: {integrity: sha512-3Ybi1tAuwAP9s0r1UQ2J4n5Y0G05bJkpUIO0/bI9MhwmD70S5aTWbXGBwxHrelT+XM1k6dM0pk+SwNkpTRN7Pg==}
+    engines: {node: ^10 || ^12 || >=14}
+    dependencies:
+      nanoid: 3.3.11
+      picocolors: 1.1.1
+      source-map-js: 1.2.1
+    dev: false
+
+  /postinstall-postinstall@2.1.0:
+    resolution: {integrity: sha512-7hQX6ZlZXIoRiWNrbMQaLzUUfH+sSx39u8EJ9HYuDc1kLo9IXKWjM5RSquZN1ad5GnH8CGFM78fsAAQi3OKEEQ==}
+    requiresBuild: true
+    dev: true
+
+  /property-information@6.5.0:
+    resolution: {integrity: sha512-PgTgs/BlvHxOu8QuEN7wi5A0OmXaBcHpmCSTehcs6Uuu9IkDIEo13Hy7n898RHfrQ49vKCoGeWZSaAK01nwVig==}
+    dev: false
+
+  /property-information@7.1.0:
+    resolution: {integrity: sha512-TwEZ+X+yCJmYfL7TPUOcvBZ4QfoT5YenQiJuX//0th53DE6w0xxLEtfK3iyryQFddXuvkIk51EEgrJQ0WJkOmQ==}
+    dev: false
+
+  /quansync@0.2.11:
+    resolution: {integrity: sha512-AifT7QEbW9Nri4tAwR5M/uzpBuqfZf+zwaEM/QkzEjj7NBuFD2rBuy0K3dE+8wltbezDV7JMA0WfnCPYRSYbXA==}
+    dev: false
+
+  /queue-microtask@1.2.3:
+    resolution: {integrity: sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==}
+    dev: false
+
+  /radix-ui@1.4.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-aWizCQiyeAenIdUbqEpXgRA1ya65P13NKn/W8rWkcN0OPkRDxdBVLWnIEDsS2RpwCK2nobI7oMUSmexzTDyAmA==}
+    peerDependencies:
+      '@types/react': '*'
+      '@types/react-dom': '*'
+      react: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+      react-dom: ^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+      '@types/react-dom':
+        optional: true
+    dependencies:
+      '@radix-ui/primitive': 1.1.3
+      '@radix-ui/react-accessible-icon': 1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-accordion': 1.2.12(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-alert-dialog': 1.1.15(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-arrow': 1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-aspect-ratio': 1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-avatar': 1.1.10(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-checkbox': 1.3.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-collapsible': 1.1.12(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-collection': 1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-compose-refs': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context': 1.1.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-context-menu': 2.2.16(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-dialog': 1.1.15(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-direction': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-dismissable-layer': 1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-dropdown-menu': 2.1.16(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-focus-guards': 1.1.3(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-focus-scope': 1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-form': 0.1.8(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-hover-card': 1.1.15(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-label': 2.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-menu': 2.1.16(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-menubar': 1.1.16(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-navigation-menu': 1.2.14(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-one-time-password-field': 0.1.8(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-password-toggle-field': 0.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-popover': 1.1.15(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-popper': 1.2.8(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-portal': 1.1.9(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-presence': 1.1.5(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-primitive': 2.1.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-progress': 1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-radio-group': 1.3.8(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-roving-focus': 1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-scroll-area': 1.2.10(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-select': 2.2.6(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-separator': 1.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-slider': 1.3.6(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-slot': 1.2.3(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-switch': 1.2.6(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-tabs': 1.1.13(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-toast': 1.2.15(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-toggle': 1.1.10(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-toggle-group': 1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-toolbar': 1.1.11(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-tooltip': 1.2.8(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-use-callback-ref': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-controllable-state': 1.2.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-effect-event': 0.0.2(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-escape-keydown': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-is-hydrated': 0.1.0(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-layout-effect': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-use-size': 1.1.1(@types/react@19.1.10)(react@19.1.1)
+      '@radix-ui/react-visually-hidden': 1.2.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /range-parser@1.2.1:
+    resolution: {integrity: sha512-Hrgsx+orqoygnmhFbKaHE6c296J+HTAQXoxEF6gNupROmmGJRoyzfG3ccAveqCBrwr/2yxQ5BVd/GTl5agOwSg==}
+    engines: {node: '>= 0.6'}
+    dev: false
+
+  /react-dom@19.1.1(react@19.1.1):
+    resolution: {integrity: sha512-Dlq/5LAZgF0Gaz6yiqZCf6VCcZs1ghAJyrsu84Q/GT0gV+mCxbfmKNoGRKBYMJ8IEdGPqu49YWXD02GCknEDkw==}
+    peerDependencies:
+      react: ^19.1.1
+    dependencies:
+      react: 19.1.1
+      scheduler: 0.26.0
+    dev: false
+
+  /react-intersection-observer@9.16.0(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-w9nJSEp+DrW9KmQmeWHQyfaP6b03v+TdXynaoA964Wxt7mdR3An11z4NNCQgL4gKSK7y1ver2Fq+JKH6CWEzUA==}
+    peerDependencies:
+      react: ^17.0.0 || ^18.0.0 || ^19.0.0
+      react-dom: ^17.0.0 || ^18.0.0 || ^19.0.0
+    peerDependenciesMeta:
+      react-dom:
+        optional: true
+    dependencies:
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+    dev: false
+
+  /react-refresh@0.17.0:
+    resolution: {integrity: sha512-z6F7K9bV85EfseRCp2bzrpyQ0Gkw1uLoCel9XBVWPg/TjRj94SkJzUTGfOa4bs7iJvBWtQG0Wq7wnI0syw3EBQ==}
+    engines: {node: '>=0.10.0'}
+    dev: false
+
+  /react-remove-scroll-bar@2.3.8(@types/react@19.1.10)(react@19.1.1):
+    resolution: {integrity: sha512-9r+yi9+mgU33AKcj6IbT9oRCO78WriSj6t/cF8DWBZJ9aOGPOTEDvdUDz1FwKim7QXWwmHqtdHnRJfhAxEG46Q==}
+    engines: {node: '>=10'}
+    peerDependencies:
+      '@types/react': '*'
+      react: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+    dependencies:
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-style-singleton: 2.2.3(@types/react@19.1.10)(react@19.1.1)
+      tslib: 2.8.1
+    dev: false
+
+  /react-remove-scroll@2.7.1(@types/react@19.1.10)(react@19.1.1):
+    resolution: {integrity: sha512-HpMh8+oahmIdOuS5aFKKY6Pyog+FNaZV/XyJOq7b4YFwsFHe5yYfdbIalI4k3vU2nSDql7YskmUseHsRrJqIPA==}
+    engines: {node: '>=10'}
+    peerDependencies:
+      '@types/react': '*'
+      react: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+    dependencies:
+      '@types/react': 19.1.10
+      react: 19.1.1
+      react-remove-scroll-bar: 2.3.8(@types/react@19.1.10)(react@19.1.1)
+      react-style-singleton: 2.2.3(@types/react@19.1.10)(react@19.1.1)
+      tslib: 2.8.1
+      use-callback-ref: 1.3.3(@types/react@19.1.10)(react@19.1.1)
+      use-sidecar: 1.1.3(@types/react@19.1.10)(react@19.1.1)
+    dev: false
+
+  /react-router@7.8.1(react-dom@19.1.1)(react@19.1.1):
+    resolution: {integrity: sha512-5cy/M8DHcG51/KUIka1nfZ2QeylS4PJRs6TT8I4PF5axVsI5JUxp0hC0NZ/AEEj8Vw7xsEoD7L/6FY+zoYaOGA==}
+    engines: {node: '>=20.0.0'}
+    peerDependencies:
+      react: '>=18'
+      react-dom: '>=18'
+    peerDependenciesMeta:
+      react-dom:
+        optional: true
+    dependencies:
+      cookie: 1.0.2
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+      set-cookie-parser: 2.7.1
+    dev: false
+
+  /react-style-singleton@2.2.3(@types/react@19.1.10)(react@19.1.1):
+    resolution: {integrity: sha512-b6jSvxvVnyptAiLjbkWLE/lOnR4lfTtDAl+eUC7RZy+QQWc6wRzIV2CE6xBuMmDxc2qIihtDCZD5NPOFl7fRBQ==}
+    engines: {node: '>=10'}
+    peerDependencies:
+      '@types/react': '*'
+      react: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+    dependencies:
+      '@types/react': 19.1.10
+      get-nonce: 1.0.1
+      react: 19.1.1
+      tslib: 2.8.1
+    dev: false
+
+  /react@19.1.1:
+    resolution: {integrity: sha512-w8nqGImo45dmMIfljjMwOGtbmC/mk4CMYhWIicdSflH91J9TyCyczcPFXJzrZ/ZXcgGRFeP6BU0BEJTw6tZdfQ==}
+    engines: {node: '>=0.10.0'}
+    dev: false
+
+  /readable-stream@3.6.2:
+    resolution: {integrity: sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==}
+    engines: {node: '>= 6'}
+    dependencies:
+      inherits: 2.0.4
+      string_decoder: 1.3.0
+      util-deprecate: 1.0.2
+    dev: false
+
+  /recma-build-jsx@1.0.0:
+    resolution: {integrity: sha512-8GtdyqaBcDfva+GUKDr3nev3VpKAhup1+RvkMvUxURHpW7QyIvk9F5wz7Vzo06CEMSilw6uArgRqhpiUcWp8ew==}
+    dependencies:
+      '@types/estree': 1.0.8
+      estree-util-build-jsx: 3.0.1
+      vfile: 6.0.3
+    dev: false
+
+  /recma-jsx@1.0.1(acorn@8.15.0):
+    resolution: {integrity: sha512-huSIy7VU2Z5OLv6oFLosQGGDqPqdO1iq6bWNAdhzMxSJP7RAso4fCZ1cKu8j9YHCZf3TPrq4dw3okhrylgcd7w==}
+    peerDependencies:
+      acorn: ^6.0.0 || ^7.0.0 || ^8.0.0
+    dependencies:
+      acorn: 8.15.0
+      acorn-jsx: 5.3.2(acorn@8.15.0)
+      estree-util-to-js: 2.0.0
+      recma-parse: 1.0.0
+      recma-stringify: 1.0.0
+      unified: 11.0.5
+    dev: false
+
+  /recma-parse@1.0.0:
+    resolution: {integrity: sha512-OYLsIGBB5Y5wjnSnQW6t3Xg7q3fQ7FWbw/vcXtORTnyaSFscOtABg+7Pnz6YZ6c27fG1/aN8CjfwoUEUIdwqWQ==}
+    dependencies:
+      '@types/estree': 1.0.8
+      esast-util-from-js: 2.0.1
+      unified: 11.0.5
+      vfile: 6.0.3
+    dev: false
+
+  /recma-stringify@1.0.0:
+    resolution: {integrity: sha512-cjwII1MdIIVloKvC9ErQ+OgAtwHBmcZ0Bg4ciz78FtbT8In39aAYbaA7zvxQ61xVMSPE8WxhLwLbhif4Js2C+g==}
+    dependencies:
+      '@types/estree': 1.0.8
+      estree-util-to-js: 2.0.0
+      unified: 11.0.5
+      vfile: 6.0.3
+    dev: false
+
+  /regex-recursion@5.1.1:
+    resolution: {integrity: sha512-ae7SBCbzVNrIjgSbh7wMznPcQel1DNlDtzensnFxpiNpXt1U2ju/bHugH422r+4LAVS1FpW1YCwilmnNsjum9w==}
+    dependencies:
+      regex: 5.1.1
+      regex-utilities: 2.3.0
+    dev: false
+
+  /regex-utilities@2.3.0:
+    resolution: {integrity: sha512-8VhliFJAWRaUiVvREIiW2NXXTmHs4vMNnSzuJVhscgmGav3g9VDxLrQndI3dZZVVdp0ZO/5v0xmX516/7M9cng==}
+    dev: false
+
+  /regex@5.1.1:
+    resolution: {integrity: sha512-dN5I359AVGPnwzJm2jN1k0W9LPZ+ePvoOeVMMfqIMFz53sSwXkxaJoxr50ptnsC771lK95BnTrVSZxq0b9yCGw==}
+    dependencies:
+      regex-utilities: 2.3.0
+    dev: false
+
+  /rehype-autolink-headings@7.1.0:
+    resolution: {integrity: sha512-rItO/pSdvnvsP4QRB1pmPiNHUskikqtPojZKJPPPAVx9Hj8i8TwMBhofrrAYRhYOOBZH9tgmG5lPqDLuIWPWmw==}
+    dependencies:
+      '@types/hast': 3.0.4
+      '@ungap/structured-clone': 1.3.0
+      hast-util-heading-rank: 3.0.0
+      hast-util-is-element: 3.0.0
+      unified: 11.0.5
+      unist-util-visit: 5.0.0
+    dev: false
+
+  /rehype-class-names@2.0.0:
+    resolution: {integrity: sha512-jldCIiAEvXKdq8hqr5f5PzNdIDkvHC6zfKhwta9oRoMu7bn0W7qLES/JrrjBvr9rKz3nJ8x4vY1EWI+dhjHVZQ==}
+    dependencies:
+      '@types/hast': 3.0.4
+      hast-util-classnames: 3.0.0
+      hast-util-select: 6.0.4
+      unified: 11.0.5
+    dev: false
+
+  /rehype-katex@7.0.1:
+    resolution: {integrity: sha512-OiM2wrZ/wuhKkigASodFoo8wimG3H12LWQaH8qSPVJn9apWKFSH3YOCtbKpBorTVw/eI7cuT21XBbvwEswbIOA==}
+    dependencies:
+      '@types/hast': 3.0.4
+      '@types/katex': 0.16.7
+      hast-util-from-html-isomorphic: 2.0.0
+      hast-util-to-text: 4.0.2
+      katex: 0.16.22
+      unist-util-visit-parents: 6.0.1
+      vfile: 6.0.3
+    dev: false
+
+  /rehype-mermaid@3.0.0(playwright@1.54.2):
+    resolution: {integrity: sha512-fxrD5E4Fa1WXUjmjNDvLOMT4XB1WaxcfycFIWiYU0yEMQhcTDElc9aDFnbDFRLxG1Cfo1I3mfD5kg4sjlWaB+Q==}
+    peerDependencies:
+      playwright: '1'
+    peerDependenciesMeta:
+      playwright:
+        optional: true
+    dependencies:
+      '@types/hast': 3.0.4
+      hast-util-from-html-isomorphic: 2.0.0
+      hast-util-to-text: 4.0.2
+      mermaid-isomorphic: 3.0.4(playwright@1.54.2)
+      mini-svg-data-uri: 1.4.4
+      playwright: 1.54.2
+      space-separated-tokens: 2.0.2
+      unified: 11.0.5
+      unist-util-visit-parents: 6.0.1
+      vfile: 6.0.3
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /rehype-recma@1.0.0:
+    resolution: {integrity: sha512-lqA4rGUf1JmacCNWWZx0Wv1dHqMwxzsDWYMTowuplHF3xH0N/MmrZ/G3BDZnzAkRmxDadujCjaKM2hqYdCBOGw==}
+    dependencies:
+      '@types/estree': 1.0.8
+      '@types/hast': 3.0.4
+      hast-util-to-estree: 3.1.3
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /rehype-slug@6.0.0:
+    resolution: {integrity: sha512-lWyvf/jwu+oS5+hL5eClVd3hNdmwM1kAC0BUvEGD19pajQMIzcNUd/k9GsfQ+FfECvX+JE+e9/btsKH0EjJT6A==}
+    dependencies:
+      '@types/hast': 3.0.4
+      github-slugger: 2.0.0
+      hast-util-heading-rank: 3.0.0
+      hast-util-to-string: 3.0.1
+      unist-util-visit: 5.0.0
+    dev: false
+
+  /remark-directive@3.0.1:
+    resolution: {integrity: sha512-gwglrEQEZcZYgVyG1tQuA+h58EZfq5CSULw7J90AFuCTyib1thgHPoqQ+h9iFvU6R+vnZ5oNFQR5QKgGpk741A==}
+    dependencies:
+      '@types/mdast': 4.0.4
+      mdast-util-directive: 3.1.0
+      micromark-extension-directive: 3.0.2
+      unified: 11.0.5
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /remark-frontmatter@5.0.0:
+    resolution: {integrity: sha512-XTFYvNASMe5iPN0719nPrdItC9aU0ssC4v14mH1BCi1u0n1gAocqcujWUrByftZTbLhRtiKRyjYTSIOcr69UVQ==}
+    dependencies:
+      '@types/mdast': 4.0.4
+      mdast-util-frontmatter: 2.0.1
+      micromark-extension-frontmatter: 2.0.0
+      unified: 11.0.5
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /remark-gfm@4.0.1:
+    resolution: {integrity: sha512-1quofZ2RQ9EWdeN34S79+KExV1764+wCUGop5CPL1WGdD0ocPpu91lzPGbwWMECpEpd42kJGQwzRfyov9j4yNg==}
+    dependencies:
+      '@types/mdast': 4.0.4
+      mdast-util-gfm: 3.1.0
+      micromark-extension-gfm: 3.0.0
+      remark-parse: 11.0.0
+      remark-stringify: 11.0.0
+      unified: 11.0.5
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /remark-math@6.0.0:
+    resolution: {integrity: sha512-MMqgnP74Igy+S3WwnhQ7kqGlEerTETXMvJhrUzDikVZ2/uogJCb+WHUg97hK9/jcfc0dkD73s3LN8zU49cTEtA==}
+    dependencies:
+      '@types/mdast': 4.0.4
+      mdast-util-math: 3.0.0
+      micromark-extension-math: 3.1.0
+      unified: 11.0.5
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /remark-mdx-disable-explicit-jsx@0.1.0:
+    resolution: {integrity: sha512-NC7NUbu4bExZnsWDTJE3UhBRZujW3gyqMufhTHn2GHhZ5LetWzyieyuZerBPdSniLx4d7QKDbf+d3u/qmMGyaQ==}
+    dependencies:
+      '@types/hast': 2.3.10
+      unified: 10.1.2
+      unist-util-visit: 4.1.2
+    dev: false
+
+  /remark-mdx-frontmatter@5.2.0:
+    resolution: {integrity: sha512-U/hjUYTkQqNjjMRYyilJgLXSPF65qbLPdoESOkXyrwz2tVyhAnm4GUKhfXqOOS9W34M3545xEMq+aMpHgVjEeQ==}
+    dependencies:
+      '@types/mdast': 4.0.4
+      estree-util-value-to-estree: 3.4.0
+      toml: 3.0.0
+      unified: 11.0.5
+      unist-util-mdx-define: 1.1.2
+      yaml: 2.8.1
+    dev: false
+
+  /remark-mdx@3.1.0:
+    resolution: {integrity: sha512-Ngl/H3YXyBV9RcRNdlYsZujAmhsxwzxpDzpDEhFBVAGthS4GDgnctpDjgFl/ULx5UEDzqtW1cyBSNKqYYrqLBA==}
+    dependencies:
+      mdast-util-mdx: 3.0.0
+      micromark-extension-mdxjs: 3.0.0
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /remark-parse@11.0.0:
+    resolution: {integrity: sha512-FCxlKLNGknS5ba/1lmpYijMUzX2esxW5xQqjWxw2eHFfS2MSdaHVINFmhjo+qN1WhZhNimq0dZATN9pH0IDrpA==}
+    dependencies:
+      '@types/mdast': 4.0.4
+      mdast-util-from-markdown: 2.0.2
+      micromark-util-types: 2.0.2
+      unified: 11.0.5
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /remark-rehype@11.1.2:
+    resolution: {integrity: sha512-Dh7l57ianaEoIpzbp0PC9UKAdCSVklD8E5Rpw7ETfbTl3FqcOOgq5q2LVDhgGCkaBv7p24JXikPdvhhmHvKMsw==}
+    dependencies:
+      '@types/hast': 3.0.4
+      '@types/mdast': 4.0.4
+      mdast-util-to-hast: 13.2.0
+      unified: 11.0.5
+      vfile: 6.0.3
+    dev: false
+
+  /remark-stringify@11.0.0:
+    resolution: {integrity: sha512-1OSmLd3awB/t8qdoEOMazZkNsfVTeY4fTsgzcQFdXNq8ToTN4ZGwrMnlda4K6smTFKD+GRV6O48i6Z4iKgPPpw==}
+    dependencies:
+      '@types/mdast': 4.0.4
+      mdast-util-to-markdown: 2.1.2
+      unified: 11.0.5
+    dev: false
+
+  /require-like@0.1.2:
+    resolution: {integrity: sha512-oyrU88skkMtDdauHDuKVrgR+zuItqr6/c//FXzvmxRGMexSDc6hNvJInGW3LL46n+8b50RykrvwSUIIQH2LQ5A==}
+    dev: false
+
+  /restore-cursor@4.0.0:
+    resolution: {integrity: sha512-I9fPXU9geO9bHOt9pHHOhOkYerIMsmVaWB0rA2AI9ERh/+x/i7MV5HKBNrg+ljO5eoPVgCcnFuRjJ9uH6I/3eg==}
+    engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
+    dependencies:
+      onetime: 5.1.2
+      signal-exit: 3.0.7
+    dev: false
+
+  /reusify@1.1.0:
+    resolution: {integrity: sha512-g6QUff04oZpHs0eG5p83rFLhHeV00ug/Yf9nZM6fLeUrPguBTkTQOdpAWWspMh55TZfVQDPaN3NQJfbVRAxdIw==}
+    engines: {iojs: '>=1.0.0', node: '>=0.10.0'}
+    dev: false
+
+  /rimraf@2.7.1:
+    resolution: {integrity: sha512-uWjbaKIK3T1OSVptzX7Nl6PvQ3qAGtKEtVRjRuazjfL3Bx5eI409VZSqgND+4UNnmzLVdPj9FqFJNPqBZFve4w==}
+    deprecated: Rimraf versions prior to v4 are no longer supported
+    hasBin: true
+    dependencies:
+      glob: 7.2.3
+    dev: true
+
+  /robust-predicates@3.0.2:
+    resolution: {integrity: sha512-IXgzBWvWQwE6PrDI05OvmXUIruQTcoMDzRsOd5CDvHCVLcLHMTSYvOK5Cm46kWqlV3yAbuSpBZdJ5oP5OUoStg==}
+    dev: false
+
+  /rollup@4.46.2:
+    resolution: {integrity: sha512-WMmLFI+Boh6xbop+OAGo9cQ3OgX9MIg7xOQjn+pTCwOkk+FNDAeAemXkJ3HzDJrVXleLOFVa1ipuc1AmEx1Dwg==}
+    engines: {node: '>=18.0.0', npm: '>=8.0.0'}
+    hasBin: true
+    dependencies:
+      '@types/estree': 1.0.8
+    optionalDependencies:
+      '@rollup/rollup-android-arm-eabi': 4.46.2
+      '@rollup/rollup-android-arm64': 4.46.2
+      '@rollup/rollup-darwin-arm64': 4.46.2
+      '@rollup/rollup-darwin-x64': 4.46.2
+      '@rollup/rollup-freebsd-arm64': 4.46.2
+      '@rollup/rollup-freebsd-x64': 4.46.2
+      '@rollup/rollup-linux-arm-gnueabihf': 4.46.2
+      '@rollup/rollup-linux-arm-musleabihf': 4.46.2
+      '@rollup/rollup-linux-arm64-gnu': 4.46.2
+      '@rollup/rollup-linux-arm64-musl': 4.46.2
+      '@rollup/rollup-linux-loongarch64-gnu': 4.46.2
+      '@rollup/rollup-linux-ppc64-gnu': 4.46.2
+      '@rollup/rollup-linux-riscv64-gnu': 4.46.2
+      '@rollup/rollup-linux-riscv64-musl': 4.46.2
+      '@rollup/rollup-linux-s390x-gnu': 4.46.2
+      '@rollup/rollup-linux-x64-gnu': 4.46.2
+      '@rollup/rollup-linux-x64-musl': 4.46.2
+      '@rollup/rollup-win32-arm64-msvc': 4.46.2
+      '@rollup/rollup-win32-ia32-msvc': 4.46.2
+      '@rollup/rollup-win32-x64-msvc': 4.46.2
+      fsevents: 2.3.3
+    dev: false
+
+  /roughjs@4.6.6:
+    resolution: {integrity: sha512-ZUz/69+SYpFN/g/lUlo2FXcIjRkSu3nDarreVdGGndHEBJ6cXPdKguS8JGxwj5HA5xIbVKSmLgr5b3AWxtRfvQ==}
+    dependencies:
+      hachure-fill: 0.5.2
+      path-data-parser: 0.1.0
+      points-on-curve: 0.2.0
+      points-on-path: 0.2.1
+    dev: false
+
+  /run-parallel@1.2.0:
+    resolution: {integrity: sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==}
+    dependencies:
+      queue-microtask: 1.2.3
+    dev: false
+
+  /rw@1.3.3:
+    resolution: {integrity: sha512-PdhdWy89SiZogBLaw42zdeqtRJ//zFd2PgQavcICDUgJT5oW10QCRKbJ6bg4r0/UY2M6BWd5tkxuGFRvCkgfHQ==}
+    dev: false
+
+  /safe-buffer@5.2.1:
+    resolution: {integrity: sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==}
+    dev: false
+
+  /safer-buffer@2.1.2:
+    resolution: {integrity: sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==}
+    dev: false
+
+  /scheduler@0.26.0:
+    resolution: {integrity: sha512-NlHwttCI/l5gCPR3D1nNXtWABUmBwvZpEQiD4IXSbIDq8BzLIK/7Ir5gTFSGZDUu37K5cMNp0hFtzO38sC7gWA==}
+    dev: false
+
+  /semver@6.3.1:
+    resolution: {integrity: sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA==}
+    hasBin: true
+    dev: false
+
+  /semver@7.7.2:
+    resolution: {integrity: sha512-RF0Fw+rO5AMf9MAyaRXI4AV0Ulj5lMHqVxxdSgiVbixSCXoEmmX/jk0CuJw4+3SqroYO9VoUh+HcuJivvtJemA==}
+    engines: {node: '>=10'}
+    hasBin: true
+    dev: true
+
+  /send@0.19.0:
+    resolution: {integrity: sha512-dW41u5VfLXu8SJh5bwRmyYUbAoSB3c9uQh6L8h/KtsFREPWpbX1lrljJo186Jc4nmci/sGUZ9a0a0J2zgfq2hw==}
+    engines: {node: '>= 0.8.0'}
+    dependencies:
+      debug: 2.6.9
+      depd: 2.0.0
+      destroy: 1.2.0
+      encodeurl: 1.0.2
+      escape-html: 1.0.3
+      etag: 1.8.1
+      fresh: 0.5.2
+      http-errors: 2.0.0
+      mime: 1.6.0
+      ms: 2.1.3
+      on-finished: 2.4.1
+      range-parser: 1.2.1
+      statuses: 2.0.1
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /serve-static@1.16.2:
+    resolution: {integrity: sha512-VqpjJZKadQB/PEbEwvFdO43Ax5dFBZ2UECszz8bQ7pi7wt//PWe1P6MN7eCnjsatYtBT6EuiClbjSWP2WrIoTw==}
+    engines: {node: '>= 0.8.0'}
+    dependencies:
+      encodeurl: 2.0.0
+      escape-html: 1.0.3
+      parseurl: 1.3.3
+      send: 0.19.0
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /set-cookie-parser@2.7.1:
+    resolution: {integrity: sha512-IOc8uWeOZgnb3ptbCURJWNjWUPcO3ZnTTdzsurqERrP6nPyv+paC55vJM0LpOlT2ne+Ix+9+CRG1MNLlyZ4GjQ==}
+    dev: false
+
+  /set-function-length@1.2.2:
+    resolution: {integrity: sha512-pgRc4hJ4/sNjWCSS9AmnS40x3bNMDTknHgL5UaMBTMyJnU90EgWh1Rz+MC9eFu4BuN/UwZjKQuY/1v3rM7HMfg==}
+    engines: {node: '>= 0.4'}
+    dependencies:
+      define-data-property: 1.1.4
+      es-errors: 1.3.0
+      function-bind: 1.1.2
+      get-intrinsic: 1.3.0
+      gopd: 1.2.0
+      has-property-descriptors: 1.0.2
+    dev: true
+
+  /setprototypeof@1.2.0:
+    resolution: {integrity: sha512-E5LDX7Wrp85Kil5bhZv46j8jOeboKq5JMmYM3gVGdGH8xFpPWXUMsNrlODCrkoxMEeNi/XZIwuRvY4XNwYMJpw==}
+    dev: false
+
+  /shebang-command@2.0.0:
+    resolution: {integrity: sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==}
+    engines: {node: '>=8'}
+    dependencies:
+      shebang-regex: 3.0.0
+
+  /shebang-regex@3.0.0:
+    resolution: {integrity: sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==}
+    engines: {node: '>=8'}
+
+  /shiki@1.29.2:
+    resolution: {integrity: sha512-njXuliz/cP+67jU2hukkxCNuH1yUi4QfdZZY+sMr5PPrIyXSu5iTb/qYC4BiWWB0vZ+7TbdvYUCeL23zpwCfbg==}
+    dependencies:
+      '@shikijs/core': 1.29.2
+      '@shikijs/engine-javascript': 1.29.2
+      '@shikijs/engine-oniguruma': 1.29.2
+      '@shikijs/langs': 1.29.2
+      '@shikijs/themes': 1.29.2
+      '@shikijs/types': 1.29.2
+      '@shikijs/vscode-textmate': 10.0.2
+      '@types/hast': 3.0.4
+    dev: false
+
+  /signal-exit@3.0.7:
+    resolution: {integrity: sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==}
+    dev: false
+
+  /signal-exit@4.1.0:
+    resolution: {integrity: sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==}
+    engines: {node: '>=14'}
+    dev: true
+
+  /sisteransi@1.0.5:
+    resolution: {integrity: sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg==}
+    dev: false
+
+  /slash@2.0.0:
+    resolution: {integrity: sha512-ZYKh3Wh2z1PpEXWr0MpSBZ0V6mZHAQfYevttO11c51CaWjGTaadiKZ+wVt1PbMlDV5qhMFslpZCemhwOK7C89A==}
+    engines: {node: '>=6'}
+    dev: true
+
+  /slash@5.1.0:
+    resolution: {integrity: sha512-ZA6oR3T/pEyuqwMgAKT0/hAv8oAXckzbkmR0UkUosQ+Mc4RxGoJkRmwHgHufaenlyAgE1Mxgpdcrf75y6XcnDg==}
+    engines: {node: '>=14.16'}
+    dev: false
+
+  /source-map-js@1.2.1:
+    resolution: {integrity: sha512-UXWMKhLOwVKb728IUtQPXxfYU+usdybtUrK/8uGE8CQMvrhOpwvzDBwj0QhSL7MQc7vIsISBG8VQ8+IDQxpfQA==}
+    engines: {node: '>=0.10.0'}
+    dev: false
+
+  /source-map@0.7.6:
+    resolution: {integrity: sha512-i5uvt8C3ikiWeNZSVZNWcfZPItFQOsYTUAOkcUPGd8DqDy1uOUikjt5dG+uRlwyvR108Fb9DOd4GvXfT0N2/uQ==}
+    engines: {node: '>= 12'}
+    dev: false
+
+  /space-separated-tokens@2.0.2:
+    resolution: {integrity: sha512-PEGlAwrG8yXGXRjW32fGbg66JAlOAwbObuqVoJpv/mRgoWDQfgH1wDPvtzWyUSNAXBGSk8h755YDbbcEy3SH2Q==}
+    dev: false
+
+  /statuses@2.0.1:
+    resolution: {integrity: sha512-RwNA9Z/7PrK06rYLIzFMlaF+l73iwpzsqRIFgbMLbTcLD6cOao82TaWefPXQvB2fOC4AjuYSEndS7N/mTCbkdQ==}
+    engines: {node: '>= 0.8'}
+    dev: false
+
+  /stdin-discarder@0.1.0:
+    resolution: {integrity: sha512-xhV7w8S+bUwlPTb4bAOUQhv8/cSS5offJuX8GQGq32ONF0ZtDWKfkdomM3HMRA+LhX6um/FZ0COqlwsjD53LeQ==}
+    engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0}
+    dependencies:
+      bl: 5.1.0
+    dev: false
+
+  /string-width@4.2.3:
+    resolution: {integrity: sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==}
+    engines: {node: '>=8'}
+    dependencies:
+      emoji-regex: 8.0.0
+      is-fullwidth-code-point: 3.0.0
+      strip-ansi: 6.0.1
+    dev: true
+
+  /string-width@5.1.2:
+    resolution: {integrity: sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==}
+    engines: {node: '>=12'}
+    dependencies:
+      eastasianwidth: 0.2.0
+      emoji-regex: 9.2.2
+      strip-ansi: 7.1.0
+    dev: true
+
+  /string-width@6.1.0:
+    resolution: {integrity: sha512-k01swCJAgQmuADB0YIc+7TuatfNvTBVOoaUWJjTB9R4VJzR5vNWzf5t42ESVZFPS8xTySF7CAdV4t/aaIm3UnQ==}
+    engines: {node: '>=16'}
+    dependencies:
+      eastasianwidth: 0.2.0
+      emoji-regex: 10.4.0
+      strip-ansi: 7.1.0
+    dev: false
+
+  /string_decoder@1.3.0:
+    resolution: {integrity: sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==}
+    dependencies:
+      safe-buffer: 5.2.1
+    dev: false
+
+  /stringify-entities@4.0.4:
+    resolution: {integrity: sha512-IwfBptatlO+QCJUo19AqvrPNqlVMpW9YEL2LIVY+Rpv2qsjCGxaDLNRgeGsQWJhfItebuJhsGSLjaBbNSQ+ieg==}
+    dependencies:
+      character-entities-html4: 2.1.0
+      character-entities-legacy: 3.0.0
+    dev: false
+
+  /strip-ansi@6.0.1:
+    resolution: {integrity: sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==}
+    engines: {node: '>=8'}
+    dependencies:
+      ansi-regex: 5.0.1
+    dev: true
+
+  /strip-ansi@7.1.0:
+    resolution: {integrity: sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==}
+    engines: {node: '>=12'}
+    dependencies:
+      ansi-regex: 6.1.0
+
+  /strip-final-newline@2.0.0:
+    resolution: {integrity: sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA==}
+    engines: {node: '>=6'}
+    dev: false
+
+  /style-to-js@1.1.17:
+    resolution: {integrity: sha512-xQcBGDxJb6jjFCTzvQtfiPn6YvvP2O8U1MDIPNfJQlWMYfktPy+iGsHE7cssjs7y84d9fQaK4UF3RIJaAHSoYA==}
+    dependencies:
+      style-to-object: 1.0.9
+    dev: false
+
+  /style-to-object@1.0.9:
+    resolution: {integrity: sha512-G4qppLgKu/k6FwRpHiGiKPaPTFcG3g4wNVX/Qsfu+RqQM30E7Tyu/TEgxcL9PNLF5pdRLwQdE3YKKf+KF2Dzlw==}
+    dependencies:
+      inline-style-parser: 0.2.4
+    dev: false
+
+  /stylis@4.3.6:
+    resolution: {integrity: sha512-yQ3rwFWRfwNUY7H5vpU0wfdkNSnvnJinhF9830Swlaxl03zsOjCfmX0ugac+3LtK0lYSgwL/KXc8oYL3mG4YFQ==}
+    dev: false
+
+  /supports-color@7.2.0:
+    resolution: {integrity: sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw==}
+    engines: {node: '>=8'}
+    dependencies:
+      has-flag: 4.0.0
+    dev: true
+
+  /tabbable@6.2.0:
+    resolution: {integrity: sha512-Cat63mxsVJlzYvN51JmVXIgNoUokrIaT2zLclCXjRd8boZ0004U4KCs/sToJ75C6sdlByWxpYnb5Boif1VSFew==}
+    dev: false
+
+  /tailwindcss@4.0.7:
+    resolution: {integrity: sha512-yH5bPPyapavo7L+547h3c4jcBXcrKwybQRjwdEIVAd9iXRvy/3T1CC6XSQEgZtRySjKfqvo3Cc0ZF1DTheuIdA==}
+    dev: false
+
+  /tapable@2.2.2:
+    resolution: {integrity: sha512-Re10+NauLTMCudc7T5WLFLAwDhQ0JWdrMK+9B2M8zR5hRExKmsRDCBA7/aV/pNJFltmBFO5BAMlQFi/vq3nKOg==}
+    engines: {node: '>=6'}
+    dev: false
+
+  /tinyexec@1.0.1:
+    resolution: {integrity: sha512-5uC6DDlmeqiOwCPmK9jMSdOuZTh8bU39Ys6yidB+UTt5hfZUPGAypSgFRiEp+jbi9qH40BLDvy85jIU88wKSqw==}
+    dev: false
+
+  /tinyglobby@0.2.14:
+    resolution: {integrity: sha512-tX5e7OM1HnYr2+a2C/4V0htOcSQcoSTH9KgJnVvNm5zm/cyEWKJ7j7YutsH9CxMdtOkkLFy2AHrMci9IM8IPZQ==}
+    engines: {node: '>=12.0.0'}
+    dependencies:
+      fdir: 6.5.0(picomatch@4.0.3)
+      picomatch: 4.0.3
+    dev: false
+
+  /tmp@0.0.33:
+    resolution: {integrity: sha512-jRCJlojKnZ3addtTOjdIqoRuPEKBvNXcGYqzO6zWZX8KfKEpnGY5jfggJQ3EjKuu8D4bJRr0y+cYJFmYbImXGw==}
+    engines: {node: '>=0.6.0'}
+    dependencies:
+      os-tmpdir: 1.0.2
+    dev: true
+
+  /to-regex-range@5.0.1:
+    resolution: {integrity: sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==}
+    engines: {node: '>=8.0'}
+    dependencies:
+      is-number: 7.0.0
+
+  /toidentifier@1.0.1:
+    resolution: {integrity: sha512-o5sSPKEkg/DIQNmH43V0/uerLrpzVedkUh8tGNvaeXpfpuwjKenlSox/2O/BTlZUtEe+JG7s5YhEz608PlAHRA==}
+    engines: {node: '>=0.6'}
+    dev: false
+
+  /toml@3.0.0:
+    resolution: {integrity: sha512-y/mWCZinnvxjTKYhJ+pYxwD0mRLVvOtdS2Awbgxln6iEnt4rk0yBxeSBHkGJcPucRiG0e55mwWp+g/05rsrd6w==}
+    dev: false
+
+  /trim-lines@3.0.1:
+    resolution: {integrity: sha512-kRj8B+YHZCc9kQYdWfJB2/oUl9rA99qbowYYBtr4ui4mZyAQ2JpvVBd/6U2YloATfqBhBTSMhTpgBHtU0Mf3Rg==}
+    dev: false
+
+  /trough@2.2.0:
+    resolution: {integrity: sha512-tmMpK00BjZiUyVyvrBK7knerNgmgvcV/KLVyuma/SC+TQN167GrMRciANTz09+k3zW8L8t60jWO1GpfkZdjTaw==}
+    dev: false
+
+  /ts-dedent@2.2.0:
+    resolution: {integrity: sha512-q5W7tVM71e2xjHZTlgfTDoPF/SmqKG5hddq9SzR49CH2hayqRKJtQ4mtRlSxKaJlR/+9rEM+mnBHf7I2/BQcpQ==}
+    engines: {node: '>=6.10'}
+    dev: false
+
+  /tslib@2.8.1:
+    resolution: {integrity: sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==}
+    dev: false
+
+  /twoslash-protocol@0.2.12:
+    resolution: {integrity: sha512-5qZLXVYfZ9ABdjqbvPc4RWMr7PrpPaaDSeaYY55vl/w1j6H6kzsWK/urAEIXlzYlyrFmyz1UbwIt+AA0ck+wbg==}
+    dev: false
+
+  /twoslash@0.2.12(typescript@5.9.2):
+    resolution: {integrity: sha512-tEHPASMqi7kqwfJbkk7hc/4EhlrKCSLcur+TcvYki3vhIfaRMXnXjaYFgXpoZRbT6GdprD4tGuVBEmTpUgLBsw==}
+    peerDependencies:
+      typescript: '*'
+    dependencies:
+      '@typescript/vfs': 1.6.1(typescript@5.9.2)
+      twoslash-protocol: 0.2.12
+      typescript: 5.9.2
+    transitivePeerDependencies:
+      - supports-color
+    dev: false
+
+  /typescript@5.9.2:
+    resolution: {integrity: sha512-CWBzXQrc/qOkhidw1OzBTQuYRbfyxDXJMVJ1XNwUHGROVmuaeiEm3OslpZ1RV96d7SKKjZKrSJu3+t/xlw3R9A==}
+    engines: {node: '>=14.17'}
+    hasBin: true
+
+  /ua-parser-js@1.0.40:
+    resolution: {integrity: sha512-z6PJ8Lml+v3ichVojCiB8toQJBuwR42ySM4ezjXIqXK3M0HczmKQ3LF4rhU55PfD99KEEXQG6yb7iOMyvYuHew==}
+    hasBin: true
+    dev: false
+
+  /ufo@1.6.1:
+    resolution: {integrity: sha512-9a4/uxlTWJ4+a5i0ooc1rU7C7YOw3wT+UGqdeNNHWnOF9qcMBgLRS+4IYUqbczewFx4mLEig6gawh7X6mFlEkA==}
+    dev: false
+
+  /undici-types@7.10.0:
+    resolution: {integrity: sha512-t5Fy/nfn+14LuOc2KNYg75vZqClpAiqscVvMygNnlsHBFpSXdJaYtXMcdNLpl/Qvc3P2cB3s6lOV51nqsFq4ag==}
+
+  /unicorn-magic@0.3.0:
+    resolution: {integrity: sha512-+QBBXBCvifc56fsbuxZQ6Sic3wqqc3WWaqxs58gvJrcOuN83HGTCwz3oS5phzU9LthRNE9VrJCFCLUgHeeFnfA==}
+    engines: {node: '>=18'}
+    dev: false
+
+  /unified@10.1.2:
+    resolution: {integrity: sha512-pUSWAi/RAnVy1Pif2kAoeWNBa3JVrx0MId2LASj8G+7AiHWoKZNTomq6LG326T68U7/e263X6fTdcXIy7XnF7Q==}
+    dependencies:
+      '@types/unist': 2.0.11
+      bail: 2.0.2
+      extend: 3.0.2
+      is-buffer: 2.0.5
+      is-plain-obj: 4.1.0
+      trough: 2.2.0
+      vfile: 5.3.7
+    dev: false
+
+  /unified@11.0.5:
+    resolution: {integrity: sha512-xKvGhPWw3k84Qjh8bI3ZeJjqnyadK+GEFtazSfZv/rKeTkTjOJho6mFqh2SM96iIcZokxiOpg78GazTSg8+KHA==}
+    dependencies:
+      '@types/unist': 3.0.3
+      bail: 2.0.2
+      devlop: 1.1.0
+      extend: 3.0.2
+      is-plain-obj: 4.1.0
+      trough: 2.2.0
+      vfile: 6.0.3
+    dev: false
+
+  /unist-util-find-after@5.0.0:
+    resolution: {integrity: sha512-amQa0Ep2m6hE2g72AugUItjbuM8X8cGQnFoHk0pGfrFeT9GZhzN5SW8nRsiGKK7Aif4CrACPENkA6P/Lw6fHGQ==}
+    dependencies:
+      '@types/unist': 3.0.3
+      unist-util-is: 6.0.0
+    dev: false
+
+  /unist-util-is@5.2.1:
+    resolution: {integrity: sha512-u9njyyfEh43npf1M+yGKDGVPbY/JWEemg5nH05ncKPfi+kBbKBJoTdsogMu33uhytuLlv9y0O7GH7fEdwLdLQw==}
+    dependencies:
+      '@types/unist': 2.0.11
+    dev: false
+
+  /unist-util-is@6.0.0:
+    resolution: {integrity: sha512-2qCTHimwdxLfz+YzdGfkqNlH0tLi9xjTnHddPmJwtIG9MGsdbutfTc4P+haPD7l7Cjxf/WZj+we5qfVPvvxfYw==}
+    dependencies:
+      '@types/unist': 3.0.3
+    dev: false
+
+  /unist-util-mdx-define@1.1.2:
+    resolution: {integrity: sha512-9ncH7i7TN5Xn7/tzX5bE3rXgz1X/u877gYVAUB3mLeTKYJmQHmqKTDBi6BTGXV7AeolBCI9ErcVsOt2qryoD0g==}
+    dependencies:
+      '@types/estree': 1.0.8
+      '@types/hast': 3.0.4
+      '@types/mdast': 4.0.4
+      estree-util-is-identifier-name: 3.0.0
+      estree-util-scope: 1.0.0
+      estree-walker: 3.0.3
+      vfile: 6.0.3
+    dev: false
+
+  /unist-util-position-from-estree@2.0.0:
+    resolution: {integrity: sha512-KaFVRjoqLyF6YXCbVLNad/eS4+OfPQQn2yOd7zF/h5T/CSL2v8NpN6a5TPvtbXthAGw5nG+PuTtq+DdIZr+cRQ==}
+    dependencies:
+      '@types/unist': 3.0.3
+    dev: false
+
+  /unist-util-position@5.0.0:
+    resolution: {integrity: sha512-fucsC7HjXvkB5R3kTCO7kUjRdrS0BJt3M/FPxmHMBOm8JQi2BsHAHFsy27E0EolP8rp0NzXsJ+jNPyDWvOJZPA==}
+    dependencies:
+      '@types/unist': 3.0.3
+    dev: false
+
+  /unist-util-remove-position@5.0.0:
+    resolution: {integrity: sha512-Hp5Kh3wLxv0PHj9m2yZhhLt58KzPtEYKQQ4yxfYFEO7EvHwzyDYnduhHnY1mDxoqr7VUwVuHXk9RXKIiYS1N8Q==}
+    dependencies:
+      '@types/unist': 3.0.3
+      unist-util-visit: 5.0.0
+    dev: false
+
+  /unist-util-stringify-position@3.0.3:
+    resolution: {integrity: sha512-k5GzIBZ/QatR8N5X2y+drfpWG8IDBzdnVj6OInRNWm1oXrzydiaAT2OQiA8DPRRZyAKb9b6I2a6PxYklZD0gKg==}
+    dependencies:
+      '@types/unist': 2.0.11
+    dev: false
+
+  /unist-util-stringify-position@4.0.0:
+    resolution: {integrity: sha512-0ASV06AAoKCDkS2+xw5RXJywruurpbC4JZSm7nr7MOt1ojAzvyyaO+UxZf18j8FCF6kmzCZKcAgN/yu2gm2XgQ==}
+    dependencies:
+      '@types/unist': 3.0.3
+    dev: false
+
+  /unist-util-visit-parents@5.1.3:
+    resolution: {integrity: sha512-x6+y8g7wWMyQhL1iZfhIPhDAs7Xwbn9nRosDXl7qoPTSCy0yNxnKc+hWokFifWQIDGi154rdUqKvbCa4+1kLhg==}
+    dependencies:
+      '@types/unist': 2.0.11
+      unist-util-is: 5.2.1
+    dev: false
+
+  /unist-util-visit-parents@6.0.1:
+    resolution: {integrity: sha512-L/PqWzfTP9lzzEa6CKs0k2nARxTdZduw3zyh8d2NVBnsyvHjSX4TWse388YrrQKbvI8w20fGjGlhgT96WwKykw==}
+    dependencies:
+      '@types/unist': 3.0.3
+      unist-util-is: 6.0.0
+    dev: false
+
+  /unist-util-visit@4.1.2:
+    resolution: {integrity: sha512-MSd8OUGISqHdVvfY9TPhyK2VdUrPgxkUtWSuMHF6XAAFuL4LokseigBnZtPnJMu+FbynTkFNnFlyjxpVKujMRg==}
+    dependencies:
+      '@types/unist': 2.0.11
+      unist-util-is: 5.2.1
+      unist-util-visit-parents: 5.1.3
+    dev: false
+
+  /unist-util-visit@5.0.0:
+    resolution: {integrity: sha512-MR04uvD+07cwl/yhVuVWAtw+3GOR/knlL55Nd/wAdblk27GCVt3lqpTivy/tkJcZoNPzTwS1Y+KMojlLDhoTzg==}
+    dependencies:
+      '@types/unist': 3.0.3
+      unist-util-is: 6.0.0
+      unist-util-visit-parents: 6.0.1
+    dev: false
+
+  /universalify@2.0.1:
+    resolution: {integrity: sha512-gptHNQghINnc/vTGIk0SOFGFNXw7JVrlRUtConJRlvaw6DuX0wO5Jeko9sWrMBhh+PsYAZ7oXAiOnf/UKogyiw==}
+    engines: {node: '>= 10.0.0'}
+
+  /update-browserslist-db@1.1.3(browserslist@4.25.2):
+    resolution: {integrity: sha512-UxhIZQ+QInVdunkDAaiazvvT/+fXL5Osr0JZlJulepYu6Jd7qJtDZjlur0emRlT71EN3ScPoE7gvsuIKKNavKw==}
+    hasBin: true
+    peerDependencies:
+      browserslist: '>= 4.21.0'
+    dependencies:
+      browserslist: 4.25.2
+      escalade: 3.2.0
+      picocolors: 1.1.1
+    dev: false
+
+  /use-callback-ref@1.3.3(@types/react@19.1.10)(react@19.1.1):
+    resolution: {integrity: sha512-jQL3lRnocaFtu3V00JToYz/4QkNWswxijDaCVNZRiRTO3HQDLsdu1ZtmIUvV4yPp+rvWm5j0y0TG/S61cuijTg==}
+    engines: {node: '>=10'}
+    peerDependencies:
+      '@types/react': '*'
+      react: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+    dependencies:
+      '@types/react': 19.1.10
+      react: 19.1.1
+      tslib: 2.8.1
+    dev: false
+
+  /use-sidecar@1.1.3(@types/react@19.1.10)(react@19.1.1):
+    resolution: {integrity: sha512-Fedw0aZvkhynoPYlA5WXrMCAMm+nSWdZt6lzJQ7Ok8S6Q+VsHmHpRWndVRJ8Be0ZbkfPc5LRYH+5XrzXcEeLRQ==}
+    engines: {node: '>=10'}
+    peerDependencies:
+      '@types/react': '*'
+      react: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0 || ^19.0.0-rc
+    peerDependenciesMeta:
+      '@types/react':
+        optional: true
+    dependencies:
+      '@types/react': 19.1.10
+      detect-node-es: 1.1.0
+      react: 19.1.1
+      tslib: 2.8.1
+    dev: false
+
+  /use-sync-external-store@1.5.0(react@19.1.1):
+    resolution: {integrity: sha512-Rb46I4cGGVBmjamjphe8L/UnvJD+uPPtTkNvX5mZgqdbavhI4EbgIWJiIHXJ8bc/i9EQGPRh4DwEURJ552Do0A==}
+    peerDependencies:
+      react: ^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0
+    dependencies:
+      react: 19.1.1
+    dev: false
+
+  /util-deprecate@1.0.2:
+    resolution: {integrity: sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==}
+    dev: false
+
+  /uuid@11.1.0:
+    resolution: {integrity: sha512-0/A9rDy9P7cJ+8w1c9WD9V//9Wj15Ce2MPz8Ri6032usz+NfePxx5AcN3bN+r6ZL6jEo066/yNYB3tn4pQEx+A==}
+    hasBin: true
+    dev: false
+
+  /vary@1.1.2:
+    resolution: {integrity: sha512-BNGbWLfd0eUPabhkXUVm0j8uuvREyTh5ovRa/dyow/BqAbZJyC+5fU+IzQOzmAKzYqYRAISoRhdQr3eIZ/PXqg==}
+    engines: {node: '>= 0.8'}
+    dev: false
+
+  /vfile-location@5.0.3:
+    resolution: {integrity: sha512-5yXvWDEgqeiYiBe1lbxYF7UMAIm/IcopxMHrMQDq3nvKcjPKIhZklUKL+AE7J7uApI4kwe2snsK+eI6UTj9EHg==}
+    dependencies:
+      '@types/unist': 3.0.3
+      vfile: 6.0.3
+    dev: false
+
+  /vfile-message@3.1.4:
+    resolution: {integrity: sha512-fa0Z6P8HUrQN4BZaX05SIVXic+7kE3b05PWAtPuYP9QLHsLKYR7/AlLW3NtOrpXRLeawpDLMsVkmk5DG0NXgWw==}
+    dependencies:
+      '@types/unist': 2.0.11
+      unist-util-stringify-position: 3.0.3
+    dev: false
+
+  /vfile-message@4.0.3:
+    resolution: {integrity: sha512-QTHzsGd1EhbZs4AsQ20JX1rC3cOlt/IWJruk893DfLRr57lcnOeMaWG4K0JrRta4mIJZKth2Au3mM3u03/JWKw==}
+    dependencies:
+      '@types/unist': 3.0.3
+      unist-util-stringify-position: 4.0.0
+    dev: false
+
+  /vfile@5.3.7:
+    resolution: {integrity: sha512-r7qlzkgErKjobAmyNIkkSpizsFPYiUPuJb5pNW1RB4JcYVZhs4lIbVqk8XPk033CV/1z8ss5pkax8SuhGpcG8g==}
+    dependencies:
+      '@types/unist': 2.0.11
+      is-buffer: 2.0.5
+      unist-util-stringify-position: 3.0.3
+      vfile-message: 3.1.4
+    dev: false
+
+  /vfile@6.0.3:
+    resolution: {integrity: sha512-KzIbH/9tXat2u30jf+smMwFCsno4wHVdNmzFyL+T/L3UGqqk6JKfVqOFOZEpZSHADH1k40ab6NUIXZq422ov3Q==}
+    dependencies:
+      '@types/unist': 3.0.3
+      vfile-message: 4.0.3
+    dev: false
+
+  /vite-node@3.2.4(@types/node@24.3.0):
+    resolution: {integrity: sha512-EbKSKh+bh1E1IFxeO0pg1n4dvoOTt0UDiXMd/qn++r98+jPO1xtJilvXldeuQ8giIB5IkpjCgMleHMNEsGH6pg==}
+    engines: {node: ^18.0.0 || ^20.0.0 || >=22.0.0}
+    hasBin: true
+    dependencies:
+      cac: 6.7.14
+      debug: 4.4.1
+      es-module-lexer: 1.7.0
+      pathe: 2.0.3
+      vite: 6.3.5(@types/node@24.3.0)
+    transitivePeerDependencies:
+      - '@types/node'
+      - jiti
+      - less
+      - lightningcss
+      - sass
+      - sass-embedded
+      - stylus
+      - sugarss
+      - supports-color
+      - terser
+      - tsx
+      - yaml
+    dev: false
+
+  /vite@6.3.5(@types/node@24.3.0):
+    resolution: {integrity: sha512-cZn6NDFE7wdTpINgs++ZJ4N49W2vRp8LCKrn3Ob1kYNtOo21vfDoaV5GzBfLU4MovSAB8uNRm4jgzVQZ+mBzPQ==}
+    engines: {node: ^18.0.0 || ^20.0.0 || >=22.0.0}
+    hasBin: true
+    peerDependencies:
+      '@types/node': ^18.0.0 || ^20.0.0 || >=22.0.0
+      jiti: '>=1.21.0'
+      less: '*'
+      lightningcss: ^1.21.0
+      sass: '*'
+      sass-embedded: '*'
+      stylus: '*'
+      sugarss: '*'
+      terser: ^5.16.0
+      tsx: ^4.8.1
+      yaml: ^2.4.2
+    peerDependenciesMeta:
+      '@types/node':
+        optional: true
+      jiti:
+        optional: true
+      less:
+        optional: true
+      lightningcss:
+        optional: true
+      sass:
+        optional: true
+      sass-embedded:
+        optional: true
+      stylus:
+        optional: true
+      sugarss:
+        optional: true
+      terser:
+        optional: true
+      tsx:
+        optional: true
+      yaml:
+        optional: true
+    dependencies:
+      '@types/node': 24.3.0
+      esbuild: 0.25.9
+      fdir: 6.5.0(picomatch@4.0.3)
+      picomatch: 4.0.3
+      postcss: 8.5.6
+      rollup: 4.46.2
+      tinyglobby: 0.2.14
+    optionalDependencies:
+      fsevents: 2.3.3
+    dev: false
+
+  /vocs@1.0.13(@types/node@24.3.0)(@types/react@19.1.10)(acorn@8.15.0)(react-dom@19.1.1)(react@19.1.1)(rollup@4.46.2)(typescript@5.9.2):
+    resolution: {integrity: sha512-V/ogXG5xw7jMFXI2Wv0d0ZdCeeT5jzaX0PKdRKcqhnd21UtLZrqa5pKZkStNIZyVpvfsLW0WB7wjB4iBOpueiw==}
+    hasBin: true
+    peerDependencies:
+      react: ^19
+      react-dom: ^19
+    dependencies:
+      '@floating-ui/react': 0.27.15(react-dom@19.1.1)(react@19.1.1)
+      '@hono/node-server': 1.18.2(hono@4.9.2)
+      '@mdx-js/react': 3.1.0(@types/react@19.1.10)(react@19.1.1)
+      '@mdx-js/rollup': 3.1.0(acorn@8.15.0)(rollup@4.46.2)
+      '@noble/hashes': 1.8.0
+      '@radix-ui/colors': 3.0.0
+      '@radix-ui/react-accordion': 1.2.12(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-dialog': 1.1.15(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-icons': 1.3.2(react@19.1.1)
+      '@radix-ui/react-label': 2.1.7(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-navigation-menu': 1.2.14(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-popover': 1.1.15(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@radix-ui/react-tabs': 1.1.13(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      '@shikijs/rehype': 1.29.2
+      '@shikijs/transformers': 1.29.2
+      '@shikijs/twoslash': 1.29.2(typescript@5.9.2)
+      '@tailwindcss/vite': 4.0.7(vite@6.3.5)
+      '@vanilla-extract/css': 1.17.4
+      '@vanilla-extract/dynamic': 2.1.5
+      '@vanilla-extract/vite-plugin': 5.1.1(@types/node@24.3.0)(vite@6.3.5)
+      '@vitejs/plugin-react': 4.7.0(vite@6.3.5)
+      autoprefixer: 10.4.21(postcss@8.5.6)
+      cac: 6.7.14
+      chroma-js: 3.1.2
+      clsx: 2.1.1
+      compression: 1.8.1
+      create-vocs: 1.0.0
+      cross-spawn: 7.0.6
+      fs-extra: 11.3.1
+      globby: 14.1.0
+      hastscript: 8.0.0
+      hono: 4.9.2
+      mark.js: 8.11.1
+      mdast-util-directive: 3.1.0
+      mdast-util-from-markdown: 2.0.2
+      mdast-util-frontmatter: 2.0.1
+      mdast-util-gfm: 3.1.0
+      mdast-util-mdx: 3.0.0
+      mdast-util-mdx-jsx: 3.2.0
+      mdast-util-to-hast: 13.2.0
+      mdast-util-to-markdown: 2.1.2
+      minimatch: 9.0.5
+      minisearch: 6.3.0
+      ora: 7.0.1
+      p-limit: 5.0.0
+      playwright: 1.54.2
+      postcss: 8.5.6
+      radix-ui: 1.4.3(@types/react@19.1.10)(react-dom@19.1.1)(react@19.1.1)
+      react: 19.1.1
+      react-dom: 19.1.1(react@19.1.1)
+      react-intersection-observer: 9.16.0(react-dom@19.1.1)(react@19.1.1)
+      react-router: 7.8.1(react-dom@19.1.1)(react@19.1.1)
+      rehype-autolink-headings: 7.1.0
+      rehype-class-names: 2.0.0
+      rehype-mermaid: 3.0.0(playwright@1.54.2)
+      rehype-slug: 6.0.0
+      remark-directive: 3.0.1
+      remark-frontmatter: 5.0.0
+      remark-gfm: 4.0.1
+      remark-mdx: 3.1.0
+      remark-mdx-frontmatter: 5.2.0
+      remark-parse: 11.0.0
+      serve-static: 1.16.2
+      shiki: 1.29.2
+      toml: 3.0.0
+      twoslash: 0.2.12(typescript@5.9.2)
+      ua-parser-js: 1.0.40
+      unified: 11.0.5
+      unist-util-visit: 5.0.0
+      vite: 6.3.5(@types/node@24.3.0)
+    transitivePeerDependencies:
+      - '@types/node'
+      - '@types/react'
+      - '@types/react-dom'
+      - acorn
+      - babel-plugin-macros
+      - jiti
+      - less
+      - lightningcss
+      - rollup
+      - sass
+      - sass-embedded
+      - stylus
+      - sugarss
+      - supports-color
+      - terser
+      - tsx
+      - typescript
+      - yaml
+    dev: false
+
+  /vscode-jsonrpc@8.2.0:
+    resolution: {integrity: sha512-C+r0eKJUIfiDIfwJhria30+TYWPtuHJXHtI7J0YlOmKAo7ogxP20T0zxB7HZQIFhIyvoBPwWskjxrvAtfjyZfA==}
+    engines: {node: '>=14.0.0'}
+    dev: false
+
+  /vscode-languageserver-protocol@3.17.5:
+    resolution: {integrity: sha512-mb1bvRJN8SVznADSGWM9u/b07H7Ecg0I3OgXDuLdn307rl/J3A9YD6/eYOssqhecL27hK1IPZAsaqh00i/Jljg==}
+    dependencies:
+      vscode-jsonrpc: 8.2.0
+      vscode-languageserver-types: 3.17.5
+    dev: false
+
+  /vscode-languageserver-textdocument@1.0.12:
+    resolution: {integrity: sha512-cxWNPesCnQCcMPeenjKKsOCKQZ/L6Tv19DTRIGuLWe32lyzWhihGVJ/rcckZXJxfdKCFvRLS3fpBIsV/ZGX4zA==}
+    dev: false
+
+  /vscode-languageserver-types@3.17.5:
+    resolution: {integrity: sha512-Ld1VelNuX9pdF39h2Hgaeb5hEZM2Z3jUrrMgWQAu82jMtZp7p3vJT3BzToKtZI7NgQssZje5o0zryOrhQvzQAg==}
+    dev: false
+
+  /vscode-languageserver@9.0.1:
+    resolution: {integrity: sha512-woByF3PDpkHFUreUa7Hos7+pUWdeWMXRd26+ZX2A8cFx6v/JPTtd4/uN0/jB6XQHYaOlHbio03NTHCqrgG5n7g==}
+    hasBin: true
+    dependencies:
+      vscode-languageserver-protocol: 3.17.5
+    dev: false
+
+  /vscode-uri@3.0.8:
+    resolution: {integrity: sha512-AyFQ0EVmsOZOlAnxoFOGOq1SQDWAB7C6aqMGS23svWAllfOaxbuFvcT8D1i8z3Gyn8fraVeZNNmN6e9bxxXkKw==}
+    dev: false
+
+  /web-namespaces@2.0.1:
+    resolution: {integrity: sha512-bKr1DkiNa2krS7qxNtdrtHAmzuYGFQLiQ13TsorsdT6ULTkPLKuu5+GsFpDlg6JFjUTwX2DyhMPG2be8uPrqsQ==}
+    dev: false
+
+  /which@2.0.2:
+    resolution: {integrity: sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==}
+    engines: {node: '>= 8'}
+    hasBin: true
+    dependencies:
+      isexe: 2.0.0
+
+  /wrap-ansi@7.0.0:
+    resolution: {integrity: sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==}
+    engines: {node: '>=10'}
+    dependencies:
+      ansi-styles: 4.3.0
+      string-width: 4.2.3
+      strip-ansi: 6.0.1
+    dev: true
+
+  /wrap-ansi@8.1.0:
+    resolution: {integrity: sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==}
+    engines: {node: '>=12'}
+    dependencies:
+      ansi-styles: 6.2.1
+      string-width: 5.1.2
+      strip-ansi: 7.1.0
+    dev: true
+
+  /wrappy@1.0.2:
+    resolution: {integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==}
+    dev: true
+
+  /yallist@3.1.1:
+    resolution: {integrity: sha512-a4UGQaWPH59mOXUYnAG2ewncQS4i4F43Tv3JoAM+s2VDAmS9NsK8GpDMLrCHPksFT7h3K6TOoUNn2pb7RoXx4g==}
+    dev: false
+
+  /yaml@2.8.1:
+    resolution: {integrity: sha512-lcYcMxX2PO9XMGvAJkJ3OsNMw+/7FKes7/hgerGUYWIoWu5j/+YQqcZr5JnPZWzOsEBgMbSbiSTn/dv/69Mkpw==}
+    engines: {node: '>= 14.6'}
+    hasBin: true
+
+  /yocto-queue@0.1.0:
+    resolution: {integrity: sha512-rVksvsnNCdJ/ohGc6xgPwyN8eheCxsiLM8mxuE/t/mOVqJewPuO1miLpTHQiRgTKCLexL4MeAFVagts7HmNZ2Q==}
+    engines: {node: '>=10'}
+    dev: false
+
+  /yocto-queue@1.2.1:
+    resolution: {integrity: sha512-AyeEbWOu/TAXdxlV9wmGcR0+yh2j3vYPGOECcIj2S7MkrLyC7ne+oye2BKTItt0ii2PHk4cDy+95+LshzbXnGg==}
+    engines: {node: '>=12.20'}
+    dev: false
+
+  /zwitch@2.0.4:
+    resolution: {integrity: sha512-bXE4cR/kVZhKZX/RjPEflHaKVhUVl85noU3v6b8apfQEc1x4A+zBxjZ4lN8LqGd6WZ3dl98pY4o717VFmoPp+A==}
+    dev: false
diff --git a/docs/vocs/sidebar.ts b/docs/vocs/sidebar.ts
new file mode 100644
index 0000000000..770ff84d1a
--- /dev/null
+++ b/docs/vocs/sidebar.ts
@@ -0,0 +1,64 @@
+import { SidebarItem } from "vocs";
+
+/** Leaf entry: a label that navigates to `link`. */
+const page = (text: string, link: string): SidebarItem => ({ text, link });
+
+/** Group entry: a heading with nested child `items`. */
+const section = (text: string, items: SidebarItem[]): SidebarItem => ({ text, items });
+
+/** Navigation tree for the specification pages (every link is under `/specs/`). */
+export const specsSidebar: SidebarItem[] = [
+    section("OpenVM Design", [
+        page("Overview", "/specs/openvm/overview"),
+        page("Modular ISA Design", "/specs/openvm/isa"),
+    ]),
+    section("VM Architecture", [
+        page("Circuit Architecture", "/specs/architecture/circuit-architecture"),
+        page("Memory Design", "/specs/architecture/memory"),
+        page("Continuations Design", "/specs/architecture/continuations"),
+    ]),
+    section("OpenVM Reference", [
+        page("Instruction Reference", "/specs/reference/instruction-reference"),
+        page("RISC-V Custom Code", "/specs/reference/riscv-custom-code"),
+        page("RISC-V Transpiler", "/specs/reference/transpiler"),
+    ]),
+];
+
+/** Navigation tree for the user book (every link is under `/book/`). */
+export const bookSidebar: SidebarItem[] = [
+    page("Introduction", "/book/getting-started/introduction"),
+    page("Install", "/book/getting-started/install"),
+    page("Quickstart", "/book/getting-started/quickstart"),
+    section("Writing Apps", [
+        page("Overview", "/book/writing-apps/overview"),
+        page("Writing a Program", "/book/writing-apps/writing-a-program"),
+        page("Compiling", "/book/writing-apps/compiling"),
+        page("Running a Program", "/book/writing-apps/running-a-program"),
+        page("Generating Proofs", "/book/writing-apps/generating-proofs"),
+        page("Verifying Proofs", "/book/writing-apps/verifying-proofs"),
+        page("Solidity SDK", "/book/writing-apps/solidity-sdk"),
+    ]),
+    section("Acceleration Using Extensions", [
+        page("Overview", "/book/acceleration-using-extensions/overview"),
+        page("Keccak", "/book/acceleration-using-extensions/keccak"),
+        page("SHA-256", "/book/acceleration-using-extensions/sha-256"),
+        page("Big Integer", "/book/acceleration-using-extensions/big-integer"),
+        page("Algebra (Modular Arithmetic)", "/book/acceleration-using-extensions/algebra"),
+        page("Elliptic Curve Cryptography", "/book/acceleration-using-extensions/elliptic-curve-cryptography"),
+        page("Elliptic Curve Pairing", "/book/acceleration-using-extensions/elliptic-curve-pairing"),
+    ]),
+    section("Guest Libraries", [
+        page("Keccak256", "/book/guest-libraries/keccak256"),
+        page("SHA2", "/book/guest-libraries/sha2"),
+        page("Ruint", "/book/guest-libraries/ruint"),
+        page("K256", "/book/guest-libraries/k256"),
+        page("P256", "/book/guest-libraries/p256"),
+        page("Pairing", "/book/guest-libraries/pairing"),
+        page("Verify STARK", "/book/guest-libraries/verify-stark"),
+    ]),
+    section("Advanced Usage", [
+        page("SDK", "/book/advanced-usage/sdk"),
+        page("Creating a New Extension", "/book/advanced-usage/creating-a-new-extension"),
+        page("Recursive Verification", "/book/advanced-usage/recursive-verification"),
+    ]),
+];
\ No newline at end of file
diff --git a/docs/vocs/vocs.config.ts b/docs/vocs/vocs.config.ts
new file mode 100644
index 0000000000..50b87e4b7c
--- /dev/null
+++ b/docs/vocs/vocs.config.ts
@@ -0,0 +1,94 @@
+import React from 'react'
+import { defineConfig } from 'vocs'
+import remarkMath from 'remark-math'
+import rehypeKatex from 'rehype-katex'
+import remarkMdxDisableExplicitJsx from 'remark-mdx-disable-explicit-jsx'
+
+import { bookSidebar, specsSidebar } from './sidebar'
+
+export default defineConfig({
+  title: 'OpenVM',
+  logoUrl: '/OpenVM-horizontal.svg',
+  iconUrl: '/OpenVM-favicon.svg',
+  ogImageUrl: '/OpenVM-horizontal.svg',
+  sidebar: {
+    '/book/': bookSidebar,
+    '/specs/': specsSidebar
+  },
+  basePath: '/',
+  topNav: [
+    { text: 'Book', link: '/book/getting-started/introduction' },
+    { text: 'Specs', link: '/specs/openvm/overview' },
+    {
+      text: 'Rustdocs',
+      link: 'https://openvm.dev/docs'
+    },
+    { text: 'GitHub', link: 'https://github.com/openvm-org/openvm' },
+    {
+      text: 'v1.4.0',
+      items: [
+        {
+          text: 'Releases',
+          link: 'https://github.com/openvm-org/openvm/releases'
+        },
+      ]
+    }
+  ],
+  socials: [
+    {
+      icon: 'github',
+      link: 'https://github.com/openvm-org/openvm',
+    },
+    {
+      icon: 'telegram',
+      link: 'https://t.me/openvm',
+    },
+  ],
+  sponsors: [
+    {
+      name: 'Collaborators',
+      height: 120,
+      items: [
+        [
+          {
+            name: 'Axiom',
+            link: 'https://axiom.xyz',
+            image: '',
+          },
+        ]
+      ]
+    }
+  ],
+  markdown: {
+    remarkPlugins: [[remarkMath, { singleDollarTextMath: true }]],
+    rehypePlugins: [[rehypeKatex, {
+        // Turn KaTeX strict mode off so convenient non-standard LaTeX is accepted silently
+        strict: false,
+        // Trust all LaTeX commands
+        trust: true
+      }]],
+  },
+  vite: {  
+    plugins: [  
+      {  
+        name: 'exclude-llms',  
+        configResolved(config) {  
+          // Cast to mutable to modify the readonly plugins array  
+          const mutableConfig = config as any  
+          mutableConfig.plugins = config.plugins.filter(  
+            plugin => plugin && plugin.name !== 'llms'  
+          )  
+        }  
+      }  
+    ],
+  },
+  theme: {
+    accentColor: {
+      light: '#1f1f1f',
+      dark: '#ffffff',
+    }
+  },
+  editLink: {
+    pattern: "https://github.com/openvm-org/openvm/edit/main/docs/vocs/docs/pages/:path",
+  }
+})
\ No newline at end of file
diff --git a/examples/ecc/src/main.rs b/examples/ecc/src/main.rs
index 5114b9968f..b81d89dfa5 100644
--- a/examples/ecc/src/main.rs
+++ b/examples/ecc/src/main.rs
@@ -1,11 +1,11 @@
-// ANCHOR: imports
+// [!region imports]
 use hex_literal::hex;
 use openvm_algebra_guest::IntMod;
 use openvm_ecc_guest::weierstrass::WeierstrassPoint;
 use openvm_k256::{Secp256k1Coord, Secp256k1Point};
-// ANCHOR_END: imports
+// [!endregion imports]
 
-// ANCHOR: init
+// [!region init]
 openvm::init!();
 /* The init! macro will expand to the following
 openvm_algebra_guest::moduli_macros::moduli_init! {
@@ -15,9 +15,9 @@ openvm_algebra_guest::moduli_macros::moduli_init! {
 
 openvm_ecc_guest::sw_macros::sw_init! { "Secp256k1Point" }
 */
-// ANCHOR_END: init
+// [!endregion init]
 
-// ANCHOR: main
+// [!region main]
 pub fn main() {
     let x1 = Secp256k1Coord::from_u32(1);
     let y1 = Secp256k1Coord::from_le_bytes_unchecked(&hex!(
@@ -34,4 +34,4 @@ pub fn main() {
     #[allow(clippy::op_ref)]
     let _p3 = &p1 + &p2;
 }
-// ANCHOR_END: main
+// [!endregion main]
diff --git a/examples/i256/src/main.rs b/examples/i256/src/main.rs
index 8f008f40a0..ec911bc1cd 100644
--- a/examples/i256/src/main.rs
+++ b/examples/i256/src/main.rs
@@ -1,4 +1,6 @@
 #![allow(clippy::needless_range_loop)]
+openvm::entry!(main);
+
 use core::array;
 
 use alloy_primitives::I256;
diff --git a/examples/keccak/src/main.rs b/examples/keccak/src/main.rs
index 7b98d36ed1..7d1261d6aa 100644
--- a/examples/keccak/src/main.rs
+++ b/examples/keccak/src/main.rs
@@ -1,11 +1,11 @@
-// ANCHOR: imports
+// [!region imports]
 use core::hint::black_box;
 
 use hex::FromHex;
 use openvm_keccak256::keccak256;
-// ANCHOR_END: imports
+// [!endregion imports]
 
-// ANCHOR: main
+// [!region main]
 openvm::entry!(main);
 
 pub fn main() {
@@ -28,4 +28,4 @@ pub fn main() {
         }
     }
 }
-// ANCHOR_END: main
+// [!endregion main]
diff --git a/examples/pairing/src/main.rs b/examples/pairing/src/main.rs
index 681527ca99..2fbb06aaf5 100644
--- a/examples/pairing/src/main.rs
+++ b/examples/pairing/src/main.rs
@@ -1,16 +1,16 @@
-// ANCHOR: pre
+// [!region pre]
 use hex_literal::hex;
-// ANCHOR_END: pre
-// ANCHOR: imports
+// [!endregion pre]
+// [!region imports]
 use openvm_algebra_guest::{field::FieldExtension, IntMod};
 use openvm_ecc_guest::AffinePoint;
 use openvm_pairing::{
     bls12_381::{Bls12_381, Fp, Fp2},
     PairingCheck,
 };
-// ANCHOR_END: imports
+// [!endregion imports]
 
-// ANCHOR: init
+// [!region init]
 openvm::init!();
 /* The init! macro will expand to the following
 openvm_algebra_moduli_macros::moduli_init! {
@@ -22,9 +22,9 @@ openvm_algebra_complex_macros::complex_init! {
     "Bls12_381Fp2" { mod_idx = 0 },
 }
 */
-// ANCHOR_END: init
+// [!endregion init]
 
-// ANCHOR: main
+// [!region main]
 pub fn main() {
     let p0 = AffinePoint::new(
         Fp::from_be_bytes_unchecked(&hex!("17f1d3a73197d7942695638c4fa9ac0fc3688c4f9774b905a14e3a3f171bac586c55e83ff97a1aeffb3af00adb22c6bb")),
@@ -55,9 +55,9 @@ pub fn main() {
         ]),
     );
 
-    // ANCHOR: pairing_check
+    // [!region pairing_check]
     let res = Bls12_381::pairing_check(&[p0, -q0], &[p1, q1]);
     assert!(res.is_ok());
-    // ANCHOR_END: pairing_check
+    // [!endregion pairing_check]
 }
-// ANCHOR_END: main
+// [!endregion main]
diff --git a/examples/sha256/src/main.rs b/examples/sha256/src/main.rs
index a6195390a4..64f203fd74 100644
--- a/examples/sha256/src/main.rs
+++ b/examples/sha256/src/main.rs
@@ -1,11 +1,11 @@
-// ANCHOR: imports
+// [!region imports]
 use core::hint::black_box;
 
 use hex::FromHex;
 use openvm_sha2::sha256;
-// ANCHOR_END: imports
+// [!endregion imports]
 
-// ANCHOR: main
+// [!region main]
 openvm::entry!(main);
 
 pub fn main() {
@@ -22,4 +22,4 @@ pub fn main() {
         }
     }
 }
-// ANCHOR_END: main
+// [!endregion main]
diff --git a/examples/u256/src/main.rs b/examples/u256/src/main.rs
index 75b80afd3d..05319a2a17 100644
--- a/examples/u256/src/main.rs
+++ b/examples/u256/src/main.rs
@@ -1,4 +1,6 @@
 #![allow(clippy::needless_range_loop)]
+openvm::entry!(main);
+
 use core::array;
 
 use openvm_ruint::aliases::U256;
diff --git a/extensions/algebra/circuit/Cargo.toml b/extensions/algebra/circuit/Cargo.toml
index 258bff450b..7d0eb389e6 100644
--- a/extensions/algebra/circuit/Cargo.toml
+++ b/extensions/algebra/circuit/Cargo.toml
@@ -20,21 +20,23 @@ openvm-rv32im-circuit = { workspace = true }
 openvm-rv32-adapters = { workspace = true }
 openvm-algebra-transpiler = { workspace = true }
 
-itertools = { workspace = true }
+halo2curves-axiom = { workspace = true }
 num-bigint = { workspace = true, features = ["serde"] }
 num-traits = { workspace = true }
 rand = { workspace = true }
-derive_more = { workspace = true, features = ["from"] }
+derive_more = { workspace = true, features = ["from", "deref", "deref_mut"] }
 strum = { workspace = true }
 derive-new = { workspace = true }
 serde.workspace = true
 serde_with = { workspace = true }
-serde-big-array = { workspace = true }
 eyre = { workspace = true }
 
 [dev-dependencies]
-halo2curves-axiom = { workspace = true }
 openvm-mod-circuit-builder = { workspace = true, features = ["test-utils"] }
 openvm-circuit = { workspace = true, features = ["test-utils"] }
 openvm-rv32-adapters = { workspace = true, features = ["test-utils"] }
 openvm-pairing-guest = { workspace = true, features = ["halo2curves"] }
+test-case = { workspace = true }
+
+[package.metadata.cargo-shear]
+ignored = ["derive_more"]
diff --git a/extensions/algebra/circuit/src/config.rs b/extensions/algebra/circuit/src/config.rs
index 5b43163b77..2210f54c17 100644
--- a/extensions/algebra/circuit/src/config.rs
+++ b/extensions/algebra/circuit/src/config.rs
@@ -1,15 +1,33 @@
+use std::result::Result;
+
 use num_bigint::BigUint;
-use openvm_circuit::arch::{InitFileGenerator, SystemConfig};
+use openvm_circuit::{
+    arch::{
+        AirInventory, ChipInventoryError, InitFileGenerator, MatrixRecordArena, SystemConfig,
+        VmBuilder, VmChipComplex, VmProverExtension,
+    },
+    system::{SystemChipInventory, SystemCpuBuilder, SystemExecutor},
+};
 use openvm_circuit_derive::VmConfig;
-use openvm_rv32im_circuit::*;
-use openvm_stark_backend::p3_field::PrimeField32;
+use openvm_rv32im_circuit::{
+    Rv32I, Rv32IExecutor, Rv32ImCpuProverExt, Rv32Io, Rv32IoExecutor, Rv32M, Rv32MExecutor,
+};
+use openvm_stark_backend::{
+    config::{StarkGenericConfig, Val},
+    p3_field::PrimeField32,
+    prover::cpu::{CpuBackend, CpuDevice},
+};
+use openvm_stark_sdk::engine::StarkEngine;
 use serde::{Deserialize, Serialize};
 
-use super::*;
+use crate::{
+    AlgebraCpuProverExt, Fp2Extension, Fp2ExtensionExecutor, ModularExtension,
+    ModularExtensionExecutor,
+};
 
 #[derive(Clone, Debug, VmConfig, Serialize, Deserialize)]
 pub struct Rv32ModularConfig {
-    #[system]
+    #[config(executor = "SystemExecutor<F>")]
     pub system: SystemConfig,
     #[extension]
     pub base: Rv32I,
@@ -33,7 +51,7 @@ impl InitFileGenerator for Rv32ModularConfig {
 impl Rv32ModularConfig {
     pub fn new(moduli: Vec<BigUint>) -> Self {
         Self {
-            system: SystemConfig::default().with_continuations(),
+            system: SystemConfig::default(),
             base: Default::default(),
             mul: Default::default(),
             io: Default::default(),
@@ -44,16 +62,8 @@ impl Rv32ModularConfig {
 
 #[derive(Clone, Debug, VmConfig, Serialize, Deserialize)]
 pub struct Rv32ModularWithFp2Config {
-    #[system]
-    pub system: SystemConfig,
-    #[extension]
-    pub base: Rv32I,
-    #[extension]
-    pub mul: Rv32M,
-    #[extension]
-    pub io: Rv32Io,
-    #[extension]
-    pub modular: ModularExtension,
+    #[config(generics = true)]
+    pub modular: Rv32ModularConfig,
     #[extension]
     pub fp2: Fp2Extension,
 }
@@ -65,11 +75,7 @@ impl Rv32ModularWithFp2Config {
             .map(|(_, modulus)| modulus.clone())
             .collect();
         Self {
-            system: SystemConfig::default().with_continuations(),
-            base: Default::default(),
-            mul: Default::default(),
-            io: Default::default(),
-            modular: ModularExtension::new(moduli),
+            modular: Rv32ModularConfig::new(moduli),
             fp2: Fp2Extension::new(moduli_with_names),
         }
     }
@@ -79,8 +85,73 @@ impl InitFileGenerator for Rv32ModularWithFp2Config {
     fn generate_init_file_contents(&self) -> Option<String> {
         Some(format!(
             "// This file is automatically generated by cargo openvm. Do not rename or edit.\n{}\n{}\n",
-            self.modular.generate_moduli_init(),
-            self.fp2.generate_complex_init(&self.modular)
+            self.modular.modular.generate_moduli_init(),
+            self.fp2.generate_complex_init(&self.modular.modular)
         ))
     }
 }
+
+/// [`VmBuilder`] that assembles the CPU chip complex for [`Rv32ModularConfig`]: the system
+/// chips first, then the prover chips of the `base`, `mul`, `io` and `modular` extensions.
+#[derive(Clone)]
+pub struct Rv32ModularCpuBuilder;
+
+impl<E, SC> VmBuilder<E> for Rv32ModularCpuBuilder
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    Val<SC>: PrimeField32,
+{
+    type VmConfig = Rv32ModularConfig;
+    type SystemChipInventory = SystemChipInventory<SC>;
+    type RecordArena = MatrixRecordArena<Val<SC>>;
+
+    fn create_chip_complex(
+        &self,
+        config: &Rv32ModularConfig,
+        circuit: AirInventory<SC>,
+    ) -> Result<
+        VmChipComplex<SC, Self::RecordArena, E::PB, Self::SystemChipInventory>,
+        ChipInventoryError,
+    > {
+        // The system builder creates the complex; each extension then registers its prover
+        // chips on the shared inventory, in the order base, mul, io, modular.
+        let mut complex =
+            VmBuilder::<E>::create_chip_complex(&SystemCpuBuilder, &config.system, circuit)?;
+        let inv = &mut complex.inventory;
+        VmProverExtension::<E, _, _>::extend_prover(&Rv32ImCpuProverExt, &config.base, inv)?;
+        VmProverExtension::<E, _, _>::extend_prover(&Rv32ImCpuProverExt, &config.mul, inv)?;
+        VmProverExtension::<E, _, _>::extend_prover(&Rv32ImCpuProverExt, &config.io, inv)?;
+        VmProverExtension::<E, _, _>::extend_prover(&AlgebraCpuProverExt, &config.modular, inv)?;
+        Ok(complex)
+    }
+}
+
+#[derive(Clone)]
+pub struct Rv32ModularWithFp2CpuBuilder; // CPU `VmBuilder` for `Rv32ModularWithFp2Config`
+
+impl<E, SC> VmBuilder<E> for Rv32ModularWithFp2CpuBuilder
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    Val<SC>: PrimeField32,
+{
+    type VmConfig = Rv32ModularWithFp2Config;
+    type SystemChipInventory = SystemChipInventory<SC>;
+    type RecordArena = MatrixRecordArena<Val<SC>>;
+
+    fn create_chip_complex(
+        &self,
+        config: &Rv32ModularWithFp2Config,
+        circuit: AirInventory<SC>,
+    ) -> Result<
+        VmChipComplex<SC, Self::RecordArena, E::PB, Self::SystemChipInventory>,
+        ChipInventoryError,
+    > {
+        let mut chip_complex = // start from the complex built for the nested modular config
+            VmBuilder::<E>::create_chip_complex(&Rv32ModularCpuBuilder, &config.modular, circuit)?;
+        let inventory = &mut chip_complex.inventory; // the Fp2 chips are registered on top
+        VmProverExtension::<E, _, _>::extend_prover(&AlgebraCpuProverExt, &config.fp2, inventory)?;
+        Ok(chip_complex)
+    }
+}
diff --git a/extensions/algebra/circuit/src/execution.rs b/extensions/algebra/circuit/src/execution.rs
new file mode 100644
index 0000000000..a99c4ba37b
--- /dev/null
+++ b/extensions/algebra/circuit/src/execution.rs
@@ -0,0 +1,597 @@
+use std::{
+    array::from_fn,
+    borrow::{Borrow, BorrowMut},
+};
+
+use num_bigint::BigUint;
+use openvm_algebra_transpiler::{Fp2Opcode, Rv32ModularArithmeticOpcode};
+use openvm_circuit::{
+    arch::*,
+    system::memory::{online::GuestMemory, POINTER_MAX_BITS},
+};
+use openvm_circuit_primitives::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS},
+};
+use openvm_mod_circuit_builder::{run_field_expression_precomputed, FieldExpr};
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::FieldExprVecHeapExecutor;
+use crate::fields::{
+    field_operation, fp2_operation, get_field_type, get_fp2_field_type, FieldType, Operation,
+};
+
+// Expands to a `match` on `($field_type, $op)` with one arm per listed (field, operation)
+// pair. Each arm yields `Ok($execute_fn::<_, _, $blocks, $block_size, false, FIELD, OP>)`,
+// where FIELD and OP are the matched `FieldType` / `Operation` variants cast to `u8` and
+// `false` fills the const-generic slot that `generate_fp2_dispatch!` sets to `true`.
+// There is no wildcard arm, so the match only compiles if the list is exhaustive.
+macro_rules! generate_field_dispatch {
+    (
+        $field_type:expr,
+        $op:expr,
+        $blocks:expr,
+        $block_size:expr,
+        $execute_fn:ident,
+        [$(($field:ident, $arith:ident)),* $(,)?]
+    ) => {
+        match ($field_type, $op) {
+            $(
+                (FieldType::$field, Operation::$arith) => Ok($execute_fn::<
+                    _, _, $blocks, $block_size, false,
+                    { FieldType::$field as u8 }, { Operation::$arith as u8 },
+                >),
+            )*
+        }
+    };
+}
+
+// Fp2 counterpart of `generate_field_dispatch!`: expands to a `match` on
+// `($field_type, $op)` with one arm per listed (field, operation) pair, each yielding
+// `Ok($execute_fn::<_, _, $blocks, $block_size, true, FIELD, OP>)` with FIELD and OP being
+// the matched variants cast to `u8`. Any combination that is not listed reaches the
+// wildcard arm and panics with "Unsupported fp2 field".
+macro_rules! generate_fp2_dispatch {
+    (
+        $field_type:expr,
+        $op:expr,
+        $blocks:expr,
+        $block_size:expr,
+        $execute_fn:ident,
+        [$(($field:ident, $arith:ident)),* $(,)?]
+    ) => {
+        match ($field_type, $op) {
+            $(
+                (FieldType::$field, Operation::$arith) => Ok($execute_fn::<
+                    _, _, $blocks, $block_size, true,
+                    { FieldType::$field as u8 }, { Operation::$arith as u8 },
+                >),
+            )*
+            _ => panic!("Unsupported fp2 field")
+        }
+    };
+}
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct FieldExpressionPreCompute<'a> { // per-instruction data filled by `pre_compute_impl`
+    expr: &'a FieldExpr, // the executor's field expression (`&self.0.expr`)
+    rs_addrs: [u8; 2], // instruction operands `b` and `c`, truncated to `u8`; read as registers
+    a: u8, // instruction operand `a`, truncated to `u8`; register holding the output pointer
+    flag_idx: u8, // `expr.num_flags()` by default, else the opcode's `opcode_flag_idx` entry
+}
+
+impl<'a, const BLOCKS: usize, const BLOCK_SIZE: usize, const IS_FP2: bool>
+    FieldExprVecHeapExecutor<BLOCKS, BLOCK_SIZE, IS_FP2>
+{
+    /// Decodes `inst` into `data` and classifies its opcode for the execute-function dispatch.
+    ///
+    /// `pc` is only used to build the error value; on success `data` is overwritten in full.
+    ///
+    /// # Returns
+    /// `Ok(None)` for `SETUP_ADDSUB` / `SETUP_MULDIV`, otherwise `Ok(Some(op))` with the
+    /// matching [`Operation`].
+    ///
+    /// # Errors
+    /// [`StaticProgramError::InvalidInstruction`] unless `d` is the register address space and
+    /// `e` is the memory address space.
+    ///
+    /// # Panics
+    /// Hits `unreachable!()` if the local opcode is neither a setup nor an arithmetic opcode.
+    fn pre_compute_impl<F: PrimeField32>(
+        &'a self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut FieldExpressionPreCompute<'a>,
+    ) -> Result<Option<Operation>, StaticProgramError> {
+        let [a, b, c, d, e] =
+            [&inst.a, &inst.b, &inst.c, &inst.d, &inst.e].map(|x| x.as_canonical_u32());
+        if d != RV32_REGISTER_AS || e != RV32_MEMORY_AS {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+
+        let inner = &self.0;
+        let local_opcode = inst.opcode.local_opcode_idx(inner.offset);
+
+        // Without a usable `opcode_flag_idx` entry the index stays at `num_flags()`.
+        let needs_setup = inner.expr.needs_setup();
+        let default_flag_idx = inner.expr.num_flags() as u8;
+        let flag_idx = if needs_setup {
+            inner
+                .local_opcode_idx
+                .iter()
+                .position(|&idx| idx == local_opcode)
+                .filter(|&pos| pos < inner.opcode_flag_idx.len())
+                .map_or(default_flag_idx, |pos| inner.opcode_flag_idx[pos] as u8)
+        } else {
+            default_flag_idx
+        };
+
+        *data = FieldExpressionPreCompute {
+            expr: &inner.expr,
+            rs_addrs: [b as u8, c as u8],
+            a: a as u8,
+            flag_idx,
+        };
+
+        // Both opcode enums are classified the same way; pick the numbering that applies.
+        let [setup_addsub, setup_muldiv, add, sub, mul, div] = if IS_FP2 {
+            [
+                Fp2Opcode::SETUP_ADDSUB as usize,
+                Fp2Opcode::SETUP_MULDIV as usize,
+                Fp2Opcode::ADD as usize,
+                Fp2Opcode::SUB as usize,
+                Fp2Opcode::MUL as usize,
+                Fp2Opcode::DIV as usize,
+            ]
+        } else {
+            [
+                Rv32ModularArithmeticOpcode::SETUP_ADDSUB as usize,
+                Rv32ModularArithmeticOpcode::SETUP_MULDIV as usize,
+                Rv32ModularArithmeticOpcode::ADD as usize,
+                Rv32ModularArithmeticOpcode::SUB as usize,
+                Rv32ModularArithmeticOpcode::MUL as usize,
+                Rv32ModularArithmeticOpcode::DIV as usize,
+            ]
+        };
+
+        let op = if local_opcode == setup_addsub || local_opcode == setup_muldiv {
+            None
+        } else if local_opcode == add {
+            Some(Operation::Add)
+        } else if local_opcode == sub {
+            Some(Operation::Sub)
+        } else if local_opcode == mul {
+            Some(Operation::Mul)
+        } else if local_opcode == div {
+            Some(Operation::Div)
+        } else {
+            unreachable!()
+        };
+        Ok(op)
+    }
+}
+
+impl<F: PrimeField32, const BLOCKS: usize, const BLOCK_SIZE: usize, const IS_FP2: bool> Executor<F>
+    for FieldExprVecHeapExecutor<BLOCKS, BLOCK_SIZE, IS_FP2>
+{
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize {
+        std::mem::size_of::<FieldExpressionPreCompute>()
+    }
+
+    fn pre_compute<Ctx>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: ExecutionCtxTrait,
+    {
+        // The caller's byte buffer is viewed as this instruction's pre-compute record.
+        let decoded: &mut FieldExpressionPreCompute = data.borrow_mut();
+        let op = match self.pre_compute_impl(pc, inst, decoded)? {
+            Some(op) => op,
+            // Setup opcodes carry no arithmetic operation and always use the setup handler.
+            None => return Ok(execute_e1_setup_impl::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>),
+        };
+        // A modulus with a recognised field type gets a specialised handler, else generic.
+        let modulus = &decoded.expr.prime;
+        if IS_FP2 {
+            match get_fp2_field_type(modulus) {
+                None => Ok(execute_e1_generic_impl::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>),
+                Some(field_type) => generate_fp2_dispatch!(
+                    field_type,
+                    op,
+                    BLOCKS,
+                    BLOCK_SIZE,
+                    execute_e1_impl,
+                    [
+                        (BN254Coordinate, Add),
+                        (BN254Coordinate, Sub),
+                        (BN254Coordinate, Mul),
+                        (BN254Coordinate, Div),
+                        (BLS12_381Coordinate, Add),
+                        (BLS12_381Coordinate, Sub),
+                        (BLS12_381Coordinate, Mul),
+                        (BLS12_381Coordinate, Div),
+                    ]
+                ),
+            }
+        } else {
+            match get_field_type(modulus) {
+                None => Ok(execute_e1_generic_impl::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>),
+                Some(field_type) => generate_field_dispatch!(
+                    field_type,
+                    op,
+                    BLOCKS,
+                    BLOCK_SIZE,
+                    execute_e1_impl,
+                    [
+                        (K256Coordinate, Add),
+                        (K256Coordinate, Sub),
+                        (K256Coordinate, Mul),
+                        (K256Coordinate, Div),
+                        (K256Scalar, Add),
+                        (K256Scalar, Sub),
+                        (K256Scalar, Mul),
+                        (K256Scalar, Div),
+                        (P256Coordinate, Add),
+                        (P256Coordinate, Sub),
+                        (P256Coordinate, Mul),
+                        (P256Coordinate, Div),
+                        (P256Scalar, Add),
+                        (P256Scalar, Sub),
+                        (P256Scalar, Mul),
+                        (P256Scalar, Div),
+                        (BN254Coordinate, Add),
+                        (BN254Coordinate, Sub),
+                        (BN254Coordinate, Mul),
+                        (BN254Coordinate, Div),
+                        (BN254Scalar, Add),
+                        (BN254Scalar, Sub),
+                        (BN254Scalar, Mul),
+                        (BN254Scalar, Div),
+                        (BLS12_381Coordinate, Add),
+                        (BLS12_381Coordinate, Sub),
+                        (BLS12_381Coordinate, Mul),
+                        (BLS12_381Coordinate, Div),
+                        (BLS12_381Scalar, Add),
+                        (BLS12_381Scalar, Sub),
+                        (BLS12_381Scalar, Mul),
+                        (BLS12_381Scalar, Div),
+                    ]
+                ),
+            }
+        }
+    }
+}
+
+impl<F: PrimeField32, const BLOCKS: usize, const BLOCK_SIZE: usize, const IS_FP2: bool>
+    MeteredExecutor<F> for FieldExprVecHeapExecutor<BLOCKS, BLOCK_SIZE, IS_FP2>
+{
+    #[inline(always)]
+    fn metered_pre_compute_size(&self) -> usize {
+        std::mem::size_of::<E2PreCompute<FieldExpressionPreCompute>>()
+    }
+
+    fn metered_pre_compute<Ctx>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        // The caller's byte buffer is viewed as the metered record: chip index plus payload.
+        let decoded: &mut E2PreCompute<FieldExpressionPreCompute> = data.borrow_mut();
+        decoded.chip_idx = chip_idx as u32;
+        let op = match self.pre_compute_impl(pc, inst, &mut decoded.data)? {
+            Some(op) => op,
+            // Setup opcodes carry no arithmetic operation and always use the setup handler.
+            None => return Ok(execute_e2_setup_impl::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>),
+        };
+        // A modulus with a recognised field type gets a specialised handler, else generic.
+        let modulus = &decoded.data.expr.prime;
+        if IS_FP2 {
+            match get_fp2_field_type(modulus) {
+                None => Ok(execute_e2_generic_impl::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>),
+                Some(field_type) => generate_fp2_dispatch!(
+                    field_type,
+                    op,
+                    BLOCKS,
+                    BLOCK_SIZE,
+                    execute_e2_impl,
+                    [
+                        (BN254Coordinate, Add),
+                        (BN254Coordinate, Sub),
+                        (BN254Coordinate, Mul),
+                        (BN254Coordinate, Div),
+                        (BLS12_381Coordinate, Add),
+                        (BLS12_381Coordinate, Sub),
+                        (BLS12_381Coordinate, Mul),
+                        (BLS12_381Coordinate, Div),
+                    ]
+                ),
+            }
+        } else {
+            match get_field_type(modulus) {
+                None => Ok(execute_e2_generic_impl::<_, _, BLOCKS, BLOCK_SIZE, IS_FP2>),
+                Some(field_type) => generate_field_dispatch!(
+                    field_type,
+                    op,
+                    BLOCKS,
+                    BLOCK_SIZE,
+                    execute_e2_impl,
+                    [
+                        (K256Coordinate, Add),
+                        (K256Coordinate, Sub),
+                        (K256Coordinate, Mul),
+                        (K256Coordinate, Div),
+                        (K256Scalar, Add),
+                        (K256Scalar, Sub),
+                        (K256Scalar, Mul),
+                        (K256Scalar, Div),
+                        (P256Coordinate, Add),
+                        (P256Coordinate, Sub),
+                        (P256Coordinate, Mul),
+                        (P256Coordinate, Div),
+                        (P256Scalar, Add),
+                        (P256Scalar, Sub),
+                        (P256Scalar, Mul),
+                        (P256Scalar, Div),
+                        (BN254Coordinate, Add),
+                        (BN254Coordinate, Sub),
+                        (BN254Coordinate, Mul),
+                        (BN254Coordinate, Div),
+                        (BN254Scalar, Add),
+                        (BN254Scalar, Sub),
+                        (BN254Scalar, Mul),
+                        (BN254Scalar, Div),
+                        (BLS12_381Coordinate, Add),
+                        (BLS12_381Coordinate, Sub),
+                        (BLS12_381Coordinate, Mul),
+                        (BLS12_381Coordinate, Div),
+                        (BLS12_381Scalar, Add),
+                        (BLS12_381Scalar, Sub),
+                        (BLS12_381Scalar, Mul),
+                        (BLS12_381Scalar, Div),
+                    ]
+                ),
+            }
+        }
+    }
+}
+/// Executes one instruction through the native `field_operation` / `fp2_operation`
+/// routine selected by `IS_FP2`, `FIELD_TYPE` and `OP`: both operands are loaded from the
+/// addresses held in registers `rs_addrs`, and the result is stored at the address held in
+/// register `a` before `pc` and `instret` are advanced.
+unsafe fn execute_e12_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const BLOCKS: usize,
+    const BLOCK_SIZE: usize,
+    const IS_FP2: bool,
+    const FIELD_TYPE: u8,
+    const OP: u8,
+>(
+    pre_compute: &FieldExpressionPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let operand_ptrs = pre_compute.rs_addrs.map(|reg| {
+        let raw = vm_state.vm_read(RV32_REGISTER_AS, reg as u32);
+        u32::from_le_bytes(raw)
+    });
+    let operands: [[[u8; BLOCK_SIZE]; BLOCKS]; 2] = operand_ptrs.map(|base| {
+        debug_assert!(base as usize + BLOCK_SIZE * BLOCKS - 1 < (1 << POINTER_MAX_BITS));
+        from_fn(|idx| vm_state.vm_read(RV32_MEMORY_AS, base + (idx * BLOCK_SIZE) as u32))
+    });
+    let result = match IS_FP2 {
+        true => fp2_operation::<FIELD_TYPE, BLOCKS, BLOCK_SIZE, OP>(operands),
+        false => field_operation::<FIELD_TYPE, BLOCKS, BLOCK_SIZE, OP>(operands),
+    };
+    // The destination pointer is read only after the result has been computed.
+    let dest = u32::from_le_bytes(vm_state.vm_read(RV32_REGISTER_AS, pre_compute.a as u32));
+    debug_assert!(dest as usize + BLOCK_SIZE * BLOCKS - 1 < (1 << POINTER_MAX_BITS));
+    for (idx, block) in result.iter().enumerate() {
+        vm_state.vm_write(RV32_MEMORY_AS, dest + (idx * BLOCK_SIZE) as u32, block);
+    }
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+}
+
+/// Executes one instruction by evaluating the pre-computed field expression with
+/// `run_field_expression_precomputed` instead of a field-specific native routine.
+/// Operands are loaded from the addresses held in registers `rs_addrs`; the output blocks
+/// are stored at the address held in register `a` before `pc` and `instret` are advanced.
+unsafe fn execute_e12_generic_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const BLOCKS: usize,
+    const BLOCK_SIZE: usize,
+>(
+    pre_compute: &FieldExpressionPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let operand_ptrs = pre_compute.rs_addrs.map(|reg| {
+        let raw = vm_state.vm_read(RV32_REGISTER_AS, reg as u32);
+        u32::from_le_bytes(raw)
+    });
+    let operands: [[[u8; BLOCK_SIZE]; BLOCKS]; 2] = operand_ptrs.map(|base| {
+        debug_assert!(base as usize + BLOCK_SIZE * BLOCKS - 1 < (1 << POINTER_MAX_BITS));
+        from_fn(|idx| vm_state.vm_read(RV32_MEMORY_AS, base + (idx * BLOCK_SIZE) as u32))
+    });
+    let flat_inputs: DynArray<u8> = operands.into();
+    let outputs = run_field_expression_precomputed::<true>(
+        pre_compute.expr,
+        pre_compute.flag_idx as usize,
+        &flat_inputs.0,
+    );
+    let dest = u32::from_le_bytes(vm_state.vm_read(RV32_REGISTER_AS, pre_compute.a as u32));
+    debug_assert!(dest as usize + BLOCK_SIZE * BLOCKS - 1 < (1 << POINTER_MAX_BITS));
+    let blocks: [[u8; BLOCK_SIZE]; BLOCKS] = outputs.into();
+    for (idx, block) in blocks.iter().enumerate() {
+        vm_state.vm_write(RV32_MEMORY_AS, dest + (idx * BLOCK_SIZE) as u32, block);
+    }
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+}
+
+/// Handles the setup instruction: verifies that the prime read from memory matches the
+/// prime of the pre-computed expression, then runs the expression and stores its output.
+///
+/// On a mismatch `vm_state.exit_code` is set to `ExecutionError::Fail` and the function
+/// returns immediately, without writing memory or advancing `pc` / `instret`.
+unsafe fn execute_e12_setup_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const BLOCKS: usize,
+    const BLOCK_SIZE: usize,
+    const IS_FP2: bool,
+>(
+    pre_compute: &FieldExpressionPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    // Both operands are loaded; only the first one carries the prime.
+    let operand_ptrs = pre_compute.rs_addrs.map(|reg| {
+        let raw = vm_state.vm_read(RV32_REGISTER_AS, reg as u32);
+        u32::from_le_bytes(raw)
+    });
+    let operands: [[[u8; BLOCK_SIZE]; BLOCKS]; 2] = operand_ptrs.map(|base| {
+        debug_assert!(base as usize + BLOCK_SIZE * BLOCKS - 1 < (1 << POINTER_MAX_BITS));
+        from_fn(|idx| vm_state.vm_read(RV32_MEMORY_AS, base + (idx * BLOCK_SIZE) as u32))
+    });
+    // For Fp2 the prime occupies the first half of the first operand's blocks.
+    let prime_blocks = if IS_FP2 {
+        &operands[0][..BLOCKS / 2]
+    } else {
+        &operands[0][..]
+    };
+    let supplied_prime = BigUint::from_bytes_le(prime_blocks.as_flattened());
+    if supplied_prime != pre_compute.expr.prime {
+        vm_state.exit_code = Err(ExecutionError::Fail {
+            pc: vm_state.pc,
+            msg: "ModularSetup: mismatched prime",
+        });
+        return;
+    }
+    let flat_inputs: DynArray<u8> = operands.into();
+    let outputs = run_field_expression_precomputed::<true>(
+        pre_compute.expr,
+        pre_compute.flag_idx as usize,
+        &flat_inputs.0,
+    );
+    let dest = u32::from_le_bytes(vm_state.vm_read(RV32_REGISTER_AS, pre_compute.a as u32));
+    debug_assert!(dest as usize + BLOCK_SIZE * BLOCKS - 1 < (1 << POINTER_MAX_BITS));
+    let blocks: [[u8; BLOCK_SIZE]; BLOCKS] = outputs.into();
+    for (idx, block) in blocks.iter().enumerate() {
+        vm_state.vm_write(RV32_MEMORY_AS, dest + (idx * BLOCK_SIZE) as u32, block);
+    }
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+}
+
+unsafe fn execute_e1_setup_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const BLOCKS: usize,
+    const BLOCK_SIZE: usize,
+    const IS_FP2: bool,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let record: &FieldExpressionPreCompute = pre_compute.borrow(); // bare record, no wrapper
+    execute_e12_setup_impl::<F, CTX, BLOCKS, BLOCK_SIZE, IS_FP2>(record, vm_state);
+}
+
+/// Metered setup entry point: calls `on_height_change(chip_idx, 1)` on the execution
+/// context, then runs the shared setup handler on the record wrapped by `E2PreCompute`.
+unsafe fn execute_e2_setup_impl<
+    F: PrimeField32,
+    CTX: MeteredExecutionCtxTrait,
+    const BLOCKS: usize,
+    const BLOCK_SIZE: usize,
+    const IS_FP2: bool,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let outer: &E2PreCompute<FieldExpressionPreCompute> = pre_compute.borrow();
+    vm_state.ctx.on_height_change(outer.chip_idx as usize, 1);
+    execute_e12_setup_impl::<F, CTX, BLOCKS, BLOCK_SIZE, IS_FP2>(&outer.data, vm_state);
+}
+
+unsafe fn execute_e1_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const BLOCKS: usize,
+    const BLOCK_SIZE: usize,
+    const IS_FP2: bool,
+    const FIELD_TYPE: u8,
+    const OP: u8,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let record: &FieldExpressionPreCompute = pre_compute.borrow(); // bare record, no wrapper
+    execute_e12_impl::<F, CTX, BLOCKS, BLOCK_SIZE, IS_FP2, FIELD_TYPE, OP>(record, vm_state);
+}
+
+/// Metered counterpart of `execute_e1_impl`.
+///
+/// The buffer holds an `E2PreCompute` wrapper: `on_height_change(chip_idx, 1)` is invoked
+/// on the execution context first, then the wrapped record is handed to the shared
+/// native-arithmetic handler.
+unsafe fn execute_e2_impl<
+    F: PrimeField32,
+    CTX: MeteredExecutionCtxTrait,
+    const BLOCKS: usize,
+    const BLOCK_SIZE: usize,
+    const IS_FP2: bool,
+    const FIELD_TYPE: u8,
+    const OP: u8,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let outer: &E2PreCompute<FieldExpressionPreCompute> = pre_compute.borrow();
+    vm_state.ctx.on_height_change(outer.chip_idx as usize, 1);
+    execute_e12_impl::<F, CTX, BLOCKS, BLOCK_SIZE, IS_FP2, FIELD_TYPE, OP>(&outer.data, vm_state);
+}
+
+unsafe fn execute_e1_generic_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const BLOCKS: usize,
+    const BLOCK_SIZE: usize,
+    const IS_FP2: bool,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let record: &FieldExpressionPreCompute = pre_compute.borrow(); // `IS_FP2` is not used here
+    execute_e12_generic_impl::<F, CTX, BLOCKS, BLOCK_SIZE>(record, vm_state);
+}
+
+/// Metered generic entry point: calls `on_height_change(chip_idx, 1)` on the execution
+/// context, then runs the generic handler on the wrapped record. `IS_FP2` is not used here.
+unsafe fn execute_e2_generic_impl<
+    F: PrimeField32,
+    CTX: MeteredExecutionCtxTrait,
+    const BLOCKS: usize,
+    const BLOCK_SIZE: usize,
+    const IS_FP2: bool,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let outer: &E2PreCompute<FieldExpressionPreCompute> = pre_compute.borrow();
+    vm_state.ctx.on_height_change(outer.chip_idx as usize, 1);
+    execute_e12_generic_impl::<F, CTX, BLOCKS, BLOCK_SIZE>(&outer.data, vm_state);
+}
diff --git a/extensions/algebra/circuit/src/fields.rs b/extensions/algebra/circuit/src/fields.rs
new file mode 100644
index 0000000000..fdba871da7
--- /dev/null
+++ b/extensions/algebra/circuit/src/fields.rs
@@ -0,0 +1,376 @@
+use halo2curves_axiom::ff::PrimeField;
+use num_bigint::BigUint;
+use num_traits::Num;
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum FieldType {
+    K256Coordinate = 0, // modulus: `secq256k1::Fq`
+    K256Scalar = 1, // modulus: `secq256k1::Fp`
+    P256Coordinate = 2, // modulus: `secp256r1::Fp`
+    P256Scalar = 3, // modulus: `secp256r1::Fq`
+    BN254Coordinate = 4, // modulus: `bn256::Fq`
+    BN254Scalar = 5, // modulus: `bn256::Fr`
+    BLS12_381Coordinate = 6, // modulus: `bls12_381::Fq`
+    BLS12_381Scalar = 7, // modulus: `bls12_381::Fr`
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Operation {
+    Add = 0, // a + b
+    Sub = 1, // a - b
+    Mul = 2, // a * b
+    Div = 3, // a * b.invert()
+}
+
+fn get_modulus_as_bigint<F: PrimeField>() -> BigUint {
+    <BigUint as Num>::from_str_radix(F::MODULUS.trim_start_matches("0x"), 16).unwrap() // hex
+}
+
+/// Returns the [`FieldType`] tag of the natively supported field whose modulus equals
+/// `modulus`, or `None` when no supported field matches.
+///
+/// Candidates are checked in a fixed order (K256, P256, BN254, BLS12-381, with the
+/// coordinate field before the scalar field of each) and the first equal modulus wins.
+///
+/// The K256 moduli are taken from halo2curves' `secq256k1` module, whose `Fq` is reported as
+/// the coordinate field and whose `Fp` is reported as the scalar field; for `secp256r1` it
+/// is the other way round.
+pub fn get_field_type(modulus: &BigUint) -> Option<FieldType> {
+    use halo2curves_axiom::{bls12_381, bn256, secp256r1, secq256k1};
+
+    // Each entry pairs a modulus getter with the tag to report. Storing getters rather than
+    // values keeps the lookup lazy: moduli after the first match are never computed.
+    let candidates: [(fn() -> BigUint, FieldType); 8] = [
+        // K256
+        (get_modulus_as_bigint::<secq256k1::Fq>, FieldType::K256Coordinate),
+        (get_modulus_as_bigint::<secq256k1::Fp>, FieldType::K256Scalar),
+        // P256
+        (get_modulus_as_bigint::<secp256r1::Fp>, FieldType::P256Coordinate),
+        (get_modulus_as_bigint::<secp256r1::Fq>, FieldType::P256Scalar),
+        // BN254
+        (get_modulus_as_bigint::<bn256::Fq>, FieldType::BN254Coordinate),
+        (get_modulus_as_bigint::<bn256::Fr>, FieldType::BN254Scalar),
+        // BLS12-381
+        (get_modulus_as_bigint::<bls12_381::Fq>, FieldType::BLS12_381Coordinate),
+        (get_modulus_as_bigint::<bls12_381::Fr>, FieldType::BLS12_381Scalar),
+    ];
+
+    // Entries are tried in order and the search stops at the first equal modulus.
+    candidates
+        .into_iter()
+        .find(|(modulus_of, _)| modulus == &modulus_of())
+        .map(|(_, field_type)| field_type)
+}
+
+/// Like `get_field_type`, but only recognises the two moduli that have a native Fp2 routine.
+pub fn get_fp2_field_type(modulus: &BigUint) -> Option<FieldType> {
+    use halo2curves_axiom::{bls12_381, bn256};
+    if *modulus == get_modulus_as_bigint::<bn256::Fq>() {
+        Some(FieldType::BN254Coordinate)
+    } else if *modulus == get_modulus_as_bigint::<bls12_381::Fq>() {
+        Some(FieldType::BLS12_381Coordinate)
+    } else {
+        None
+    }
+}
+
+/// Applies `OP` to the two operands in `input_data`, using the native arithmetic of the
+/// field tagged by `FIELD`, and returns the result as `BLOCKS` blocks of `BLOCK_SIZE` bytes.
+///
+/// `FIELD` is a [`FieldType`] discriminant; any other value panics.
+#[inline(always)]
+pub fn field_operation<
+    const FIELD: u8,
+    const BLOCKS: usize,
+    const BLOCK_SIZE: usize,
+    const OP: u8,
+>(
+    input_data: [[[u8; BLOCK_SIZE]; BLOCKS]; 2],
+) -> [[u8; BLOCK_SIZE]; BLOCKS] {
+    use halo2curves_axiom::{bls12_381, bn256, secp256r1, secq256k1};
+    // Named constants so the discriminants can be used directly as match patterns.
+    const K256_COORDINATE_FIELD: u8 = FieldType::K256Coordinate as u8;
+    const K256_SCALAR_FIELD: u8 = FieldType::K256Scalar as u8;
+    const P256_COORDINATE_FIELD: u8 = FieldType::P256Coordinate as u8;
+    const P256_SCALAR_FIELD: u8 = FieldType::P256Scalar as u8;
+    const BN254_COORDINATE_FIELD: u8 = FieldType::BN254Coordinate as u8;
+    const BN254_SCALAR_FIELD: u8 = FieldType::BN254Scalar as u8;
+    const BLS12_381_COORDINATE_FIELD: u8 = FieldType::BLS12_381Coordinate as u8;
+    const BLS12_381_SCALAR_FIELD: u8 = FieldType::BLS12_381Scalar as u8;
+    match FIELD {
+        K256_COORDINATE_FIELD => {
+            field_operation_256bit::<secq256k1::Fq, BLOCKS, BLOCK_SIZE, OP>(input_data)
+        }
+        K256_SCALAR_FIELD => {
+            field_operation_256bit::<secq256k1::Fp, BLOCKS, BLOCK_SIZE, OP>(input_data)
+        }
+        P256_COORDINATE_FIELD => {
+            field_operation_256bit::<secp256r1::Fp, BLOCKS, BLOCK_SIZE, OP>(input_data)
+        }
+        P256_SCALAR_FIELD => {
+            field_operation_256bit::<secp256r1::Fq, BLOCKS, BLOCK_SIZE, OP>(input_data)
+        }
+        BN254_COORDINATE_FIELD => {
+            field_operation_256bit::<bn256::Fq, BLOCKS, BLOCK_SIZE, OP>(input_data)
+        }
+        BN254_SCALAR_FIELD => {
+            field_operation_256bit::<bn256::Fr, BLOCKS, BLOCK_SIZE, OP>(input_data)
+        }
+        BLS12_381_COORDINATE_FIELD => {
+            field_operation_bls12_381_coordinate::<BLOCKS, BLOCK_SIZE, OP>(input_data)
+        }
+        BLS12_381_SCALAR_FIELD => {
+            field_operation_256bit::<bls12_381::Fr, BLOCKS, BLOCK_SIZE, OP>(input_data)
+        }
+        _ => panic!("Unsupported field type: {}", FIELD),
+    }
+}
+
+/// Fp2 counterpart of `field_operation`: applies `OP` using the native Fp2 arithmetic of
+/// the curve tagged by `FIELD`. Only the BN254 and BLS12-381 coordinate tags are accepted.
+#[inline(always)]
+pub fn fp2_operation<
+    const FIELD: u8,
+    const BLOCKS: usize,
+    const BLOCK_SIZE: usize,
+    const OP: u8,
+>(
+    input_data: [[[u8; BLOCK_SIZE]; BLOCKS]; 2],
+) -> [[u8; BLOCK_SIZE]; BLOCKS] {
+    if FIELD == FieldType::BN254Coordinate as u8 {
+        fp2_operation_bn254::<BLOCKS, BLOCK_SIZE, OP>(input_data)
+    } else if FIELD == FieldType::BLS12_381Coordinate as u8 {
+        fp2_operation_bls12_381::<BLOCKS, BLOCK_SIZE, OP>(input_data)
+    } else {
+        panic!("Unsupported field type for Fp2: {}", FIELD)
+    }
+}
+
+/// Native arithmetic for a prime field with a 32-byte representation: decodes both
+/// operands (reducing non-canonical encodings), applies `OP`, and re-encodes the result.
+#[inline(always)]
+fn field_operation_256bit<
+    F: PrimeField<Repr = [u8; 32]>,
+    const BLOCKS: usize,
+    const BLOCK_SIZE: usize,
+    const OP: u8,
+>(
+    input_data: [[[u8; BLOCK_SIZE]; BLOCKS]; 2],
+) -> [[u8; BLOCK_SIZE]; BLOCKS] {
+    let [lhs, rhs] = input_data.map(|blocks| blocks_to_field_element::<F>(blocks.as_flattened()));
+    let result = match OP {
+        op if op == Operation::Add as u8 => lhs + rhs,
+        op if op == Operation::Sub as u8 => lhs - rhs,
+        op if op == Operation::Mul as u8 => lhs * rhs,
+        op if op == Operation::Div as u8 => lhs * rhs.invert().unwrap(),
+        _ => panic!("Unsupported operation: {}", OP),
+    };
+    let mut output = [[0u8; BLOCK_SIZE]; BLOCKS];
+    field_element_to_blocks(&result, &mut output);
+    output
+}
+
+/// BLS12-381 coordinate-field variant of the native arithmetic (48-byte elements).
+#[inline(always)]
+fn field_operation_bls12_381_coordinate<
+    const BLOCKS: usize,
+    const BLOCK_SIZE: usize,
+    const OP: u8,
+>(
+    input_data: [[[u8; BLOCK_SIZE]; BLOCKS]; 2],
+) -> [[u8; BLOCK_SIZE]; BLOCKS] {
+    let [lhs, rhs] = input_data
+        .map(|blocks| blocks_to_field_element_bls12_381_coordinate(blocks.as_flattened()));
+    let result = match OP {
+        op if op == Operation::Add as u8 => lhs + rhs,
+        op if op == Operation::Sub as u8 => lhs - rhs,
+        op if op == Operation::Mul as u8 => lhs * rhs,
+        op if op == Operation::Div as u8 => lhs * rhs.invert().unwrap(),
+        _ => panic!("Unsupported operation: {}", OP),
+    };
+    let mut output = [[0u8; BLOCK_SIZE]; BLOCKS];
+    field_element_to_blocks_bls12_381_coordinate(&result, &mut output);
+    output
+}
+
+/// Native Fp2 arithmetic over BN254: each operand's blocks encode `c0` followed by `c1`,
+/// and the result of `OP` is returned in the same layout.
+#[inline(always)]
+fn fp2_operation_bn254<const BLOCKS: usize, const BLOCK_SIZE: usize, const OP: u8>(
+    input_data: [[[u8; BLOCK_SIZE]; BLOCKS]; 2],
+) -> [[u8; BLOCK_SIZE]; BLOCKS] {
+    let [lhs, rhs] = input_data.map(|v| blocks_to_fp2_bn254::<BLOCKS, BLOCK_SIZE>(v.as_ref()));
+    let result = match OP {
+        op if op == Operation::Add as u8 => lhs + rhs,
+        op if op == Operation::Sub as u8 => lhs - rhs,
+        op if op == Operation::Mul as u8 => lhs * rhs,
+        op if op == Operation::Div as u8 => lhs * rhs.invert().unwrap(),
+        _ => panic!("Unsupported operation: {}", OP),
+    };
+    let mut output = [[0u8; BLOCK_SIZE]; BLOCKS];
+    fp2_to_blocks_bn254(&result, &mut output);
+    output
+}
+
+/// Native Fp2 arithmetic over BLS12-381: each operand's blocks encode `c0` followed by
+/// `c1`, and the result of `OP` is returned in the same layout.
+#[inline(always)]
+fn fp2_operation_bls12_381<const BLOCKS: usize, const BLOCK_SIZE: usize, const OP: u8>(
+    input_data: [[[u8; BLOCK_SIZE]; BLOCKS]; 2],
+) -> [[u8; BLOCK_SIZE]; BLOCKS] {
+    let [lhs, rhs] = input_data.map(|v| blocks_to_fp2_bls12_381::<BLOCKS, BLOCK_SIZE>(v.as_ref()));
+    let result = match OP {
+        op if op == Operation::Add as u8 => lhs + rhs,
+        op if op == Operation::Sub as u8 => lhs - rhs,
+        op if op == Operation::Mul as u8 => lhs * rhs,
+        op if op == Operation::Div as u8 => lhs * rhs.invert().unwrap(),
+        _ => panic!("Unsupported operation: {}", OP),
+    };
+    let mut output = [[0u8; BLOCK_SIZE]; BLOCKS];
+    fp2_to_blocks_bls12_381(&result, &mut output);
+    output
+}
+
+/// Decodes a little-endian 32-byte value into `F`, reducing it modulo the field's modulus
+/// when the bytes are not a canonical representation.
+#[inline(always)]
+fn from_repr_with_reduction<F: PrimeField<Repr = [u8; 32]>>(bytes: [u8; 32]) -> F {
+    // Canonical encodings decode directly and skip the big-integer arithmetic below.
+    if let Some(element) = F::from_repr_vartime(bytes) {
+        return element;
+    }
+
+    // Reduce modulo the field's modulus for non-canonical representations
+    let reduced = BigUint::from_bytes_le(&bytes) % get_modulus_as_bigint::<F>();
+    let digits = reduced.to_bytes_le();
+    let mut canonical = [0u8; 32];
+    canonical[..digits.len()].copy_from_slice(&digits);
+    F::from_repr_vartime(canonical).unwrap()
+}
+
+/// Decodes a little-endian 48-byte value into a BLS12-381 `Fq`, reducing it modulo the
+/// field's modulus when the bytes are not a canonical representation.
+#[inline(always)]
+fn from_repr_with_reduction_bls12_381_coordinate(
+    bytes: [u8; 48],
+) -> halo2curves_axiom::bls12_381::Fq {
+    use halo2curves_axiom::bls12_381::Fq;
+    // NOTE(review): `from_bytes` is expected to return a `subtle::CtOption`, whose
+    // `unwrap_or_else` always runs its closure; going through `Option` keeps this lazy.
+    let decoded: Option<Fq> = Fq::from_bytes(&bytes).into();
+    decoded.unwrap_or_else(|| {
+        let reduced = BigUint::from_bytes_le(&bytes) % get_modulus_as_bigint::<Fq>();
+        let digits = reduced.to_bytes_le();
+        let mut canonical = [0u8; 48];
+        canonical[..digits.len()].copy_from_slice(&digits);
+        Fq::from_bytes(&canonical).unwrap()
+    })
+}
+
+/// Decodes 32 little-endian bytes (length checked in debug builds) into `F`, with reduction.
+#[inline(always)]
+pub fn blocks_to_field_element<F: PrimeField<Repr = [u8; 32]>>(blocks: &[u8]) -> F {
+    debug_assert!(blocks.len() == 32);
+    let mut repr = [0u8; 32];
+    repr[..blocks.len()].copy_from_slice(blocks);
+    from_repr_with_reduction(repr)
+}
+
+/// Writes the 32-byte representation of `field_element` into `output`, filling the blocks
+/// in order.
+///
+/// The blocks are expected to cover exactly 32 bytes (checked in debug builds only); output
+/// bytes beyond the representation are zeroed and surplus representation bytes are not
+/// written.
+#[inline(always)]
+pub fn field_element_to_blocks<F: PrimeField<Repr = [u8; 32]>, const BLOCK_SIZE: usize>(
+    field_element: &F,
+    output: &mut [[u8; BLOCK_SIZE]],
+) {
+    debug_assert!(output.len() * BLOCK_SIZE == 32);
+    let repr = field_element.to_repr();
+    let flat = output.as_flattened_mut();
+    // Copy the bytes both sides have ...
+    let copied = flat.len().min(repr.len());
+    flat[..copied].copy_from_slice(&repr[..copied]);
+    // ... and zero any output bytes that lie beyond the representation.
+    flat[copied..].fill(0);
+}
+
+/// Decodes 48 little-endian bytes (length checked in debug builds) into a BLS12-381 `Fq`.
+#[inline(always)]
+pub fn blocks_to_field_element_bls12_381_coordinate(
+    blocks: &[u8],
+) -> halo2curves_axiom::bls12_381::Fq {
+    debug_assert!(blocks.len() == 48);
+    let mut repr = [0u8; 48];
+    repr[..blocks.len()].copy_from_slice(blocks);
+    from_repr_with_reduction_bls12_381_coordinate(repr)
+}
+
+/// Writes the 48-byte encoding of a BLS12-381 `Fq` into `output`, filling the blocks in
+/// order.
+///
+/// The blocks are expected to cover exactly 48 bytes (checked in debug builds only); output
+/// bytes beyond the encoding are zeroed and surplus encoding bytes are not
+/// written.
+#[inline(always)]
+pub fn field_element_to_blocks_bls12_381_coordinate<const BLOCK_SIZE: usize>(
+    field_element: &halo2curves_axiom::bls12_381::Fq,
+    output: &mut [[u8; BLOCK_SIZE]],
+) {
+    debug_assert!(output.len() * BLOCK_SIZE == 48);
+    let encoded = field_element.to_bytes();
+    let flat = output.as_flattened_mut();
+    // Copy the bytes both sides have ...
+    let copied = flat.len().min(encoded.len());
+    flat[..copied].copy_from_slice(&encoded[..copied]);
+    // ... and zero any output bytes that lie beyond the encoding.
+    flat[copied..].fill(0);
+}
+
+/// Decodes a BN254 `Fq2` from `blocks`: the first `BLOCKS / 2` blocks hold `c0`, the
+/// remaining blocks hold `c1`.
+#[inline(always)]
+fn blocks_to_fp2_bn254<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    blocks: &[[u8; BLOCK_SIZE]],
+) -> halo2curves_axiom::bn256::Fq2 {
+    use halo2curves_axiom::bn256::{Fq, Fq2};
+    let (c0_blocks, c1_blocks) = blocks.split_at(BLOCKS / 2);
+    let c0 = blocks_to_field_element::<Fq>(c0_blocks.as_flattened());
+    let c1 = blocks_to_field_element::<Fq>(c1_blocks.as_flattened());
+    Fq2::new(c0, c1)
+}
+
+/// Encodes a BN254 `Fq2` into `output`.
+///
+/// `c0` fills the first `BLOCKS / 2` blocks and `c1` the remaining ones.
+#[inline(always)]
+fn fp2_to_blocks_bn254<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    fp2: &halo2curves_axiom::bn256::Fq2,
+    output: &mut [[u8; BLOCK_SIZE]; BLOCKS],
+) {
+    let (c0_out, c1_out) = output.split_at_mut(BLOCKS / 2);
+
+    // The element type and block size are inferred from the arguments.
+    field_element_to_blocks(&fp2.c0, c0_out);
+    field_element_to_blocks(&fp2.c1, c1_out);
+}
+
+#[inline(always)]
+fn blocks_to_fp2_bls12_381<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    blocks: &[[u8; BLOCK_SIZE]],
+) -> halo2curves_axiom::bls12_381::Fq2 {
+    let (a, b) = blocks.split_at(BLOCKS / 2); // blocks of `c0` first, then those of `c1`
+    let [c0, c1] = [a, b].map(|h| blocks_to_field_element_bls12_381_coordinate(h.as_flattened()));
+    halo2curves_axiom::bls12_381::Fq2 { c0, c1 }
+}
+
+#[inline(always)]
+fn fp2_to_blocks_bls12_381<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    fp2: &halo2curves_axiom::bls12_381::Fq2, // `c0` goes to the first half, `c1` to the rest
+    output: &mut [[u8; BLOCK_SIZE]; BLOCKS],
+) {
+    field_element_to_blocks_bls12_381_coordinate::<BLOCK_SIZE>(&fp2.c0, &mut output[..BLOCKS / 2]);
+    field_element_to_blocks_bls12_381_coordinate::<BLOCK_SIZE>(&fp2.c1, &mut output[BLOCKS / 2..]);
+}
diff --git a/extensions/algebra/circuit/src/fp2_chip/addsub.rs b/extensions/algebra/circuit/src/fp2_chip/addsub.rs
index 4eca1ad102..120ba83dd0 100644
--- a/extensions/algebra/circuit/src/fp2_chip/addsub.rs
+++ b/extensions/algebra/circuit/src/fp2_chip/addsub.rs
@@ -1,62 +1,25 @@
-use std::{
-    cell::RefCell,
-    rc::Rc,
-    sync::{Arc, Mutex},
-};
+use std::{cell::RefCell, rc::Rc};
 
 use openvm_algebra_transpiler::Fp2Opcode;
-use openvm_circuit::{arch::VmChipWrapper, system::memory::OfflineMemory};
-use openvm_circuit_derive::InstructionExecutor;
-use openvm_circuit_primitives::var_range::{
-    SharedVariableRangeCheckerChip, VariableRangeCheckerBus,
+use openvm_circuit::{
+    arch::ExecutionBridge,
+    system::memory::{offline_checker::MemoryBridge, SharedMemoryHelper},
+};
+use openvm_circuit_primitives::{
+    bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
+    var_range::{SharedVariableRangeCheckerChip, VariableRangeCheckerBus},
 };
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
+use openvm_instructions::riscv::RV32_CELL_BITS;
 use openvm_mod_circuit_builder::{
-    ExprBuilder, ExprBuilderConfig, FieldExpr, FieldExpressionCoreChip,
+    ExprBuilder, ExprBuilderConfig, FieldExpr, FieldExpressionCoreAir, FieldExpressionExecutor,
+    FieldExpressionFiller,
+};
+use openvm_rv32_adapters::{
+    Rv32VecHeapAdapterAir, Rv32VecHeapAdapterExecutor, Rv32VecHeapAdapterFiller,
 };
-use openvm_rv32_adapters::Rv32VecHeapAdapterChip;
-use openvm_stark_backend::p3_field::PrimeField32;
-
-use crate::Fp2;
 
-// Input: Fp2 * 2
-// Output: Fp2
-#[derive(Chip, ChipUsageGetter, InstructionExecutor)]
-pub struct Fp2AddSubChip<F: PrimeField32, const BLOCKS: usize, const BLOCK_SIZE: usize>(
-    pub  VmChipWrapper<
-        F,
-        Rv32VecHeapAdapterChip<F, 2, BLOCKS, BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
-        FieldExpressionCoreChip,
-    >,
-);
-
-impl<F: PrimeField32, const BLOCKS: usize, const BLOCK_SIZE: usize>
-    Fp2AddSubChip<F, BLOCKS, BLOCK_SIZE>
-{
-    pub fn new(
-        adapter: Rv32VecHeapAdapterChip<F, 2, BLOCKS, BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
-        config: ExprBuilderConfig,
-        offset: usize,
-        range_checker: SharedVariableRangeCheckerChip,
-        offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-    ) -> Self {
-        let (expr, is_add_flag, is_sub_flag) = fp2_addsub_expr(config, range_checker.bus());
-        let core = FieldExpressionCoreChip::new(
-            expr,
-            offset,
-            vec![
-                Fp2Opcode::ADD as usize,
-                Fp2Opcode::SUB as usize,
-                Fp2Opcode::SETUP_ADDSUB as usize,
-            ],
-            vec![is_add_flag, is_sub_flag],
-            range_checker,
-            "Fp2AddSub",
-            false,
-        );
-        Self(VmChipWrapper::new(adapter, core, offline_memory))
-    }
-}
+use super::{Fp2Air, Fp2Chip, Fp2Executor};
+use crate::{FieldExprVecHeapExecutor, Fp2};
 
 pub fn fp2_addsub_expr(
     config: ExprBuilderConfig,
@@ -85,123 +48,81 @@ pub fn fp2_addsub_expr(
     )
 }
 
-#[cfg(test)]
-mod tests {
-
-    use halo2curves_axiom::{bn256::Fq2, ff::Field};
-    use itertools::Itertools;
-    use openvm_algebra_transpiler::Fp2Opcode;
-    use openvm_circuit::arch::testing::{VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS};
-    use openvm_circuit_primitives::bitwise_op_lookup::{
-        BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
-    };
-    use openvm_instructions::{riscv::RV32_CELL_BITS, LocalOpcode};
-    use openvm_mod_circuit_builder::{
-        test_utils::{biguint_to_limbs, bn254_fq2_to_biguint_vec, bn254_fq_to_biguint},
-        ExprBuilderConfig,
-    };
-    use openvm_pairing_guest::bn254::BN254_MODULUS;
-    use openvm_rv32_adapters::{rv32_write_heap_default, Rv32VecHeapAdapterChip};
-    use openvm_stark_backend::p3_field::FieldAlgebra;
-    use openvm_stark_sdk::p3_baby_bear::BabyBear;
-    use rand::{rngs::StdRng, SeedableRng};
-
-    use super::Fp2AddSubChip;
-
-    const NUM_LIMBS: usize = 32;
-    const LIMB_BITS: usize = 8;
-    type F = BabyBear;
-
-    #[test]
-    fn test_fp2_addsub() {
-        let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-        let modulus = BN254_MODULUS.clone();
-        let config = ExprBuilderConfig {
-            modulus: modulus.clone(),
-            num_limbs: NUM_LIMBS,
-            limb_bits: LIMB_BITS,
-        };
-        let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-        let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-        let adapter = Rv32VecHeapAdapterChip::<F, 2, 2, 2, NUM_LIMBS, NUM_LIMBS>::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-            tester.address_bits(),
-            bitwise_chip.clone(),
-        );
-        let mut chip = Fp2AddSubChip::new(
-            adapter,
-            config,
-            Fp2Opcode::CLASS_OFFSET,
-            tester.range_checker(),
-            tester.offline_memory_mutex_arc(),
-        );
-
-        let mut rng = StdRng::seed_from_u64(42);
-        let x = Fq2::random(&mut rng);
-        let y = Fq2::random(&mut rng);
-        let inputs = [x.c0, x.c1, y.c0, y.c1].map(bn254_fq_to_biguint);
+/// Builds the Fp2 add/sub expression together with its opcode and flag index lists.
+///
+/// Input: Fp2 * 2. Output: Fp2.
+fn gen_base_expr(
+    config: ExprBuilderConfig,
+    range_checker_bus: VariableRangeCheckerBus,
+) -> (FieldExpr, Vec<usize>, Vec<usize>) {
+    let (expr, add_flag, sub_flag) = fp2_addsub_expr(config, range_checker_bus);
+
+    // Local opcodes served by the expression; only ADD and SUB have a flag of their own.
+    let opcodes = [Fp2Opcode::ADD, Fp2Opcode::SUB, Fp2Opcode::SETUP_ADDSUB]
+        .map(|opcode| opcode as usize)
+        .to_vec();
+    let flags = vec![add_flag, sub_flag];
+
+    (expr, opcodes, flags)
+}
 
-        let expected_sum = bn254_fq2_to_biguint_vec(x + y);
-        let r_sum = chip
-            .0
-            .core
-            .expr()
-            .execute_with_output(inputs.to_vec(), vec![true, false]);
-        assert_eq!(r_sum.len(), 2);
-        assert_eq!(r_sum[0], expected_sum[0]);
-        assert_eq!(r_sum[1], expected_sum[1]);
+/// Builds the AIR for the Fp2 add/sub instructions.
+///
+/// The adapter AIR is created from the execution/memory bridges, the bitwise lookup bus and
+/// `pointer_max_bits`; the core AIR from the expression returned by `gen_base_expr`
+/// together with `offset` and the opcode/flag index lists.
+pub fn get_fp2_addsub_air<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    exec_bridge: ExecutionBridge,
+    mem_bridge: MemoryBridge,
+    config: ExprBuilderConfig,
+    range_checker_bus: VariableRangeCheckerBus,
+    bitwise_lookup_bus: BitwiseOperationLookupBus,
+    pointer_max_bits: usize,
+    offset: usize,
+) -> Fp2Air<BLOCKS, BLOCK_SIZE> {
+    let (expr, opcodes, flags) = gen_base_expr(config, range_checker_bus);
+    let adapter =
+        Rv32VecHeapAdapterAir::new(exec_bridge, mem_bridge, bitwise_lookup_bus, pointer_max_bits);
+    let core = FieldExpressionCoreAir::new(expr, offset, opcodes, flags);
+    Fp2Air::new(adapter, core)
+}
 
-        let expected_sub = bn254_fq2_to_biguint_vec(x - y);
-        let r_sub = chip
-            .0
-            .core
-            .expr()
-            .execute_with_output(inputs.to_vec(), vec![false, true]);
-        assert_eq!(r_sub.len(), 2);
-        assert_eq!(r_sub[0], expected_sub[0]);
-        assert_eq!(r_sub[1], expected_sub[1]);
+// TODO[arayi]: rename step->executor (for all algebra and ecc functions)
+/// Builds the executor for the Fp2 add/sub instructions.
+///
+/// A `FieldExpressionExecutor` is assembled from a `Rv32VecHeapAdapterExecutor`, the
+/// expression and opcode/flag index lists returned by `gen_base_expr`, `offset` and the
+/// name `"Fp2AddSub"`; the result is wrapped in `FieldExprVecHeapExecutor`.
+pub fn get_fp2_addsub_step<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    config: ExprBuilderConfig,
+    range_checker_bus: VariableRangeCheckerBus,
+    pointer_max_bits: usize,
+    offset: usize,
+) -> Fp2Executor<BLOCKS, BLOCK_SIZE> {
+    let (expr, opcodes, flags) = gen_base_expr(config, range_checker_bus);
+
+    let adapter = Rv32VecHeapAdapterExecutor::new(pointer_max_bits);
+    let inner = FieldExpressionExecutor::new(adapter, expr, offset, opcodes, flags, "Fp2AddSub");
+    FieldExprVecHeapExecutor(inner)
+}
 
-        let x_limbs = inputs[0..2]
-            .iter()
-            .map(|x| {
-                biguint_to_limbs::<NUM_LIMBS>(x.clone(), LIMB_BITS)
-                    .map(BabyBear::from_canonical_u32)
-            })
-            .collect_vec();
-        let y_limbs = inputs[2..4]
-            .iter()
-            .map(|x| {
-                biguint_to_limbs::<NUM_LIMBS>(x.clone(), LIMB_BITS)
-                    .map(BabyBear::from_canonical_u32)
-            })
-            .collect_vec();
-        let modulus =
-            biguint_to_limbs::<NUM_LIMBS>(modulus, LIMB_BITS).map(BabyBear::from_canonical_u32);
-        let zero = [BabyBear::ZERO; NUM_LIMBS];
-        let setup_instruction = rv32_write_heap_default(
-            &mut tester,
-            vec![modulus, zero],
-            vec![zero; 2],
-            chip.0.core.air.offset + Fp2Opcode::SETUP_ADDSUB as usize,
-        );
-        let instruction1 = rv32_write_heap_default(
-            &mut tester,
-            x_limbs.clone(),
-            y_limbs.clone(),
-            chip.0.core.air.offset + Fp2Opcode::ADD as usize,
-        );
-        let instruction2 = rv32_write_heap_default(
-            &mut tester,
-            x_limbs,
-            y_limbs,
-            chip.0.core.air.offset + Fp2Opcode::SUB as usize,
-        );
-        tester.execute(&mut chip, &setup_instruction);
-        tester.execute(&mut chip, &instruction1);
-        tester.execute(&mut chip, &instruction2);
-        let tester = tester.build().load(chip).load(bitwise_chip).finalize();
-        tester.simple_test().expect("Verification failed");
-    }
+/// Builds the `Fp2Chip` for the Fp2 add/sub instructions.
+///
+/// The expression is generated with the bus of `range_checker`. It is combined with a
+/// `Rv32VecHeapAdapterFiller` (built from `pointer_max_bits` and `bitwise_lookup_chip`), the
+/// opcode/flag index lists and `range_checker` itself into a `FieldExpressionFiller`, which
+/// `Fp2Chip::new` then pairs with `mem_helper`.
+pub fn get_fp2_addsub_chip<F, const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    config: ExprBuilderConfig,
+    mem_helper: SharedMemoryHelper<F>,
+    range_checker: SharedVariableRangeCheckerChip,
+    bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+    pointer_max_bits: usize,
+) -> Fp2Chip<F, BLOCKS, BLOCK_SIZE> {
+    let (expr, opcodes, flags) = gen_base_expr(config, range_checker.bus());
+
+    let adapter = Rv32VecHeapAdapterFiller::new(pointer_max_bits, bitwise_lookup_chip);
+    // NOTE(review): the meaning of the trailing `false` argument is not visible here.
+    let filler = FieldExpressionFiller::new(adapter, expr, opcodes, flags, range_checker, false);
+    Fp2Chip::new(filler, mem_helper)
 }
diff --git a/extensions/algebra/circuit/src/fp2_chip/mod.rs b/extensions/algebra/circuit/src/fp2_chip/mod.rs
index cd316fd70c..c581d253f5 100644
--- a/extensions/algebra/circuit/src/fp2_chip/mod.rs
+++ b/extensions/algebra/circuit/src/fp2_chip/mod.rs
@@ -1,5 +1,27 @@
+use openvm_circuit::arch::{VmAirWrapper, VmChipWrapper};
+use openvm_mod_circuit_builder::{FieldExpressionCoreAir, FieldExpressionFiller};
+use openvm_rv32_adapters::{Rv32VecHeapAdapterAir, Rv32VecHeapAdapterFiller};
+
+use crate::FieldExprVecHeapExecutor;
+
 mod addsub;
 pub use addsub::*;
 
 mod muldiv;
 pub use muldiv::*;
+
+pub type Fp2Air<const BLOCKS: usize, const BLOCK_SIZE: usize> = VmAirWrapper<
+    Rv32VecHeapAdapterAir<2, BLOCKS, BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
+    FieldExpressionCoreAir,
+>;
+
+pub type Fp2Executor<const BLOCKS: usize, const BLOCK_SIZE: usize> =
+    FieldExprVecHeapExecutor<BLOCKS, BLOCK_SIZE, true>;
+
+pub type Fp2Chip<F, const BLOCKS: usize, const BLOCK_SIZE: usize> = VmChipWrapper<
+    F,
+    FieldExpressionFiller<Rv32VecHeapAdapterFiller<2, BLOCKS, BLOCKS, BLOCK_SIZE, BLOCK_SIZE>>,
+>;
+
+#[cfg(test)]
+mod tests;
diff --git a/extensions/algebra/circuit/src/fp2_chip/muldiv.rs b/extensions/algebra/circuit/src/fp2_chip/muldiv.rs
index 83ef9565f3..c17b94ae9e 100644
--- a/extensions/algebra/circuit/src/fp2_chip/muldiv.rs
+++ b/extensions/algebra/circuit/src/fp2_chip/muldiv.rs
@@ -1,62 +1,25 @@
-use std::{
-    cell::RefCell,
-    rc::Rc,
-    sync::{Arc, Mutex},
-};
+use std::{cell::RefCell, rc::Rc};
 
 use openvm_algebra_transpiler::Fp2Opcode;
-use openvm_circuit::{arch::VmChipWrapper, system::memory::OfflineMemory};
-use openvm_circuit_derive::InstructionExecutor;
-use openvm_circuit_primitives::var_range::{
-    SharedVariableRangeCheckerChip, VariableRangeCheckerBus,
+use openvm_circuit::{
+    arch::ExecutionBridge,
+    system::memory::{offline_checker::MemoryBridge, SharedMemoryHelper},
+};
+use openvm_circuit_primitives::{
+    bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
+    var_range::{SharedVariableRangeCheckerChip, VariableRangeCheckerBus},
 };
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
+use openvm_instructions::riscv::RV32_CELL_BITS;
 use openvm_mod_circuit_builder::{
-    ExprBuilder, ExprBuilderConfig, FieldExpr, FieldExpressionCoreChip, SymbolicExpr,
+    ExprBuilder, ExprBuilderConfig, FieldExpr, FieldExpressionCoreAir, FieldExpressionExecutor,
+    FieldExpressionFiller, SymbolicExpr,
+};
+use openvm_rv32_adapters::{
+    Rv32VecHeapAdapterAir, Rv32VecHeapAdapterExecutor, Rv32VecHeapAdapterFiller,
 };
-use openvm_rv32_adapters::Rv32VecHeapAdapterChip;
-use openvm_stark_backend::p3_field::PrimeField32;
-
-use crate::Fp2;
 
-// Input: Fp2 * 2
-// Output: Fp2
-#[derive(Chip, ChipUsageGetter, InstructionExecutor)]
-pub struct Fp2MulDivChip<F: PrimeField32, const BLOCKS: usize, const BLOCK_SIZE: usize>(
-    pub  VmChipWrapper<
-        F,
-        Rv32VecHeapAdapterChip<F, 2, BLOCKS, BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
-        FieldExpressionCoreChip,
-    >,
-);
-
-impl<F: PrimeField32, const BLOCKS: usize, const BLOCK_SIZE: usize>
-    Fp2MulDivChip<F, BLOCKS, BLOCK_SIZE>
-{
-    pub fn new(
-        adapter: Rv32VecHeapAdapterChip<F, 2, BLOCKS, BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
-        config: ExprBuilderConfig,
-        offset: usize,
-        range_checker: SharedVariableRangeCheckerChip,
-        offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-    ) -> Self {
-        let (expr, is_mul_flag, is_div_flag) = fp2_muldiv_expr(config, range_checker.bus());
-        let core = FieldExpressionCoreChip::new(
-            expr,
-            offset,
-            vec![
-                Fp2Opcode::MUL as usize,
-                Fp2Opcode::DIV as usize,
-                Fp2Opcode::SETUP_MULDIV as usize,
-            ],
-            vec![is_mul_flag, is_div_flag],
-            range_checker,
-            "Fp2MulDiv",
-            false,
-        );
-        Self(VmChipWrapper::new(adapter, core, offline_memory))
-    }
-}
+use super::{Fp2Air, Fp2Chip, Fp2Executor};
+use crate::{FieldExprVecHeapExecutor, Fp2};
 
 pub fn fp2_muldiv_expr(
     config: ExprBuilderConfig,
@@ -124,128 +87,81 @@ pub fn fp2_muldiv_expr(
     )
 }
 
-#[cfg(test)]
-mod tests {
-
-    use halo2curves_axiom::{bn256::Fq2, ff::Field};
-    use itertools::Itertools;
-    use openvm_algebra_transpiler::Fp2Opcode;
-    use openvm_circuit::arch::testing::{VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS};
-    use openvm_circuit_primitives::bitwise_op_lookup::{
-        BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
-    };
-    use openvm_instructions::{riscv::RV32_CELL_BITS, LocalOpcode};
-    use openvm_mod_circuit_builder::{
-        test_utils::{biguint_to_limbs, bn254_fq2_to_biguint_vec, bn254_fq_to_biguint},
-        ExprBuilderConfig,
-    };
-    use openvm_pairing_guest::bn254::BN254_MODULUS;
-    use openvm_rv32_adapters::{rv32_write_heap_default, Rv32VecHeapAdapterChip};
-    use openvm_stark_backend::p3_field::FieldAlgebra;
-    use openvm_stark_sdk::p3_baby_bear::BabyBear;
-    use rand::{rngs::StdRng, SeedableRng};
-
-    use super::Fp2MulDivChip;
-
-    const NUM_LIMBS: usize = 32;
-    const LIMB_BITS: usize = 8;
-    type F = BabyBear;
-
-    #[test]
-    fn test_fp2_muldiv() {
-        let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-        let modulus = BN254_MODULUS.clone();
-        let config = ExprBuilderConfig {
-            modulus: modulus.clone(),
-            num_limbs: NUM_LIMBS,
-            limb_bits: LIMB_BITS,
-        };
-        let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-        let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-        let adapter = Rv32VecHeapAdapterChip::<F, 2, 2, 2, NUM_LIMBS, NUM_LIMBS>::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-            tester.address_bits(),
-            bitwise_chip.clone(),
-        );
-        let mut chip = Fp2MulDivChip::new(
-            adapter,
-            config,
-            Fp2Opcode::CLASS_OFFSET,
-            tester.range_checker(),
-            tester.offline_memory_mutex_arc(),
-        );
-        assert_eq!(
-            chip.0.core.expr().builder.num_variables,
-            2,
-            "Fp2MulDiv should only introduce new z Fp2 variable (2 Fp var)"
-        );
-
-        let mut rng = StdRng::seed_from_u64(42);
-        let x = Fq2::random(&mut rng);
-        let y = Fq2::random(&mut rng);
-        let inputs = [x.c0, x.c1, y.c0, y.c1].map(bn254_fq_to_biguint);
-
-        let expected_mul = bn254_fq2_to_biguint_vec(x * y);
-        let r_mul = chip
-            .0
-            .core
-            .expr()
-            .execute_with_output(inputs.to_vec(), vec![true, false]);
-        assert_eq!(r_mul.len(), 2);
-        assert_eq!(r_mul[0], expected_mul[0]);
-        assert_eq!(r_mul[1], expected_mul[1]);
-
-        let expected_div = bn254_fq2_to_biguint_vec(x * y.invert().unwrap());
-        let r_div = chip
-            .0
-            .core
-            .expr()
-            .execute_with_output(inputs.to_vec(), vec![false, true]);
-        assert_eq!(r_div.len(), 2);
-        assert_eq!(r_div[0], expected_div[0]);
-        assert_eq!(r_div[1], expected_div[1]);
-
-        let x_limbs = inputs[0..2]
-            .iter()
-            .map(|x| {
-                biguint_to_limbs::<NUM_LIMBS>(x.clone(), LIMB_BITS)
-                    .map(BabyBear::from_canonical_u32)
-            })
-            .collect_vec();
-        let y_limbs = inputs[2..4]
-            .iter()
-            .map(|x| {
-                biguint_to_limbs::<NUM_LIMBS>(x.clone(), LIMB_BITS)
-                    .map(BabyBear::from_canonical_u32)
-            })
-            .collect_vec();
-        let modulus =
-            biguint_to_limbs::<NUM_LIMBS>(modulus, LIMB_BITS).map(BabyBear::from_canonical_u32);
-        let zero = [BabyBear::ZERO; NUM_LIMBS];
-        let setup_instruction = rv32_write_heap_default(
-            &mut tester,
-            vec![modulus, zero],
-            vec![zero; 2],
-            chip.0.core.air.offset + Fp2Opcode::SETUP_MULDIV as usize,
-        );
-        let instruction1 = rv32_write_heap_default(
-            &mut tester,
-            x_limbs.clone(),
-            y_limbs.clone(),
-            chip.0.core.air.offset + Fp2Opcode::MUL as usize,
-        );
-        let instruction2 = rv32_write_heap_default(
-            &mut tester,
-            x_limbs,
-            y_limbs,
-            chip.0.core.air.offset + Fp2Opcode::DIV as usize,
-        );
-        tester.execute(&mut chip, &setup_instruction);
-        tester.execute(&mut chip, &instruction1);
-        tester.execute(&mut chip, &instruction2);
-        let tester = tester.build().load(chip).load(bitwise_chip).finalize();
-        tester.simple_test().expect("Verification failed");
-    }
+/// Input: two Fp2 elements. Output: one Fp2 element.
+/// Returns the mul/div expression, the local opcodes it serves (MUL, DIV,
+/// SETUP_MULDIV) and the flag indices `[is_mul_flag, is_div_flag]`.
+fn gen_base_expr(
+    config: ExprBuilderConfig,
+    range_checker_bus: VariableRangeCheckerBus,
+) -> (FieldExpr, Vec<usize>, Vec<usize>) {
+    let (expr, is_mul_flag, is_div_flag) = fp2_muldiv_expr(config, range_checker_bus);
+
+    let local_opcode_idx = vec![
+        Fp2Opcode::MUL as usize,
+        Fp2Opcode::DIV as usize,
+        Fp2Opcode::SETUP_MULDIV as usize,
+    ];
+    let opcode_flag_idx = vec![is_mul_flag, is_div_flag];
+
+    (expr, local_opcode_idx, opcode_flag_idx)
+}
+
+/// Builds the AIR for the Fp2 mul/div opcodes (MUL, DIV, SETUP_MULDIV).
+///
+/// The adapter AIR is created from the execution/memory bridges, the bitwise
+/// lookup bus and `pointer_max_bits`; the core AIR gets the expression from
+/// `gen_base_expr` together with `offset` and the opcode/flag index lists.
+pub fn get_fp2_muldiv_air<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    exec_bridge: ExecutionBridge,
+    mem_bridge: MemoryBridge,
+    config: ExprBuilderConfig,
+    range_checker_bus: VariableRangeCheckerBus,
+    bitwise_lookup_bus: BitwiseOperationLookupBus,
+    pointer_max_bits: usize,
+    offset: usize,
+) -> Fp2Air<BLOCKS, BLOCK_SIZE> {
+    let (field_expr, opcodes, flags) = gen_base_expr(config, range_checker_bus);
+    let adapter_air =
+        Rv32VecHeapAdapterAir::new(exec_bridge, mem_bridge, bitwise_lookup_bus, pointer_max_bits);
+    let core_air = FieldExpressionCoreAir::new(field_expr, offset, opcodes, flags);
+    Fp2Air::new(adapter_air, core_air)
+}
+
+pub fn get_fp2_muldiv_step<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    config: ExprBuilderConfig,
+    range_checker_bus: VariableRangeCheckerBus,
+    pointer_max_bits: usize,
+    offset: usize,
+) -> Fp2Executor<BLOCKS, BLOCK_SIZE> {
+    let (expr, local_opcode_idx, opcode_flag_idx) = gen_base_expr(config, range_checker_bus);
+    // Tuple-struct wrap of the field-expression executor built on the RV32 vec-heap adapter.
+    FieldExprVecHeapExecutor(FieldExpressionExecutor::new(
+        Rv32VecHeapAdapterExecutor::new(pointer_max_bits),
+        expr,
+        offset,
+        local_opcode_idx,
+        opcode_flag_idx,
+        "Fp2MulDiv",
+    ))
+}
+
+/// Builds the `Fp2Chip` (adapter filler + field-expression filler) for Fp2 mul/div.
+pub fn get_fp2_muldiv_chip<F, const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    config: ExprBuilderConfig,
+    mem_helper: SharedMemoryHelper<F>,
+    range_checker: SharedVariableRangeCheckerChip,
+    bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+    pointer_max_bits: usize,
+) -> Fp2Chip<F, BLOCKS, BLOCK_SIZE> {
+    let (field_expr, opcodes, flags) = gen_base_expr(config, range_checker.bus());
+    let adapter = Rv32VecHeapAdapterFiller::new(pointer_max_bits, bitwise_lookup_chip);
+    let filler = FieldExpressionFiller::new(
+        adapter,
+        field_expr,
+        opcodes,
+        flags,
+        range_checker,
+        false,
+    );
+    Fp2Chip::new(filler, mem_helper)
 }
diff --git a/extensions/algebra/circuit/src/fp2_chip/tests.rs b/extensions/algebra/circuit/src/fp2_chip/tests.rs
new file mode 100644
index 0000000000..4475bff1d4
--- /dev/null
+++ b/extensions/algebra/circuit/src/fp2_chip/tests.rs
@@ -0,0 +1,344 @@
+use std::{str::FromStr, sync::Arc};
+
+use num_bigint::BigUint;
+use num_traits::Zero;
+use openvm_algebra_transpiler::Fp2Opcode;
+use openvm_circuit::arch::testing::{
+    memory::gen_pointer, TestChipHarness, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS,
+};
+use openvm_circuit_primitives::{
+    bigint::utils::secp256k1_coord_prime,
+    bitwise_op_lookup::{
+        BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
+        SharedBitwiseOperationLookupChip,
+    },
+};
+use openvm_instructions::{
+    instruction::Instruction,
+    riscv::{RV32_CELL_BITS, RV32_MEMORY_AS, RV32_REGISTER_AS, RV32_REGISTER_NUM_LIMBS},
+    LocalOpcode, VmOpcode,
+};
+use openvm_mod_circuit_builder::{
+    test_utils::generate_random_biguint, utils::biguint_to_limbs_vec, ExprBuilderConfig,
+};
+use openvm_pairing_guest::{bls12_381::BLS12_381_MODULUS, bn254::BN254_MODULUS};
+use openvm_stark_backend::p3_field::FieldAlgebra;
+use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
+use rand::{rngs::StdRng, Rng};
+
+use crate::fp2_chip::{
+    get_fp2_addsub_air, get_fp2_addsub_chip, get_fp2_addsub_step, get_fp2_muldiv_air,
+    get_fp2_muldiv_chip, get_fp2_muldiv_step, Fp2Air, Fp2Chip, Fp2Executor,
+};
+
+const LIMB_BITS: usize = 8;
+const MAX_INS_CAPACITY: usize = 128;
+type F = BabyBear;
+type Harness<const BLOCKS: usize, const BLOCK_SIZE: usize> = TestChipHarness<
+    F,
+    Fp2Executor<BLOCKS, BLOCK_SIZE>,
+    Fp2Air<BLOCKS, BLOCK_SIZE>,
+    Fp2Chip<F, BLOCKS, BLOCK_SIZE>,
+>;
+
+fn create_addsub_test_chips<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    tester: &mut VmChipTestBuilder<F>,
+    config: ExprBuilderConfig,
+    offset: usize,
+) -> (
+    Harness<BLOCKS, BLOCK_SIZE>,
+    (
+        BitwiseOperationLookupAir<RV32_CELL_BITS>,
+        SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+    ),
+) {
+    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
+    let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+        bitwise_bus,
+    ));
+    // Build the add/sub AIR, executor and chip from the same config (cloned for the first two).
+    let air = get_fp2_addsub_air(
+        tester.execution_bridge(),
+        tester.memory_bridge(),
+        config.clone(),
+        tester.range_checker().bus(),
+        bitwise_bus,
+        tester.address_bits(),
+        offset,
+    );
+    let executor = get_fp2_addsub_step(
+        config.clone(),
+        tester.range_checker().bus(),
+        tester.address_bits(),
+        offset,
+    );
+    let chip = get_fp2_addsub_chip(
+        config,
+        tester.memory_helper(),
+        tester.range_checker(),
+        bitwise_chip.clone(),
+        tester.address_bits(),
+    );
+    let harness = Harness::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
+    // `bitwise_chip.air` is read before `bitwise_chip` itself is moved into the tuple.
+    (harness, (bitwise_chip.air, bitwise_chip))
+}
+
+fn create_muldiv_test_chips<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    tester: &mut VmChipTestBuilder<F>,
+    config: ExprBuilderConfig,
+    offset: usize,
+) -> (
+    Harness<BLOCKS, BLOCK_SIZE>,
+    (
+        BitwiseOperationLookupAir<RV32_CELL_BITS>,
+        SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+    ),
+) {
+    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
+    let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+        bitwise_bus,
+    ));
+    // Build the mul/div AIR, executor and chip from the same config (cloned for the first two).
+    let air = get_fp2_muldiv_air(
+        tester.execution_bridge(),
+        tester.memory_bridge(),
+        config.clone(),
+        tester.range_checker().bus(),
+        bitwise_bus,
+        tester.address_bits(),
+        offset,
+    );
+    let executor = get_fp2_muldiv_step(
+        config.clone(),
+        tester.range_checker().bus(),
+        tester.address_bits(),
+        offset,
+    );
+    let chip = get_fp2_muldiv_chip(
+        config,
+        tester.memory_helper(),
+        tester.range_checker(),
+        bitwise_chip.clone(),
+        tester.address_bits(),
+    );
+    let harness = Harness::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
+    // `bitwise_chip.air` is read before `bitwise_chip` itself is moved into the tuple.
+    (harness, (bitwise_chip.air, bitwise_chip))
+}
+
+fn set_and_execute_fp2<const BLOCKS: usize, const BLOCK_SIZE: usize, const NUM_LIMBS: usize>(
+    tester: &mut VmChipTestBuilder<F>,
+    harness: &mut Harness<BLOCKS, BLOCK_SIZE>,
+    rng: &mut StdRng,
+    modulus: &BigUint,
+    is_setup: bool, // true: issue the SETUP_* opcode with operands (modulus, 0) and (0, 0)
+    is_addsub: bool, // true: ADD/SUB family, false: MUL/DIV family
+    offset: usize, // added to the local Fp2 opcode to form the instruction opcode
+) {
+    let (a_c0, a_c1, b_c0, b_c1, op_local) = if is_setup {
+        (
+            modulus.clone(),
+            BigUint::zero(),
+            BigUint::zero(),
+            BigUint::zero(),
+            if is_addsub {
+                Fp2Opcode::SETUP_ADDSUB as usize
+            } else {
+                Fp2Opcode::SETUP_MULDIV as usize
+            },
+        )
+    } else {
+        let a_c0 = generate_random_biguint(modulus);
+        let a_c1 = generate_random_biguint(modulus);
+
+        let b_c0 = generate_random_biguint(modulus);
+        let b_c1 = generate_random_biguint(modulus);
+        // NOTE(review): a/b are not drawn from `rng` — confirm generate_random_biguint is seeded.
+        let op = rng.gen_range(0..2);
+        let op = if is_addsub {
+            match op {
+                0 => Fp2Opcode::ADD as usize,
+                1 => Fp2Opcode::SUB as usize,
+                _ => panic!(),
+            }
+        } else {
+            match op {
+                0 => Fp2Opcode::MUL as usize,
+                1 => Fp2Opcode::DIV as usize,
+                _ => panic!(),
+            }
+        };
+        (a_c0, a_c1, b_c0, b_c1, op)
+    };
+    // Pointers are written to the register address space, operand limbs to the memory one.
+    let ptr_as = RV32_REGISTER_AS as usize;
+    let data_as = RV32_MEMORY_AS as usize;
+    // Register slots that will hold the three heap pointers.
+    let rs1_ptr = gen_pointer(rng, RV32_REGISTER_NUM_LIMBS);
+    let rs2_ptr = gen_pointer(rng, RV32_REGISTER_NUM_LIMBS);
+    let rd_ptr = gen_pointer(rng, RV32_REGISTER_NUM_LIMBS);
+    // Heap base addresses for operand a, operand b and the result.
+    let a_base_addr = gen_pointer(rng, RV32_REGISTER_NUM_LIMBS) as u32;
+    let b_base_addr = gen_pointer(rng, RV32_REGISTER_NUM_LIMBS) as u32;
+    let result_base_addr = gen_pointer(rng, RV32_REGISTER_NUM_LIMBS) as u32;
+    // Store each base address in its register slot as little-endian bytes.
+    tester.write::<RV32_REGISTER_NUM_LIMBS>(
+        ptr_as,
+        rs1_ptr,
+        a_base_addr.to_le_bytes().map(F::from_canonical_u8),
+    );
+    tester.write::<RV32_REGISTER_NUM_LIMBS>(
+        ptr_as,
+        rs2_ptr,
+        b_base_addr.to_le_bytes().map(F::from_canonical_u8),
+    );
+    tester.write::<RV32_REGISTER_NUM_LIMBS>(
+        ptr_as,
+        rd_ptr,
+        result_base_addr.to_le_bytes().map(F::from_canonical_u8),
+    );
+    // Convert each Fp2 coefficient to limbs, one field element per u8 limb.
+    let a_c0_limbs: Vec<F> = biguint_to_limbs_vec(&a_c0, NUM_LIMBS)
+        .into_iter()
+        .map(F::from_canonical_u8)
+        .collect();
+    let a_c1_limbs: Vec<F> = biguint_to_limbs_vec(&a_c1, NUM_LIMBS)
+        .into_iter()
+        .map(F::from_canonical_u8)
+        .collect();
+    let b_c0_limbs: Vec<F> = biguint_to_limbs_vec(&b_c0, NUM_LIMBS)
+        .into_iter()
+        .map(F::from_canonical_u8)
+        .collect();
+    let b_c1_limbs: Vec<F> = biguint_to_limbs_vec(&b_c1, NUM_LIMBS)
+        .into_iter()
+        .map(F::from_canonical_u8)
+        .collect();
+    // Layout per operand: c0 at base, c1 at base + NUM_LIMBS; written BLOCK_SIZE limbs at a time.
+    for i in (0..NUM_LIMBS).step_by(BLOCK_SIZE) {
+        tester.write::<BLOCK_SIZE>(
+            data_as,
+            a_base_addr as usize + i,
+            a_c0_limbs[i..i + BLOCK_SIZE].try_into().unwrap(),
+        );
+
+        tester.write::<BLOCK_SIZE>(
+            data_as,
+            (a_base_addr + NUM_LIMBS as u32) as usize + i,
+            a_c1_limbs[i..i + BLOCK_SIZE].try_into().unwrap(),
+        );
+
+        tester.write::<BLOCK_SIZE>(
+            data_as,
+            b_base_addr as usize + i,
+            b_c0_limbs[i..i + BLOCK_SIZE].try_into().unwrap(),
+        );
+
+        tester.write::<BLOCK_SIZE>(
+            data_as,
+            (b_base_addr + NUM_LIMBS as u32) as usize + i,
+            b_c1_limbs[i..i + BLOCK_SIZE].try_into().unwrap(),
+        );
+    }
+    // Operands after the opcode: rd, rs1, rs2 register slots, then the two address spaces.
+    let instruction = Instruction::from_isize(
+        VmOpcode::from_usize(offset + op_local),
+        rd_ptr as isize,
+        rs1_ptr as isize,
+        rs2_ptr as isize,
+        ptr_as as isize,
+        data_as as isize,
+    );
+    tester.execute(harness, &instruction);
+}
+
+fn run_test_with_config<const BLOCKS: usize, const BLOCK_SIZE: usize, const NUM_LIMBS: usize>(
+    modulus: BigUint,
+    num_ops: usize, // total instructions executed, including the initial setup instruction
+    is_addsub: bool, // true: add/sub chips and opcodes, false: mul/div chips and opcodes
+) {
+    let mut rng = create_seeded_rng();
+    let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
+    let config = ExprBuilderConfig {
+        modulus: modulus.clone(),
+        num_limbs: NUM_LIMBS,
+        limb_bits: LIMB_BITS,
+    };
+    // Opcodes are addressed relative to the Fp2 class offset.
+    let offset = Fp2Opcode::CLASS_OFFSET;
+    // Pick the add/sub or mul/div chip set; both branches yield the same harness type.
+    let (mut harness, bitwise) = if is_addsub {
+        create_addsub_test_chips::<BLOCKS, BLOCK_SIZE>(&mut tester, config, offset)
+    } else {
+        create_muldiv_test_chips::<BLOCKS, BLOCK_SIZE>(&mut tester, config, offset)
+    };
+    // The first iteration (i == 0) issues the setup instruction; the rest are random operations.
+    for i in 0..num_ops {
+        set_and_execute_fp2::<BLOCKS, BLOCK_SIZE, NUM_LIMBS>(
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            &modulus,
+            i == 0,
+            is_addsub,
+            offset,
+        );
+    }
+    // Load harness and bitwise periphery, finalize, and panic if `simple_test` returns an error.
+    tester
+        .build()
+        .load(harness)
+        .load_periphery(bitwise)
+        .finalize()
+        .simple_test()
+        .unwrap();
+}
+
+#[test]
+fn test_fp2_addsub_2x32_small() {
+    run_test_with_config::<2, 32, 32>(
+        BigUint::from_str("357686312646216567629137").unwrap(),
+        50, // num_ops
+        true, // is_addsub
+    );
+}
+
+#[test]
+fn test_fp2_addsub_2x32_secp256k1() {
+    run_test_with_config::<2, 32, 32>(secp256k1_coord_prime(), 50, true); // 50 ops, add/sub
+}
+
+#[test]
+fn test_fp2_addsub_2x32_bn254() {
+    run_test_with_config::<2, 32, 32>(BN254_MODULUS.clone(), 50, true); // 50 ops, add/sub
+}
+
+#[test]
+fn test_fp2_addsub_6x16() {
+    run_test_with_config::<6, 16, 48>(BLS12_381_MODULUS.clone(), 50, true); // 50 ops, add/sub
+}
+
+#[test]
+fn test_fp2_muldiv_2x32_small() {
+    run_test_with_config::<2, 32, 32>(
+        BigUint::from_str("357686312646216567629137").unwrap(),
+        50, // num_ops
+        false, // is_addsub
+    );
+}
+
+#[test]
+fn test_fp2_muldiv_2x32_secp256k1() {
+    run_test_with_config::<2, 32, 32>(secp256k1_coord_prime(), 50, false); // 50 ops, mul/div
+}
+
+#[test]
+fn test_fp2_muldiv_2x32_bn254() {
+    run_test_with_config::<2, 32, 32>(BN254_MODULUS.clone(), 50, false); // 50 ops, mul/div
+}
+
+#[test]
+fn test_fp2_muldiv_6x16() {
+    run_test_with_config::<6, 16, 48>(BLS12_381_MODULUS.clone(), 50, false); // 50 ops, mul/div
+}
diff --git a/extensions/algebra/circuit/src/fp2_extension.rs b/extensions/algebra/circuit/src/fp2_extension.rs
index dc293c579b..3081c88565 100644
--- a/extensions/algebra/circuit/src/fp2_extension.rs
+++ b/extensions/algebra/circuit/src/fp2_extension.rs
@@ -1,26 +1,41 @@
-use derive_more::derive::From;
+use std::sync::Arc;
+
 use num_bigint::BigUint;
 use openvm_algebra_transpiler::Fp2Opcode;
 use openvm_circuit::{
-    arch::{SystemPort, VmExtension, VmInventory, VmInventoryBuilder, VmInventoryError},
-    system::phantom::PhantomChip,
+    arch::{
+        AirInventory, AirInventoryError, ChipInventory, ChipInventoryError, ExecutionBridge,
+        ExecutorInventoryBuilder, ExecutorInventoryError, RowMajorMatrixArena, VmCircuitExtension,
+        VmExecutionExtension, VmProverExtension,
+    },
+    system::{memory::SharedMemoryHelper, SystemPort},
 };
-use openvm_circuit_derive::{AnyEnum, InstructionExecutor};
-use openvm_circuit_primitives::bitwise_op_lookup::{
-    BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
+use openvm_circuit_derive::{AnyEnum, Executor, MeteredExecutor, PreflightExecutor};
+use openvm_circuit_primitives::{
+    bitwise_op_lookup::{
+        BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
+        SharedBitwiseOperationLookupChip,
+    },
+    var_range::VariableRangeCheckerBus,
 };
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
 use openvm_instructions::{LocalOpcode, VmOpcode};
 use openvm_mod_circuit_builder::ExprBuilderConfig;
-use openvm_rv32_adapters::Rv32VecHeapAdapterChip;
-use openvm_stark_backend::p3_field::PrimeField32;
+use openvm_stark_backend::{
+    config::{StarkGenericConfig, Val},
+    p3_field::PrimeField32,
+    prover::cpu::{CpuBackend, CpuDevice},
+};
+use openvm_stark_sdk::engine::StarkEngine;
 use serde::{Deserialize, Serialize};
 use serde_with::{serde_as, DisplayFromStr};
 use strum::EnumCount;
 
 use crate::{
-    fp2_chip::{Fp2AddSubChip, Fp2MulDivChip},
-    ModularExtension,
+    fp2_chip::{
+        get_fp2_addsub_air, get_fp2_addsub_chip, get_fp2_addsub_step, get_fp2_muldiv_air,
+        get_fp2_muldiv_chip, get_fp2_muldiv_step, Fp2Air, Fp2Executor,
+    },
+    AlgebraCpuProverExt, ModularExtension,
 };
 
 #[serde_as]
@@ -59,144 +74,287 @@ impl Fp2Extension {
     }
 }
 
-#[derive(ChipUsageGetter, Chip, InstructionExecutor, AnyEnum, From)]
-pub enum Fp2ExtensionExecutor<F: PrimeField32> {
+#[derive(Clone, AnyEnum, Executor, MeteredExecutor, PreflightExecutor)]
+pub enum Fp2ExtensionExecutor {
     // 32 limbs prime
-    Fp2AddSubRv32_32(Fp2AddSubChip<F, 2, 32>),
-    Fp2MulDivRv32_32(Fp2MulDivChip<F, 2, 32>),
+    Fp2AddSubRv32_32(Fp2Executor<2, 32>), // Fp2AddSub
+    Fp2MulDivRv32_32(Fp2Executor<2, 32>), // Fp2MulDiv
     // 48 limbs prime
-    Fp2AddSubRv32_48(Fp2AddSubChip<F, 6, 16>),
-    Fp2MulDivRv32_48(Fp2MulDivChip<F, 6, 16>),
-}
-
-#[derive(ChipUsageGetter, Chip, AnyEnum, From)]
-pub enum Fp2ExtensionPeriphery<F: PrimeField32> {
-    BitwiseOperationLookup(SharedBitwiseOperationLookupChip<8>),
-    // We put this only to get the <F> generic to work
-    Phantom(PhantomChip<F>),
+    Fp2AddSubRv32_48(Fp2Executor<6, 16>), // Fp2AddSub
+    Fp2MulDivRv32_48(Fp2Executor<6, 16>), // Fp2MulDiv
 }
 
-impl<F: PrimeField32> VmExtension<F> for Fp2Extension {
-    type Executor = Fp2ExtensionExecutor<F>;
-    type Periphery = Fp2ExtensionPeriphery<F>;
+impl<F: PrimeField32> VmExecutionExtension<F> for Fp2Extension {
+    type Executor = Fp2ExtensionExecutor;
 
-    fn build(
+    fn extend_execution(
         &self,
-        builder: &mut VmInventoryBuilder<F>,
-    ) -> Result<VmInventory<Self::Executor, Self::Periphery>, VmInventoryError> {
-        let mut inventory = VmInventory::new();
-        let SystemPort {
-            execution_bus,
-            program_bus,
-            memory_bridge,
-        } = builder.system_port();
-        let bitwise_lu_chip = if let Some(&chip) = builder
-            .find_chip::<SharedBitwiseOperationLookupChip<8>>()
-            .first()
-        {
-            chip.clone()
-        } else {
-            let bitwise_lu_bus = BitwiseOperationLookupBus::new(builder.new_bus_idx());
-            let chip = SharedBitwiseOperationLookupChip::new(bitwise_lu_bus);
-            inventory.add_periphery_chip(chip.clone());
-            chip
-        };
-        let offline_memory = builder.system_base().offline_memory();
-        let range_checker = builder.system_base().range_checker_chip.clone();
-        let address_bits = builder.system_config().memory_config.pointer_max_bits;
-
-        let addsub_opcodes = (Fp2Opcode::ADD as usize)..=(Fp2Opcode::SETUP_ADDSUB as usize);
-        let muldiv_opcodes = (Fp2Opcode::MUL as usize)..=(Fp2Opcode::SETUP_MULDIV as usize);
-
+        inventory: &mut ExecutorInventoryBuilder<F, Fp2ExtensionExecutor>,
+    ) -> Result<(), ExecutorInventoryError> {
+        let pointer_max_bits = inventory.pointer_max_bits();
+        // TODO: somehow get the range checker bus from `ExecutorInventory`
+        let dummy_range_checker_bus = VariableRangeCheckerBus::new(u16::MAX, 16);
         for (i, (_, modulus)) in self.supported_moduli.iter().enumerate() {
             // determine the number of bytes needed to represent a prime field element
             let bytes = modulus.bits().div_ceil(8);
             let start_offset = Fp2Opcode::CLASS_OFFSET + i * Fp2Opcode::COUNT;
 
-            let config32 = ExprBuilderConfig {
-                modulus: modulus.clone(),
-                num_limbs: 32,
-                limb_bits: 8,
-            };
-            let config48 = ExprBuilderConfig {
-                modulus: modulus.clone(),
-                num_limbs: 48,
-                limb_bits: 8,
-            };
-            let adapter_chip_32 = Rv32VecHeapAdapterChip::new(
-                execution_bus,
-                program_bus,
-                memory_bridge,
-                address_bits,
-                bitwise_lu_chip.clone(),
-            );
-            let adapter_chip_48 = Rv32VecHeapAdapterChip::new(
-                execution_bus,
-                program_bus,
-                memory_bridge,
-                address_bits,
-                bitwise_lu_chip.clone(),
-            );
-
             if bytes <= 32 {
-                let addsub_chip = Fp2AddSubChip::new(
-                    adapter_chip_32.clone(),
-                    config32.clone(),
+                let config = ExprBuilderConfig {
+                    modulus: modulus.clone(),
+                    num_limbs: 32,
+                    limb_bits: 8,
+                };
+                let addsub = get_fp2_addsub_step(
+                    config.clone(),
+                    dummy_range_checker_bus,
+                    pointer_max_bits,
                     start_offset,
-                    range_checker.clone(),
-                    offline_memory.clone(),
                 );
+
                 inventory.add_executor(
-                    Fp2ExtensionExecutor::Fp2AddSubRv32_32(addsub_chip),
-                    addsub_opcodes
-                        .clone()
+                    Fp2ExtensionExecutor::Fp2AddSubRv32_32(addsub),
+                    ((Fp2Opcode::ADD as usize)..=(Fp2Opcode::SETUP_ADDSUB as usize))
                         .map(|x| VmOpcode::from_usize(x + start_offset)),
                 )?;
-                let muldiv_chip = Fp2MulDivChip::new(
-                    adapter_chip_32.clone(),
-                    config32.clone(),
+
+                let muldiv = get_fp2_muldiv_step(
+                    config,
+                    dummy_range_checker_bus,
+                    pointer_max_bits,
                     start_offset,
-                    range_checker.clone(),
-                    offline_memory.clone(),
                 );
+
                 inventory.add_executor(
-                    Fp2ExtensionExecutor::Fp2MulDivRv32_32(muldiv_chip),
-                    muldiv_opcodes
-                        .clone()
+                    Fp2ExtensionExecutor::Fp2MulDivRv32_32(muldiv),
+                    ((Fp2Opcode::MUL as usize)..=(Fp2Opcode::SETUP_MULDIV as usize))
                         .map(|x| VmOpcode::from_usize(x + start_offset)),
                 )?;
             } else if bytes <= 48 {
-                let addsub_chip = Fp2AddSubChip::new(
-                    adapter_chip_48.clone(),
-                    config48.clone(),
+                let config = ExprBuilderConfig {
+                    modulus: modulus.clone(),
+                    num_limbs: 48,
+                    limb_bits: 8,
+                };
+                let addsub = get_fp2_addsub_step(
+                    config.clone(),
+                    dummy_range_checker_bus,
+                    pointer_max_bits,
                     start_offset,
-                    range_checker.clone(),
-                    offline_memory.clone(),
                 );
+
                 inventory.add_executor(
-                    Fp2ExtensionExecutor::Fp2AddSubRv32_48(addsub_chip),
-                    addsub_opcodes
-                        .clone()
+                    Fp2ExtensionExecutor::Fp2AddSubRv32_48(addsub),
+                    ((Fp2Opcode::ADD as usize)..=(Fp2Opcode::SETUP_ADDSUB as usize))
                         .map(|x| VmOpcode::from_usize(x + start_offset)),
                 )?;
-                let muldiv_chip = Fp2MulDivChip::new(
-                    adapter_chip_48.clone(),
-                    config48.clone(),
+
+                let muldiv = get_fp2_muldiv_step(
+                    config,
+                    dummy_range_checker_bus,
+                    pointer_max_bits,
                     start_offset,
-                    range_checker.clone(),
-                    offline_memory.clone(),
                 );
+
                 inventory.add_executor(
-                    Fp2ExtensionExecutor::Fp2MulDivRv32_48(muldiv_chip),
-                    muldiv_opcodes
-                        .clone()
+                    Fp2ExtensionExecutor::Fp2MulDivRv32_48(muldiv),
+                    ((Fp2Opcode::MUL as usize)..=(Fp2Opcode::SETUP_MULDIV as usize))
                         .map(|x| VmOpcode::from_usize(x + start_offset)),
                 )?;
             } else {
                 panic!("Modulus too large");
             }
         }
+        Ok(())
+    }
+}
+
+impl<SC: StarkGenericConfig> VmCircuitExtension<SC> for Fp2Extension {
+    fn extend_circuit(&self, inventory: &mut AirInventory<SC>) -> Result<(), AirInventoryError> {
+        let SystemPort {
+            execution_bus,
+            program_bus,
+            memory_bridge,
+        } = inventory.system().port();
+
+        let exec_bridge = ExecutionBridge::new(execution_bus, program_bus);
+        let range_checker_bus = inventory.range_checker().bus;
+        let pointer_max_bits = inventory.pointer_max_bits();
+        // Reuse an existing `BitwiseOperationLookupAir<8>`, or register one on a new bus index.
+        let bitwise_lu = {
+            // A trick to get around Rust's borrow rules
+            let existing_air = inventory.find_air::<BitwiseOperationLookupAir<8>>().next();
+            if let Some(air) = existing_air {
+                air.bus
+            } else {
+                let bus = BitwiseOperationLookupBus::new(inventory.new_bus_idx());
+                let air = BitwiseOperationLookupAir::<8>::new(bus);
+                inventory.add_air(air);
+                air.bus
+            }
+        };
+        for (i, (_, modulus)) in self.supported_moduli.iter().enumerate() {
+            // determine the number of bytes needed to represent a prime field element
+            let bytes = modulus.bits().div_ceil(8);
+            let start_offset = Fp2Opcode::CLASS_OFFSET + i * Fp2Opcode::COUNT;
+            // Up to 32 bytes: 32 8-bit limbs; up to 48 bytes: 48 limbs; larger moduli panic.
+            if bytes <= 32 {
+                let config = ExprBuilderConfig {
+                    modulus: modulus.clone(),
+                    num_limbs: 32,
+                    limb_bits: 8,
+                };
+
+                let addsub = get_fp2_addsub_air::<2, 32>(
+                    exec_bridge,
+                    memory_bridge,
+                    config.clone(),
+                    range_checker_bus,
+                    bitwise_lu,
+                    pointer_max_bits,
+                    start_offset,
+                );
+                inventory.add_air(addsub);
+
+                let muldiv = get_fp2_muldiv_air::<2, 32>(
+                    exec_bridge,
+                    memory_bridge,
+                    config,
+                    range_checker_bus,
+                    bitwise_lu,
+                    pointer_max_bits,
+                    start_offset,
+                );
+                inventory.add_air(muldiv);
+            } else if bytes <= 48 {
+                let config = ExprBuilderConfig {
+                    modulus: modulus.clone(),
+                    num_limbs: 48,
+                    limb_bits: 8,
+                };
+
+                let addsub = get_fp2_addsub_air::<6, 16>(
+                    exec_bridge,
+                    memory_bridge,
+                    config.clone(),
+                    range_checker_bus,
+                    bitwise_lu,
+                    pointer_max_bits,
+                    start_offset,
+                );
+                inventory.add_air(addsub);
+
+                let muldiv = get_fp2_muldiv_air::<6, 16>(
+                    exec_bridge,
+                    memory_bridge,
+                    config,
+                    range_checker_bus,
+                    bitwise_lu,
+                    pointer_max_bits,
+                    start_offset,
+                );
+                inventory.add_air(muldiv);
+            } else {
+                panic!("Modulus too large");
+            }
+        }
+
+        Ok(())
+    }
+}
+
+// Adds the executor chips for each supported Fp2 modulus. Specific to CpuBackend because the lookup
+// chips (VariableRangeChecker, BitwiseOperationLookupChip) are specific to CpuBackend.
+impl<E, SC, RA> VmProverExtension<E, RA, Fp2Extension> for AlgebraCpuProverExt
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    RA: RowMajorMatrixArena<Val<SC>>,
+    Val<SC>: PrimeField32,
+{
+    fn extend_prover(
+        &self,
+        extension: &Fp2Extension,
+        inventory: &mut ChipInventory<SC, RA, CpuBackend<SC>>,
+    ) -> Result<(), ChipInventoryError> {
+        let range_checker = inventory.range_checker()?.clone();
+        let timestamp_max_bits = inventory.timestamp_max_bits();
+        let pointer_max_bits = inventory.airs().pointer_max_bits();
+        let mem_helper = SharedMemoryHelper::new(range_checker.clone(), timestamp_max_bits);
+        let bitwise_lu = {
+            let existing_chip = inventory
+                .find_chip::<SharedBitwiseOperationLookupChip<8>>()
+                .next();
+            if let Some(chip) = existing_chip {
+                chip.clone()
+            } else {
+                let air: &BitwiseOperationLookupAir<8> = inventory.next_air()?;
+                let chip = Arc::new(BitwiseOperationLookupChip::new(air.bus));
+                inventory.add_periphery_chip(chip.clone());
+                chip
+            }
+        };
+        for (_, modulus) in extension.supported_moduli.iter() {
+            // determine the number of bytes needed to represent a prime field element
+            let bytes = modulus.bits().div_ceil(8);
+
+            if bytes <= 32 {
+                let config = ExprBuilderConfig {
+                    modulus: modulus.clone(),
+                    num_limbs: 32,
+                    limb_bits: 8,
+                };
+                // `next_air` presumably mirrors the AIR order in `extend_circuit` — confirm.
+                inventory.next_air::<Fp2Air<2, 32>>()?;
+                let addsub = get_fp2_addsub_chip::<Val<SC>, 2, 32>(
+                    config.clone(),
+                    mem_helper.clone(),
+                    range_checker.clone(),
+                    bitwise_lu.clone(),
+                    pointer_max_bits,
+                );
+                inventory.add_executor_chip(addsub);
+
+                inventory.next_air::<Fp2Air<2, 32>>()?;
+                let muldiv = get_fp2_muldiv_chip::<Val<SC>, 2, 32>(
+                    config,
+                    mem_helper.clone(),
+                    range_checker.clone(),
+                    bitwise_lu.clone(),
+                    pointer_max_bits,
+                );
+                inventory.add_executor_chip(muldiv);
+            } else if bytes <= 48 {
+                let config = ExprBuilderConfig {
+                    modulus: modulus.clone(),
+                    num_limbs: 48,
+                    limb_bits: 8,
+                };
+
+                inventory.next_air::<Fp2Air<6, 16>>()?;
+                let addsub = get_fp2_addsub_chip::<Val<SC>, 6, 16>(
+                    config.clone(),
+                    mem_helper.clone(),
+                    range_checker.clone(),
+                    bitwise_lu.clone(),
+                    pointer_max_bits,
+                );
+                inventory.add_executor_chip(addsub);
+
+                inventory.next_air::<Fp2Air<6, 16>>()?;
+                let muldiv = get_fp2_muldiv_chip::<Val<SC>, 6, 16>(
+                    config,
+                    mem_helper.clone(),
+                    range_checker.clone(),
+                    bitwise_lu.clone(),
+                    pointer_max_bits,
+                );
+                inventory.add_executor_chip(muldiv);
+            } else {
+                panic!("Modulus too large");
+            }
+        }
 
-        Ok(inventory)
+        Ok(())
     }
 }
diff --git a/extensions/algebra/circuit/src/lib.rs b/extensions/algebra/circuit/src/lib.rs
index ffddacc61a..b4e494c812 100644
--- a/extensions/algebra/circuit/src/lib.rs
+++ b/extensions/algebra/circuit/src/lib.rs
@@ -1,6 +1,12 @@
+use derive_more::derive::{Deref, DerefMut};
+use openvm_circuit_derive::PreflightExecutor;
+use openvm_mod_circuit_builder::FieldExpressionExecutor;
+use openvm_rv32_adapters::Rv32VecHeapAdapterExecutor;
+
 pub mod fp2_chip;
 pub mod modular_chip;
 
+mod execution;
 mod fp2;
 pub use fp2::*;
 mod modular_extension;
@@ -9,3 +15,13 @@ mod fp2_extension;
 pub use fp2_extension::*;
 mod config;
 pub use config::*;
+pub mod fields;
+
+pub struct AlgebraCpuProverExt;
+
+#[derive(Clone, PreflightExecutor, Deref, DerefMut)]
+pub struct FieldExprVecHeapExecutor<
+    const BLOCKS: usize,
+    const BLOCK_SIZE: usize,
+    const IS_FP2: bool,
+>(FieldExpressionExecutor<Rv32VecHeapAdapterExecutor<2, BLOCKS, BLOCKS, BLOCK_SIZE, BLOCK_SIZE>>);
diff --git a/extensions/algebra/circuit/src/modular_chip/addsub.rs b/extensions/algebra/circuit/src/modular_chip/addsub.rs
index 34bede150f..abde6ea6dd 100644
--- a/extensions/algebra/circuit/src/modular_chip/addsub.rs
+++ b/extensions/algebra/circuit/src/modular_chip/addsub.rs
@@ -1,21 +1,25 @@
-use std::{
-    cell::RefCell,
-    rc::Rc,
-    sync::{Arc, Mutex},
-};
+use std::{cell::RefCell, rc::Rc};
 
 use openvm_algebra_transpiler::Rv32ModularArithmeticOpcode;
-use openvm_circuit::{arch::VmChipWrapper, system::memory::OfflineMemory};
-use openvm_circuit_derive::InstructionExecutor;
-use openvm_circuit_primitives::var_range::{
-    SharedVariableRangeCheckerChip, VariableRangeCheckerBus,
+use openvm_circuit::{
+    arch::ExecutionBridge,
+    system::memory::{offline_checker::MemoryBridge, SharedMemoryHelper},
+};
+use openvm_circuit_primitives::{
+    bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
+    var_range::{SharedVariableRangeCheckerChip, VariableRangeCheckerBus},
 };
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
+use openvm_instructions::riscv::RV32_CELL_BITS;
 use openvm_mod_circuit_builder::{
-    ExprBuilder, ExprBuilderConfig, FieldExpr, FieldExpressionCoreChip, FieldVariable,
+    ExprBuilder, ExprBuilderConfig, FieldExpr, FieldExpressionCoreAir, FieldExpressionExecutor,
+    FieldExpressionFiller, FieldVariable,
+};
+use openvm_rv32_adapters::{
+    Rv32VecHeapAdapterAir, Rv32VecHeapAdapterExecutor, Rv32VecHeapAdapterFiller,
 };
-use openvm_rv32_adapters::Rv32VecHeapAdapterChip;
-use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::{ModularAir, ModularChip, ModularExecutor};
+use crate::FieldExprVecHeapExecutor;
 
 pub fn addsub_expr(
     config: ExprBuilderConfig,
@@ -29,12 +33,12 @@ pub fn addsub_expr(
     let x2 = ExprBuilder::new_input(builder.clone());
     let x3 = x1.clone() + x2.clone();
     let x4 = x1.clone() - x2.clone();
-    let is_add_flag = builder.borrow_mut().new_flag();
-    let is_sub_flag = builder.borrow_mut().new_flag();
+    let is_add_flag = (*builder).borrow_mut().new_flag();
+    let is_sub_flag = (*builder).borrow_mut().new_flag();
     let x5 = FieldVariable::select(is_sub_flag, &x4, &x1);
     let mut x6 = FieldVariable::select(is_add_flag, &x3, &x5);
     x6.save_output();
-    let builder = builder.borrow().clone();
+    let builder = (*builder).borrow().clone();
 
     (
         FieldExpr::new(builder, range_bus, true),
@@ -43,39 +47,78 @@ pub fn addsub_expr(
     )
 }
 
-#[derive(Chip, ChipUsageGetter, InstructionExecutor)]
-pub struct ModularAddSubChip<F: PrimeField32, const BLOCKS: usize, const BLOCK_SIZE: usize>(
-    pub  VmChipWrapper<
-        F,
-        Rv32VecHeapAdapterChip<F, 2, BLOCKS, BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
-        FieldExpressionCoreChip,
-    >,
-);
+fn gen_base_expr(
+    config: ExprBuilderConfig,
+    range_checker_bus: VariableRangeCheckerBus,
+) -> (FieldExpr, Vec<usize>, Vec<usize>) {
+    let (expr, is_add_flag, is_sub_flag) = addsub_expr(config, range_checker_bus);
+
+    let local_opcode_idx = vec![
+        Rv32ModularArithmeticOpcode::ADD as usize,
+        Rv32ModularArithmeticOpcode::SUB as usize,
+        Rv32ModularArithmeticOpcode::SETUP_ADDSUB as usize,
+    ];
+    let opcode_flag_idx = vec![is_add_flag, is_sub_flag];
+
+    (expr, local_opcode_idx, opcode_flag_idx)
+}
+
+pub fn get_modular_addsub_air<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    exec_bridge: ExecutionBridge,
+    mem_bridge: MemoryBridge,
+    config: ExprBuilderConfig,
+    range_checker_bus: VariableRangeCheckerBus,
+    bitwise_lookup_bus: BitwiseOperationLookupBus,
+    pointer_max_bits: usize,
+    offset: usize,
+) -> ModularAir<BLOCKS, BLOCK_SIZE> {
+    let (expr, local_opcode_idx, opcode_flag_idx) = gen_base_expr(config, range_checker_bus);
+    ModularAir::new(
+        Rv32VecHeapAdapterAir::new(
+            exec_bridge,
+            mem_bridge,
+            bitwise_lookup_bus,
+            pointer_max_bits,
+        ),
+        FieldExpressionCoreAir::new(expr, offset, local_opcode_idx, opcode_flag_idx),
+    )
+}
+
+pub fn get_modular_addsub_step<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    config: ExprBuilderConfig,
+    range_checker_bus: VariableRangeCheckerBus,
+    pointer_max_bits: usize,
+    offset: usize,
+) -> ModularExecutor<BLOCKS, BLOCK_SIZE> {
+    let (expr, local_opcode_idx, opcode_flag_idx) = gen_base_expr(config, range_checker_bus);
 
-impl<F: PrimeField32, const BLOCKS: usize, const BLOCK_SIZE: usize>
-    ModularAddSubChip<F, BLOCKS, BLOCK_SIZE>
-{
-    pub fn new(
-        adapter: Rv32VecHeapAdapterChip<F, 2, BLOCKS, BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
-        config: ExprBuilderConfig,
-        offset: usize,
-        range_checker: SharedVariableRangeCheckerChip,
-        offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-    ) -> Self {
-        let (expr, is_add_flag, is_sub_flag) = addsub_expr(config, range_checker.bus());
-        let core = FieldExpressionCoreChip::new(
+    FieldExprVecHeapExecutor(FieldExpressionExecutor::new(
+        Rv32VecHeapAdapterExecutor::new(pointer_max_bits),
+        expr,
+        offset,
+        local_opcode_idx,
+        opcode_flag_idx,
+        "ModularAddSub",
+    ))
+}
+
+pub fn get_modular_addsub_chip<F, const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    config: ExprBuilderConfig,
+    mem_helper: SharedMemoryHelper<F>,
+    range_checker: SharedVariableRangeCheckerChip,
+    bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+    pointer_max_bits: usize,
+) -> ModularChip<F, BLOCKS, BLOCK_SIZE> {
+    let (expr, local_opcode_idx, opcode_flag_idx) = gen_base_expr(config, range_checker.bus());
+    ModularChip::new(
+        FieldExpressionFiller::new(
+            Rv32VecHeapAdapterFiller::new(pointer_max_bits, bitwise_lookup_chip),
             expr,
-            offset,
-            vec![
-                Rv32ModularArithmeticOpcode::ADD as usize,
-                Rv32ModularArithmeticOpcode::SUB as usize,
-                Rv32ModularArithmeticOpcode::SETUP_ADDSUB as usize,
-            ],
-            vec![is_add_flag, is_sub_flag],
+            local_opcode_idx,
+            opcode_flag_idx,
             range_checker,
-            "ModularAddSub",
             false,
-        );
-        Self(VmChipWrapper::new(adapter, core, offline_memory))
-    }
+        ),
+        mem_helper,
+    )
 }
diff --git a/extensions/algebra/circuit/src/modular_chip/is_eq.rs b/extensions/algebra/circuit/src/modular_chip/is_eq.rs
index fe91585466..348fb22bd6 100644
--- a/extensions/algebra/circuit/src/modular_chip/is_eq.rs
+++ b/extensions/algebra/circuit/src/modular_chip/is_eq.rs
@@ -5,32 +5,41 @@ use std::{
 
 use num_bigint::BigUint;
 use openvm_algebra_transpiler::Rv32ModularArithmeticOpcode;
-use openvm_circuit::arch::{
-    AdapterAirContext, AdapterRuntimeContext, MinimalInstruction, Result, VmAdapterInterface,
-    VmCoreAir, VmCoreChip,
+use openvm_circuit::{
+    arch::*,
+    system::memory::{
+        online::{GuestMemory, TracingMemory},
+        MemoryAuxColsFactory, POINTER_MAX_BITS,
+    },
 };
 use openvm_circuit_primitives::{
     bigint::utils::big_uint_to_limbs,
     bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
     is_equal_array::{IsEqArrayIo, IsEqArraySubAir},
-    SubAir, TraceSubRowGenerator,
+    AlignedBytesBorrow, SubAir, TraceSubRowGenerator,
 };
 use openvm_circuit_primitives_derive::AlignedBorrow;
-use openvm_instructions::{instruction::Instruction, LocalOpcode};
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS, RV32_REGISTER_NUM_LIMBS},
+    LocalOpcode,
+};
+use openvm_rv32_adapters::Rv32IsEqualModAdapterExecutor;
 use openvm_stark_backend::{
     interaction::InteractionBuilder,
     p3_air::{AirBuilder, BaseAir},
     p3_field::{Field, FieldAlgebra, PrimeField32},
     rap::BaseAirWithPublicValues,
 };
-use serde::{Deserialize, Serialize};
-use serde_big_array::BigArray;
+
+use crate::modular_chip::VmModularIsEqualExecutor;
 // Given two numbers b and c, we want to prove that a) b == c or b != c, depending on
 // result of cmp_result and b) b, c < N for some modulus N that is passed into the AIR
 // at runtime (i.e. when chip is instantiated).
 
 #[repr(C)]
-#[derive(AlignedBorrow)]
+#[derive(AlignedBorrow, Debug)]
 pub struct ModularIsEqualCoreCols<T, const READ_LIMBS: usize> {
     pub is_valid: T,
     pub is_setup: T,
@@ -278,155 +287,395 @@ where
 }
 
 #[repr(C)]
-#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
-pub struct ModularIsEqualCoreRecord<T, const READ_LIMBS: usize> {
-    #[serde(with = "BigArray")]
-    pub b: [T; READ_LIMBS],
-    #[serde(with = "BigArray")]
-    pub c: [T; READ_LIMBS],
-    pub cmp_result: T,
-    #[serde(with = "BigArray")]
-    pub eq_marker: [T; READ_LIMBS],
-    pub b_diff_idx: usize,
-    pub c_diff_idx: usize,
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct ModularIsEqualRecord<const READ_LIMBS: usize> {
     pub is_setup: bool,
+    pub b: [u8; READ_LIMBS],
+    pub c: [u8; READ_LIMBS],
 }
 
-pub struct ModularIsEqualCoreChip<
+#[derive(derive_new::new, Clone)]
+pub struct ModularIsEqualExecutor<
+    A,
     const READ_LIMBS: usize,
     const WRITE_LIMBS: usize,
     const LIMB_BITS: usize,
 > {
-    pub air: ModularIsEqualCoreAir<READ_LIMBS, WRITE_LIMBS, LIMB_BITS>,
-    pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<LIMB_BITS>,
+    adapter: A,
+    pub offset: usize,
+    pub modulus_limbs: [u8; READ_LIMBS],
 }
 
-impl<const READ_LIMBS: usize, const WRITE_LIMBS: usize, const LIMB_BITS: usize>
-    ModularIsEqualCoreChip<READ_LIMBS, WRITE_LIMBS, LIMB_BITS>
-{
-    pub fn new(
-        modulus: BigUint,
-        bitwise_lookup_chip: SharedBitwiseOperationLookupChip<LIMB_BITS>,
-        offset: usize,
-    ) -> Self {
-        Self {
-            air: ModularIsEqualCoreAir::new(modulus, bitwise_lookup_chip.bus(), offset),
-            bitwise_lookup_chip,
-        }
-    }
+#[derive(derive_new::new, Clone)]
+pub struct ModularIsEqualFiller<
+    A,
+    const READ_LIMBS: usize,
+    const WRITE_LIMBS: usize,
+    const LIMB_BITS: usize,
+> {
+    adapter: A,
+    pub offset: usize,
+    pub modulus_limbs: [u8; READ_LIMBS],
+    pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<LIMB_BITS>,
 }
 
-impl<
-        F: PrimeField32,
-        I: VmAdapterInterface<F>,
-        const READ_LIMBS: usize,
-        const WRITE_LIMBS: usize,
-        const LIMB_BITS: usize,
-    > VmCoreChip<F, I> for ModularIsEqualCoreChip<READ_LIMBS, WRITE_LIMBS, LIMB_BITS>
+impl<F, A, RA, const READ_LIMBS: usize, const WRITE_LIMBS: usize, const LIMB_BITS: usize>
+    PreflightExecutor<F, RA> for ModularIsEqualExecutor<A, READ_LIMBS, WRITE_LIMBS, LIMB_BITS>
 where
-    I::Reads: Into<[[F; READ_LIMBS]; 2]>,
-    I::Writes: From<[[F; WRITE_LIMBS]; 1]>,
+    F: PrimeField32,
+    A: 'static
+        + AdapterTraceExecutor<
+            F,
+            ReadData: Into<[[u8; READ_LIMBS]; 2]>,
+            WriteData: From<[u8; WRITE_LIMBS]>,
+        >,
+    for<'buf> RA: RecordArena<
+        'buf,
+        EmptyAdapterCoreLayout<F, A>,
+        (
+            A::RecordMut<'buf>,
+            &'buf mut ModularIsEqualRecord<READ_LIMBS>,
+        ),
+    >,
 {
-    type Record = ModularIsEqualCoreRecord<F, READ_LIMBS>;
-    type Air = ModularIsEqualCoreAir<READ_LIMBS, WRITE_LIMBS, LIMB_BITS>;
-
-    #[allow(clippy::type_complexity)]
-    fn execute_instruction(
+    fn execute(
         &self,
+        state: VmStateMut<F, TracingMemory, RA>,
         instruction: &Instruction<F>,
-        _from_pc: u32,
-        reads: I::Reads,
-    ) -> Result<(AdapterRuntimeContext<F, I>, Self::Record)> {
-        let data: [[F; READ_LIMBS]; 2] = reads.into();
-        let b = data[0].map(|x| x.as_canonical_u32());
-        let c = data[1].map(|y| y.as_canonical_u32());
-        let (b_cmp, b_diff_idx) = run_unsigned_less_than::<READ_LIMBS>(&b, &self.air.modulus_limbs);
-        let (c_cmp, c_diff_idx) = run_unsigned_less_than::<READ_LIMBS>(&c, &self.air.modulus_limbs);
-        let is_setup = instruction.opcode.local_opcode_idx(self.air.offset)
+    ) -> Result<(), ExecutionError> {
+        let Instruction { opcode, .. } = instruction;
+        // Only IS_EQ / SETUP_ISEQ are handled here; a bare `matches!` would discard the check.
+        let local_opcode =
+            Rv32ModularArithmeticOpcode::from_usize(opcode.local_opcode_idx(self.offset));
+        debug_assert!(matches!(
+            local_opcode,
+            Rv32ModularArithmeticOpcode::IS_EQ | Rv32ModularArithmeticOpcode::SETUP_ISEQ
+        ));
+        // Allocate this instruction's adapter and core records from the record arena.
+        let (mut adapter_record, core_record) = state.ctx.alloc(EmptyAdapterCoreLayout::new());
+
+        A::start(*state.pc, state.memory, &mut adapter_record);
+        [core_record.b, core_record.c] = self
+            .adapter
+            .read(state.memory, instruction, &mut adapter_record)
+            .into();
+
+        core_record.is_setup = instruction.opcode.local_opcode_idx(self.offset)
             == Rv32ModularArithmeticOpcode::SETUP_ISEQ as usize;
 
-        if !is_setup {
-            assert!(b_cmp, "{:?} >= {:?}", b, self.air.modulus_limbs);
-        }
-        assert!(c_cmp, "{:?} >= {:?}", c, self.air.modulus_limbs);
-        if !is_setup {
-            self.bitwise_lookup_chip.request_range(
-                self.air.modulus_limbs[b_diff_idx] - b[b_diff_idx] - 1,
-                self.air.modulus_limbs[c_diff_idx] - c[c_diff_idx] - 1,
-            );
-        }
+        let mut write_data = [0u8; WRITE_LIMBS];
+        write_data[0] = (core_record.b == core_record.c) as u8;
 
-        let mut eq_marker = [F::ZERO; READ_LIMBS];
-        let mut cmp_result = F::ZERO;
-        self.air
-            .subair
-            .generate_subrow((&data[0], &data[1]), (&mut eq_marker, &mut cmp_result));
-
-        let mut writes = [F::ZERO; WRITE_LIMBS];
-        writes[0] = cmp_result;
-
-        let output = AdapterRuntimeContext::without_pc([writes]);
-        let record = ModularIsEqualCoreRecord {
-            is_setup,
-            b: data[0],
-            c: data[1],
-            cmp_result,
-            eq_marker,
-            b_diff_idx,
-            c_diff_idx,
-        };
+        self.adapter.write(
+            state.memory,
+            instruction,
+            write_data.into(),
+            &mut adapter_record,
+        );
 
-        Ok((output, record))
+        *state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP);
+
+        Ok(())
     }
 
     fn get_opcode_name(&self, opcode: usize) -> String {
         format!(
             "{:?}",
-            Rv32ModularArithmeticOpcode::from_usize(opcode - self.air.offset)
+            Rv32ModularArithmeticOpcode::from_usize(opcode - self.offset)
         )
     }
+}
 
-    fn generate_trace_row(&self, row_slice: &mut [F], record: Self::Record) {
-        let row_slice: &mut ModularIsEqualCoreCols<_, READ_LIMBS> = row_slice.borrow_mut();
-        row_slice.is_valid = F::ONE;
-        row_slice.is_setup = F::from_bool(record.is_setup);
-        row_slice.b = record.b;
-        row_slice.c = record.c;
-        row_slice.cmp_result = record.cmp_result;
-
-        row_slice.eq_marker = record.eq_marker;
+impl<F, A, const READ_LIMBS: usize, const WRITE_LIMBS: usize, const LIMB_BITS: usize> TraceFiller<F>
+    for ModularIsEqualFiller<A, READ_LIMBS, WRITE_LIMBS, LIMB_BITS>
+where
+    F: PrimeField32,
+    A: 'static + AdapterTraceFiller<F>,
+{
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, row_slice: &mut [F]) {
+        let (adapter_row, mut core_row) = row_slice.split_at_mut(A::WIDTH);
+        self.adapter.fill_trace_row(mem_helper, adapter_row);
+        let record: &ModularIsEqualRecord<READ_LIMBS> =
+            unsafe { get_record_from_slice(&mut core_row, ()) };
+        let cols: &mut ModularIsEqualCoreCols<F, READ_LIMBS> = core_row.borrow_mut();
+        let (b_cmp, b_diff_idx) =
+            run_unsigned_less_than::<READ_LIMBS>(&record.b, &self.modulus_limbs);
+        let (c_cmp, c_diff_idx) =
+            run_unsigned_less_than::<READ_LIMBS>(&record.c, &self.modulus_limbs);
 
         if !record.is_setup {
-            row_slice.b_lt_diff = F::from_canonical_u32(self.air.modulus_limbs[record.b_diff_idx])
-                - record.b[record.b_diff_idx];
+            assert!(b_cmp, "{:?} >= {:?}", record.b, self.modulus_limbs);
         }
-        row_slice.c_lt_diff = F::from_canonical_u32(self.air.modulus_limbs[record.c_diff_idx])
-            - record.c[record.c_diff_idx];
-        row_slice.c_lt_mark = if record.b_diff_idx == record.c_diff_idx {
+        assert!(c_cmp, "{:?} >= {:?}", record.c, self.modulus_limbs);
+        // `record` and `cols` both view `core_row` (NOTE(review): presumably overlapping bytes —
+        // confirm), so columns are written in reverse order to keep unread record fields intact.
+        cols.c_lt_mark = if b_diff_idx == c_diff_idx {
             F::ONE
         } else {
-            F::from_canonical_u8(2)
+            F::TWO
         };
-        row_slice.lt_marker = from_fn(|i| {
-            if i == record.b_diff_idx {
+
+        cols.c_lt_diff =
+            F::from_canonical_u8(self.modulus_limbs[c_diff_idx] - record.c[c_diff_idx]);
+        if !record.is_setup {
+            cols.b_lt_diff =
+                F::from_canonical_u8(self.modulus_limbs[b_diff_idx] - record.b[b_diff_idx]);
+            self.bitwise_lookup_chip.request_range(
+                (self.modulus_limbs[b_diff_idx] - record.b[b_diff_idx] - 1) as u32,
+                (self.modulus_limbs[c_diff_idx] - record.c[c_diff_idx] - 1) as u32,
+            );
+        } else {
+            cols.b_lt_diff = F::ZERO;
+        }
+
+        cols.lt_marker = from_fn(|i| {
+            if i == b_diff_idx {
                 F::ONE
-            } else if i == record.c_diff_idx {
-                row_slice.c_lt_mark
+            } else if i == c_diff_idx {
+                cols.c_lt_mark
             } else {
                 F::ZERO
             }
         });
+
+        cols.c = record.c.map(F::from_canonical_u8);
+        cols.b = record.b.map(F::from_canonical_u8);
+        let sub_air = IsEqArraySubAir::<READ_LIMBS>;
+        sub_air.generate_subrow(
+            (&cols.b, &cols.c),
+            (&mut cols.eq_marker, &mut cols.cmp_result),
+        );
+
+        cols.is_setup = F::from_bool(record.is_setup);
+        cols.is_valid = F::ONE;
+    }
+}
+
+impl<const NUM_LANES: usize, const LANE_SIZE: usize, const TOTAL_LIMBS: usize>
+    VmModularIsEqualExecutor<NUM_LANES, LANE_SIZE, TOTAL_LIMBS>
+{
+    pub fn new(
+        adapter: Rv32IsEqualModAdapterExecutor<2, NUM_LANES, LANE_SIZE, TOTAL_LIMBS>,
+        offset: usize,
+        modulus_limbs: [u8; TOTAL_LIMBS],
+    ) -> Self {
+        Self(ModularIsEqualExecutor::new(adapter, offset, modulus_limbs))
+    }
+}
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct ModularIsEqualPreCompute<const READ_LIMBS: usize> {
+    a: u8,
+    rs_addrs: [u8; 2],
+    modulus_limbs: [u8; READ_LIMBS],
+}
+
+impl<const NUM_LANES: usize, const LANE_SIZE: usize, const TOTAL_READ_SIZE: usize>
+    VmModularIsEqualExecutor<NUM_LANES, LANE_SIZE, TOTAL_READ_SIZE>
+{
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut ModularIsEqualPreCompute<TOTAL_READ_SIZE>,
+    ) -> Result<bool, StaticProgramError> {
+        let Instruction {
+            opcode,
+            a,
+            b,
+            c,
+            d,
+            e,
+            ..
+        } = inst;
+
+        let local_opcode =
+            Rv32ModularArithmeticOpcode::from_usize(opcode.local_opcode_idx(self.0.offset));
+
+        // Validate instruction format
+        let a = a.as_canonical_u32();
+        let b = b.as_canonical_u32();
+        let c = c.as_canonical_u32();
+        let d = d.as_canonical_u32();
+        let e = e.as_canonical_u32();
+        if d != RV32_REGISTER_AS || e != RV32_MEMORY_AS {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+
+        if !matches!(
+            local_opcode,
+            Rv32ModularArithmeticOpcode::IS_EQ | Rv32ModularArithmeticOpcode::SETUP_ISEQ
+        ) {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+
+        let rs_addrs = from_fn(|i| if i == 0 { b } else { c } as u8);
+        *data = ModularIsEqualPreCompute {
+            a: a as u8,
+            rs_addrs,
+            modulus_limbs: self.0.modulus_limbs,
+        };
+
+        let is_setup = local_opcode == Rv32ModularArithmeticOpcode::SETUP_ISEQ;
+
+        Ok(is_setup)
     }
+}
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+impl<F, const NUM_LANES: usize, const LANE_SIZE: usize, const TOTAL_READ_SIZE: usize> Executor<F>
+    for VmModularIsEqualExecutor<NUM_LANES, LANE_SIZE, TOTAL_READ_SIZE>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize {
+        std::mem::size_of::<ModularIsEqualPreCompute<TOTAL_READ_SIZE>>()
     }
+
+    fn pre_compute<Ctx: ExecutionCtxTrait>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let pre_compute: &mut ModularIsEqualPreCompute<TOTAL_READ_SIZE> = data.borrow_mut();
+
+        let is_setup = self.pre_compute_impl(pc, inst, pre_compute)?;
+        let fn_ptr = if is_setup {
+            execute_e1_impl::<_, _, NUM_LANES, LANE_SIZE, TOTAL_READ_SIZE, true>
+        } else {
+            execute_e1_impl::<_, _, NUM_LANES, LANE_SIZE, TOTAL_READ_SIZE, false>
+        };
+
+        Ok(fn_ptr)
+    }
+}
+
+impl<F, const NUM_LANES: usize, const LANE_SIZE: usize, const TOTAL_READ_SIZE: usize>
+    MeteredExecutor<F> for VmModularIsEqualExecutor<NUM_LANES, LANE_SIZE, TOTAL_READ_SIZE>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn metered_pre_compute_size(&self) -> usize {
+        std::mem::size_of::<E2PreCompute<ModularIsEqualPreCompute<TOTAL_READ_SIZE>>>()
+    }
+
+    fn metered_pre_compute<Ctx: MeteredExecutionCtxTrait>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let pre_compute: &mut E2PreCompute<ModularIsEqualPreCompute<TOTAL_READ_SIZE>> =
+            data.borrow_mut();
+        pre_compute.chip_idx = chip_idx as u32;
+
+        let is_setup = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?;
+        let fn_ptr = if is_setup {
+            execute_e2_impl::<_, _, NUM_LANES, LANE_SIZE, TOTAL_READ_SIZE, true>
+        } else {
+            execute_e2_impl::<_, _, NUM_LANES, LANE_SIZE, TOTAL_READ_SIZE, false>
+        };
+
+        Ok(fn_ptr)
+    }
+}
+
+unsafe fn execute_e1_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const NUM_LANES: usize,
+    const LANE_SIZE: usize,
+    const TOTAL_READ_SIZE: usize,
+    const IS_SETUP: bool,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &ModularIsEqualPreCompute<TOTAL_READ_SIZE> = pre_compute.borrow();
+
+    execute_e12_impl::<_, _, NUM_LANES, LANE_SIZE, TOTAL_READ_SIZE, IS_SETUP>(
+        pre_compute,
+        vm_state,
+    );
+}
+
+unsafe fn execute_e2_impl<
+    F: PrimeField32,
+    CTX: MeteredExecutionCtxTrait,
+    const NUM_LANES: usize,
+    const LANE_SIZE: usize,
+    const TOTAL_READ_SIZE: usize,
+    const IS_SETUP: bool,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &E2PreCompute<ModularIsEqualPreCompute<TOTAL_READ_SIZE>> =
+        pre_compute.borrow();
+    vm_state
+        .ctx
+        .on_height_change(pre_compute.chip_idx as usize, 1);
+    execute_e12_impl::<_, _, NUM_LANES, LANE_SIZE, TOTAL_READ_SIZE, IS_SETUP>(
+        &pre_compute.data,
+        vm_state,
+    );
+}
+
+unsafe fn execute_e12_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const NUM_LANES: usize,
+    const LANE_SIZE: usize,
+    const TOTAL_READ_SIZE: usize,
+    const IS_SETUP: bool,
+>(
+    pre_compute: &ModularIsEqualPreCompute<TOTAL_READ_SIZE>,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    // Read the two source registers as little-endian u32 memory addresses.
+    let rs_vals = pre_compute
+        .rs_addrs
+        .map(|addr| u32::from_le_bytes(vm_state.vm_read(RV32_REGISTER_AS, addr as u32)));
+
+    // Read each operand as NUM_LANES chunks of LANE_SIZE bytes and flatten them.
+    let [b, c]: [[u8; TOTAL_READ_SIZE]; 2] = rs_vals.map(|address| {
+        debug_assert!(address as usize + TOTAL_READ_SIZE - 1 < (1 << POINTER_MAX_BITS));
+        from_fn::<_, NUM_LANES, _>(|i| {
+            vm_state.vm_read::<_, LANE_SIZE>(RV32_MEMORY_AS, address + (i * LANE_SIZE) as u32)
+        })
+        .concat()
+        .try_into()
+        .unwrap()
+    });
+    // Operands must be below the modulus (`b` is exempt during setup); debug builds only.
+    if !IS_SETUP {
+        let (b_cmp, _) = run_unsigned_less_than::<TOTAL_READ_SIZE>(&b, &pre_compute.modulus_limbs);
+        debug_assert!(b_cmp, "{:?} >= {:?}", b, pre_compute.modulus_limbs);
+    }
+
+    let (c_cmp, _) = run_unsigned_less_than::<TOTAL_READ_SIZE>(&c, &pre_compute.modulus_limbs);
+    debug_assert!(c_cmp, "{:?} >= {:?}", c, pre_compute.modulus_limbs);
+
+    // The result is 1 when the operands are byte-wise equal, 0 otherwise.
+    let mut write_data = [0u8; RV32_REGISTER_NUM_LIMBS];
+    write_data[0] = (b == c) as u8;
+
+    // Write the result to the destination register `a`.
+    vm_state.vm_write(RV32_REGISTER_AS, pre_compute.a as u32, &write_data);
+
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
 }
 
 // Returns (cmp_result, diff_idx)
+#[inline(always)]
 pub(super) fn run_unsigned_less_than<const NUM_LIMBS: usize>(
-    x: &[u32; NUM_LIMBS],
-    y: &[u32; NUM_LIMBS],
+    x: &[u8; NUM_LIMBS],
+    y: &[u8; NUM_LIMBS],
 ) -> (bool, usize) {
     for i in (0..NUM_LIMBS).rev() {
         if x[i] != y[i] {
diff --git a/extensions/algebra/circuit/src/modular_chip/mod.rs b/extensions/algebra/circuit/src/modular_chip/mod.rs
index 2dd9838206..24c0aa8b1a 100644
--- a/extensions/algebra/circuit/src/modular_chip/mod.rs
+++ b/extensions/algebra/circuit/src/modular_chip/mod.rs
@@ -1,17 +1,61 @@
-mod addsub;
-pub use addsub::*;
+use openvm_circuit::arch::{VmAirWrapper, VmChipWrapper};
+use openvm_circuit_derive::PreflightExecutor;
+use openvm_instructions::riscv::{RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS};
+use openvm_mod_circuit_builder::{FieldExpressionCoreAir, FieldExpressionFiller};
+use openvm_rv32_adapters::{
+    Rv32IsEqualModAdapterAir, Rv32IsEqualModAdapterExecutor, Rv32IsEqualModAdapterFiller,
+    Rv32VecHeapAdapterAir, Rv32VecHeapAdapterFiller,
+};
+
+use crate::FieldExprVecHeapExecutor;
+
 mod is_eq;
 pub use is_eq::*;
+mod addsub;
+pub use addsub::*;
 mod muldiv;
 pub use muldiv::*;
-use openvm_circuit::arch::VmChipWrapper;
-use openvm_instructions::riscv::{RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS};
-use openvm_rv32_adapters::Rv32IsEqualModAdapterChip;
 
 #[cfg(test)]
 mod tests;
 
+pub type ModularAir<const BLOCKS: usize, const BLOCK_SIZE: usize> = VmAirWrapper<
+    Rv32VecHeapAdapterAir<2, BLOCKS, BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
+    FieldExpressionCoreAir,
+>;
+
+pub type ModularExecutor<const BLOCKS: usize, const BLOCK_SIZE: usize> =
+    FieldExprVecHeapExecutor<BLOCKS, BLOCK_SIZE, false>;
+
+pub type ModularChip<F, const BLOCKS: usize, const BLOCK_SIZE: usize> = VmChipWrapper<
+    F,
+    FieldExpressionFiller<Rv32VecHeapAdapterFiller<2, BLOCKS, BLOCKS, BLOCK_SIZE, BLOCK_SIZE>>,
+>;
+
 // Must have TOTAL_LIMBS = NUM_LANES * LANE_SIZE
+pub type ModularIsEqualAir<
+    const NUM_LANES: usize,
+    const LANE_SIZE: usize,
+    const TOTAL_LIMBS: usize,
+> = VmAirWrapper<
+    Rv32IsEqualModAdapterAir<2, NUM_LANES, LANE_SIZE, TOTAL_LIMBS>,
+    ModularIsEqualCoreAir<TOTAL_LIMBS, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>,
+>;
+
+#[derive(Clone, PreflightExecutor)]
+pub struct VmModularIsEqualExecutor<
+    const NUM_LANES: usize,
+    const LANE_SIZE: usize,
+    const TOTAL_LIMBS: usize,
+>(
+    ModularIsEqualExecutor<
+        Rv32IsEqualModAdapterExecutor<2, NUM_LANES, LANE_SIZE, TOTAL_LIMBS>,
+        TOTAL_LIMBS,
+        RV32_REGISTER_NUM_LIMBS,
+        RV32_CELL_BITS,
+    >,
+);
+
 pub type ModularIsEqualChip<
     F,
     const NUM_LANES: usize,
@@ -19,6 +63,10 @@ pub type ModularIsEqualChip<
     const TOTAL_LIMBS: usize,
 > = VmChipWrapper<
     F,
-    Rv32IsEqualModAdapterChip<F, 2, NUM_LANES, LANE_SIZE, TOTAL_LIMBS>,
-    ModularIsEqualCoreChip<TOTAL_LIMBS, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>,
+    ModularIsEqualFiller<
+        Rv32IsEqualModAdapterFiller<2, NUM_LANES, LANE_SIZE, TOTAL_LIMBS>,
+        TOTAL_LIMBS,
+        RV32_REGISTER_NUM_LIMBS,
+        RV32_CELL_BITS,
+    >,
 >;
diff --git a/extensions/algebra/circuit/src/modular_chip/muldiv.rs b/extensions/algebra/circuit/src/modular_chip/muldiv.rs
index 30f063e2b1..fef9d1e9a7 100644
--- a/extensions/algebra/circuit/src/modular_chip/muldiv.rs
+++ b/extensions/algebra/circuit/src/modular_chip/muldiv.rs
@@ -1,21 +1,25 @@
-use std::{
-    cell::RefCell,
-    rc::Rc,
-    sync::{Arc, Mutex},
-};
+use std::{cell::RefCell, rc::Rc};
 
 use openvm_algebra_transpiler::Rv32ModularArithmeticOpcode;
-use openvm_circuit::{arch::VmChipWrapper, system::memory::OfflineMemory};
-use openvm_circuit_derive::InstructionExecutor;
-use openvm_circuit_primitives::var_range::{
-    SharedVariableRangeCheckerChip, VariableRangeCheckerBus,
+use openvm_circuit::{
+    arch::ExecutionBridge,
+    system::memory::{offline_checker::MemoryBridge, SharedMemoryHelper},
+};
+use openvm_circuit_primitives::{
+    bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
+    var_range::{SharedVariableRangeCheckerChip, VariableRangeCheckerBus},
 };
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
+use openvm_instructions::riscv::RV32_CELL_BITS;
 use openvm_mod_circuit_builder::{
-    ExprBuilder, ExprBuilderConfig, FieldExpr, FieldExpressionCoreChip, FieldVariable, SymbolicExpr,
+    ExprBuilder, ExprBuilderConfig, FieldExpr, FieldExpressionCoreAir, FieldExpressionExecutor,
+    FieldExpressionFiller, FieldVariable, SymbolicExpr,
+};
+use openvm_rv32_adapters::{
+    Rv32VecHeapAdapterAir, Rv32VecHeapAdapterExecutor, Rv32VecHeapAdapterFiller,
 };
-use openvm_rv32_adapters::Rv32VecHeapAdapterChip;
-use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::{ModularAir, ModularChip, ModularExecutor};
+use crate::FieldExprVecHeapExecutor;
 
 pub fn muldiv_expr(
     config: ExprBuilderConfig,
@@ -26,17 +30,19 @@ pub fn muldiv_expr(
     let builder = Rc::new(RefCell::new(builder));
     let x = ExprBuilder::new_input(builder.clone());
     let y = ExprBuilder::new_input(builder.clone());
-    let (z_idx, z) = builder.borrow_mut().new_var();
+    let (z_idx, z) = (*builder).borrow_mut().new_var();
     let mut z = FieldVariable::from_var(builder.clone(), z);
-    let is_mul_flag = builder.borrow_mut().new_flag();
-    let is_div_flag = builder.borrow_mut().new_flag();
+    let is_mul_flag = (*builder).borrow_mut().new_flag();
+    let is_div_flag = (*builder).borrow_mut().new_flag();
     // constraint is x * y = z, or z * y = x
     let lvar = FieldVariable::select(is_mul_flag, &x, &z);
     let rvar = FieldVariable::select(is_mul_flag, &z, &x);
     // When it's SETUP op, x = p == 0, y = 0, both flags are false, and it still works: z * 0 - x =
     // 0, whatever z is.
     let constraint = lvar * y.clone() - rvar;
-    builder.borrow_mut().set_constraint(z_idx, constraint.expr);
+    (*builder)
+        .borrow_mut()
+        .set_constraint(z_idx, constraint.expr);
     let compute = SymbolicExpr::Select(
         is_mul_flag,
         Box::new(x.expr.clone() * y.expr.clone()),
@@ -46,10 +52,10 @@ pub fn muldiv_expr(
             Box::new(x.expr.clone()),
         )),
     );
-    builder.borrow_mut().set_compute(z_idx, compute);
+    (*builder).borrow_mut().set_compute(z_idx, compute);
     z.save_output();
 
-    let builder = builder.borrow().clone();
+    let builder = (*builder).borrow().clone();
 
     (
         FieldExpr::new(builder, range_bus, true),
@@ -58,39 +64,78 @@ pub fn muldiv_expr(
     )
 }
 
-#[derive(Chip, ChipUsageGetter, InstructionExecutor)]
-pub struct ModularMulDivChip<F: PrimeField32, const BLOCKS: usize, const BLOCK_SIZE: usize>(
-    pub  VmChipWrapper<
-        F,
-        Rv32VecHeapAdapterChip<F, 2, BLOCKS, BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
-        FieldExpressionCoreChip,
-    >,
-);
+fn gen_base_expr(
+    config: ExprBuilderConfig,
+    range_checker_bus: VariableRangeCheckerBus,
+) -> (FieldExpr, Vec<usize>, Vec<usize>) {
+    let (expr, is_mul_flag, is_div_flag) = muldiv_expr(config, range_checker_bus);
+
+    let local_opcode_idx = vec![
+        Rv32ModularArithmeticOpcode::MUL as usize,
+        Rv32ModularArithmeticOpcode::DIV as usize,
+        Rv32ModularArithmeticOpcode::SETUP_MULDIV as usize,
+    ];
+    let opcode_flag_idx = vec![is_mul_flag, is_div_flag];
+
+    (expr, local_opcode_idx, opcode_flag_idx)
+}
+
+pub fn get_modular_muldiv_air<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    exec_bridge: ExecutionBridge,
+    mem_bridge: MemoryBridge,
+    config: ExprBuilderConfig,
+    range_checker_bus: VariableRangeCheckerBus,
+    bitwise_lookup_bus: BitwiseOperationLookupBus,
+    pointer_max_bits: usize,
+    offset: usize,
+) -> ModularAir<BLOCKS, BLOCK_SIZE> {
+    let (expr, local_opcode_idx, opcode_flag_idx) = gen_base_expr(config, range_checker_bus);
+    ModularAir::new(
+        Rv32VecHeapAdapterAir::new(
+            exec_bridge,
+            mem_bridge,
+            bitwise_lookup_bus,
+            pointer_max_bits,
+        ),
+        FieldExpressionCoreAir::new(expr, offset, local_opcode_idx, opcode_flag_idx),
+    )
+}
+
+pub fn get_modular_muldiv_step<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    config: ExprBuilderConfig,
+    range_checker_bus: VariableRangeCheckerBus,
+    pointer_max_bits: usize,
+    offset: usize,
+) -> ModularExecutor<BLOCKS, BLOCK_SIZE> {
+    let (expr, local_opcode_idx, opcode_flag_idx) = gen_base_expr(config, range_checker_bus);
 
-impl<F: PrimeField32, const BLOCKS: usize, const BLOCK_SIZE: usize>
-    ModularMulDivChip<F, BLOCKS, BLOCK_SIZE>
-{
-    pub fn new(
-        adapter: Rv32VecHeapAdapterChip<F, 2, BLOCKS, BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
-        config: ExprBuilderConfig,
-        offset: usize,
-        range_checker: SharedVariableRangeCheckerChip,
-        offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-    ) -> Self {
-        let (expr, is_mul_flag, is_div_flag) = muldiv_expr(config, range_checker.bus());
-        let core = FieldExpressionCoreChip::new(
+    FieldExprVecHeapExecutor(FieldExpressionExecutor::new(
+        Rv32VecHeapAdapterExecutor::new(pointer_max_bits),
+        expr,
+        offset,
+        local_opcode_idx,
+        opcode_flag_idx,
+        "ModularMulDiv",
+    ))
+}
+
+pub fn get_modular_muldiv_chip<F, const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    config: ExprBuilderConfig,
+    mem_helper: SharedMemoryHelper<F>,
+    range_checker: SharedVariableRangeCheckerChip,
+    bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+    pointer_max_bits: usize,
+) -> ModularChip<F, BLOCKS, BLOCK_SIZE> {
+    let (expr, local_opcode_idx, opcode_flag_idx) = gen_base_expr(config, range_checker.bus());
+    ModularChip::new(
+        FieldExpressionFiller::new(
+            Rv32VecHeapAdapterFiller::new(pointer_max_bits, bitwise_lookup_chip),
             expr,
-            offset,
-            vec![
-                Rv32ModularArithmeticOpcode::MUL as usize,
-                Rv32ModularArithmeticOpcode::DIV as usize,
-                Rv32ModularArithmeticOpcode::SETUP_MULDIV as usize,
-            ],
-            vec![is_mul_flag, is_div_flag],
+            local_opcode_idx,
+            opcode_flag_idx,
             range_checker,
-            "ModularMulDiv",
             false,
-        );
-        Self(VmChipWrapper::new(adapter, core, offline_memory))
-    }
+        ),
+        mem_helper,
+    )
 }
diff --git a/extensions/algebra/circuit/src/modular_chip/tests.rs b/extensions/algebra/circuit/src/modular_chip/tests.rs
index 1ad3310f76..3770483647 100644
--- a/extensions/algebra/circuit/src/modular_chip/tests.rs
+++ b/extensions/algebra/circuit/src/modular_chip/tests.rs
@@ -1,122 +1,136 @@
-use std::{array::from_fn, borrow::BorrowMut};
+use std::{borrow::BorrowMut, str::FromStr, sync::Arc};
 
 use num_bigint::BigUint;
 use num_traits::Zero;
 use openvm_algebra_transpiler::Rv32ModularArithmeticOpcode;
 use openvm_circuit::arch::{
     instructions::LocalOpcode,
-    testing::{VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS},
-    AdapterRuntimeContext, Result, VmAdapterInterface, VmChipWrapper, VmCoreChip,
+    testing::{memory::gen_pointer, TestChipHarness, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS},
+    Arena, DenseRecordArena, MatrixRecordArena, PreflightExecutor,
 };
 use openvm_circuit_primitives::{
-    bigint::utils::{big_uint_to_limbs, secp256k1_coord_prime, secp256k1_scalar_prime},
-    bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
+    bigint::utils::{secp256k1_coord_prime, secp256k1_scalar_prime},
+    bitwise_op_lookup::{
+        BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
+        SharedBitwiseOperationLookupChip,
+    },
 };
-use openvm_instructions::{instruction::Instruction, riscv::RV32_CELL_BITS, VmOpcode};
-use openvm_mod_circuit_builder::{
-    test_utils::{biguint_to_limbs, generate_field_element},
-    ExprBuilderConfig,
+use openvm_instructions::{
+    instruction::Instruction,
+    riscv::{RV32_CELL_BITS, RV32_MEMORY_AS, RV32_REGISTER_AS},
+    VmOpcode,
 };
-use openvm_pairing_guest::bls12_381::BLS12_381_MODULUS;
-use openvm_rv32_adapters::{
-    rv32_write_heap_default, write_ptr_reg, Rv32IsEqualModAdapterChip, Rv32VecHeapAdapterChip,
+use openvm_mod_circuit_builder::{
+    test_utils::{generate_field_element, generate_random_biguint},
+    utils::biguint_to_limbs_vec,
+    ExprBuilderConfig, FieldExpressionCoreRecordMut,
 };
+use openvm_pairing_guest::{bls12_381::BLS12_381_MODULUS, bn254::BN254_MODULUS};
+use openvm_rv32_adapters::{rv32_write_heap_default, write_ptr_reg, Rv32VecHeapAdapterRecord};
 use openvm_rv32im_circuit::adapters::RV32_REGISTER_NUM_LIMBS;
-use openvm_stark_backend::p3_field::{FieldAlgebra, PrimeField32};
+use openvm_stark_backend::p3_field::FieldAlgebra;
 use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
-use rand::Rng;
+use rand::{rngs::StdRng, Rng};
 
-use super::{
-    ModularAddSubChip, ModularIsEqualChip, ModularIsEqualCoreAir, ModularIsEqualCoreChip,
-    ModularIsEqualCoreCols, ModularIsEqualCoreRecord, ModularMulDivChip,
+use crate::modular_chip::{
+    get_modular_addsub_air, get_modular_addsub_chip, get_modular_addsub_step,
+    get_modular_muldiv_air, get_modular_muldiv_chip, get_modular_muldiv_step, ModularAir,
+    ModularChip, ModularExecutor, ModularIsEqualAir, ModularIsEqualChip, ModularIsEqualCoreAir,
+    ModularIsEqualCoreCols, ModularIsEqualFiller, VmModularIsEqualExecutor,
 };
 
-const NUM_LIMBS: usize = 32;
 const LIMB_BITS: usize = 8;
-const BLOCK_SIZE: usize = 32;
+const MAX_INS_CAPACITY: usize = 128;
 type F = BabyBear;
 
-const ADD_LOCAL: usize = Rv32ModularArithmeticOpcode::ADD as usize;
-const MUL_LOCAL: usize = Rv32ModularArithmeticOpcode::MUL as usize;
+#[cfg(test)]
+mod addsub_tests {
 
-#[test]
-fn test_coord_addsub() {
-    let opcode_offset = 0;
-    let modulus = secp256k1_coord_prime();
-    test_addsub(opcode_offset, modulus);
-}
+    use test_case::test_case;
 
-#[test]
-fn test_scalar_addsub() {
-    let opcode_offset = 4;
-    let modulus = secp256k1_scalar_prime();
-    test_addsub(opcode_offset, modulus);
-}
+    use super::*;
 
-fn test_addsub(opcode_offset: usize, modulus: BigUint) {
-    let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-    let config = ExprBuilderConfig {
-        modulus: modulus.clone(),
-        num_limbs: NUM_LIMBS,
-        limb_bits: LIMB_BITS,
-    };
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-
-    // doing 1xNUM_LIMBS reads and writes
-    let adapter = Rv32VecHeapAdapterChip::<F, 2, 1, 1, BLOCK_SIZE, BLOCK_SIZE>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-        tester.address_bits(),
-        bitwise_chip.clone(),
-    );
-    let mut chip = ModularAddSubChip::new(
-        adapter,
-        config,
-        Rv32ModularArithmeticOpcode::CLASS_OFFSET + opcode_offset,
-        tester.range_checker(),
-        tester.offline_memory_mutex_arc(),
-    );
-    let mut rng = create_seeded_rng();
-    let num_tests = 50;
-    let mut all_ops = vec![ADD_LOCAL + 2]; // setup
-    let mut all_a = vec![modulus.clone()];
-    let mut all_b = vec![BigUint::zero()];
-
-    // First loop: generate all random test data.
-    for _ in 0..num_tests {
-        let a_digits: Vec<_> = (0..NUM_LIMBS)
-            .map(|_| rng.gen_range(0..(1 << LIMB_BITS)))
-            .collect();
-        let mut a = BigUint::new(a_digits.clone());
-        let b_digits: Vec<_> = (0..NUM_LIMBS)
-            .map(|_| rng.gen_range(0..(1 << LIMB_BITS)))
-            .collect();
-        let mut b = BigUint::new(b_digits.clone());
+    type Harness<RA, const BLOCKS: usize, const BLOCK_SIZE: usize> = TestChipHarness<
+        F,
+        ModularExecutor<BLOCKS, BLOCK_SIZE>,
+        ModularAir<BLOCKS, BLOCK_SIZE>,
+        ModularChip<F, BLOCKS, BLOCK_SIZE>,
+        RA,
+    >;
+    const ADD_LOCAL: usize = Rv32ModularArithmeticOpcode::ADD as usize;
 
-        let op = rng.gen_range(0..2) + ADD_LOCAL; // 0 for add, 1 for sub
-        a %= &modulus;
-        b %= &modulus;
+    fn create_test_chip<RA: Arena, const BLOCKS: usize, const BLOCK_SIZE: usize>(
+        tester: &VmChipTestBuilder<F>,
+        config: ExprBuilderConfig,
+        offset: usize,
+    ) -> (
+        Harness<RA, BLOCKS, BLOCK_SIZE>,
+        (
+            BitwiseOperationLookupAir<RV32_CELL_BITS>,
+            SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+        ),
+    ) {
+        let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
+        let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+            bitwise_bus,
+        ));
+
+        let air = get_modular_addsub_air(
+            tester.execution_bridge(),
+            tester.memory_bridge(),
+            config.clone(),
+            tester.range_checker().bus(),
+            bitwise_bus,
+            tester.address_bits(),
+            offset,
+        );
+        let executor = get_modular_addsub_step(
+            config.clone(),
+            tester.range_checker().bus(),
+            tester.address_bits(),
+            offset,
+        );
+        let chip = get_modular_addsub_chip(
+            config,
+            tester.memory_helper(),
+            tester.range_checker(),
+            bitwise_chip.clone(),
+            tester.address_bits(),
+        );
+        let harness = Harness::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
 
-        all_ops.push(op);
-        all_a.push(a);
-        all_b.push(b);
+        (harness, (bitwise_chip.air, bitwise_chip))
     }
-    // Second loop: actually run the tests.
-    for i in 0..=num_tests {
-        let op = all_ops[i];
-        let a = all_a[i].clone();
-        let b = all_b[i].clone();
-        if i > 0 {
-            // if not setup
-            assert!(a < modulus);
-            assert!(b < modulus);
-        }
+
+    fn set_and_execute_addsub<
+        RA: Arena,
+        const BLOCKS: usize,
+        const BLOCK_SIZE: usize,
+        const NUM_LIMBS: usize,
+    >(
+        tester: &mut VmChipTestBuilder<F>,
+        harness: &mut Harness<RA, BLOCKS, BLOCK_SIZE>,
+        rng: &mut StdRng,
+        modulus: &BigUint,
+        is_setup: bool,
+        offset: usize,
+    ) where
+        ModularExecutor<BLOCKS, BLOCK_SIZE>: PreflightExecutor<F, RA>,
+    {
+        let (a, b, op) = if is_setup {
+            (modulus.clone(), BigUint::zero(), ADD_LOCAL + 2)
+        } else {
+            let a = generate_random_biguint(modulus);
+            let b = generate_random_biguint(modulus);
+
+            let op = rng.gen_range(0..2) + ADD_LOCAL; // 0 for add, 1 for sub
+            (a, b, op)
+        };
+
         let expected_answer = match op - ADD_LOCAL {
-            0 => (&a + &b) % &modulus,
-            1 => (&a + &modulus - &b) % &modulus,
-            2 => a.clone() % &modulus,
+            0 => (&a + &b) % modulus,
+            1 => (&a + modulus - &b) % modulus,
+            2 => a.clone() % modulus,
             _ => panic!(),
         };
 
@@ -125,127 +139,261 @@ fn test_addsub(opcode_offset: usize, modulus: BigUint) {
         // 1. address_ptr which stores the actual address
         // 2. actual address which stores the biguint limbs
         // The write of result r is done in the chip.
-        let ptr_as = 1;
+        let ptr_as = RV32_REGISTER_AS as usize;
         let addr_ptr1 = 0;
         let addr_ptr2 = 3 * RV32_REGISTER_NUM_LIMBS;
         let addr_ptr3 = 6 * RV32_REGISTER_NUM_LIMBS;
 
-        let data_as = 2;
-        let address1 = 0u32;
-        let address2 = 128u32;
-        let address3 = (1 << 28) + 1234; // a large memory address to test heap adapter
+        let data_as = RV32_MEMORY_AS as usize;
+        let address1 = gen_pointer(rng, BLOCK_SIZE) as u32;
+        let address2 = gen_pointer(rng, BLOCK_SIZE) as u32;
+        let address3 = gen_pointer(rng, BLOCK_SIZE) as u32;
+
+        write_ptr_reg(tester, ptr_as, addr_ptr1, address1);
+        write_ptr_reg(tester, ptr_as, addr_ptr2, address2);
+        write_ptr_reg(tester, ptr_as, addr_ptr3, address3);
 
-        write_ptr_reg(&mut tester, ptr_as, addr_ptr1, address1);
-        write_ptr_reg(&mut tester, ptr_as, addr_ptr2, address2);
-        write_ptr_reg(&mut tester, ptr_as, addr_ptr3, address3);
+        let a_limbs: Vec<F> = biguint_to_limbs_vec(&a, NUM_LIMBS)
+            .into_iter()
+            .map(F::from_canonical_u8)
+            .collect();
+        let b_limbs: Vec<F> = biguint_to_limbs_vec(&b, NUM_LIMBS)
+            .into_iter()
+            .map(F::from_canonical_u8)
+            .collect();
 
-        let a_limbs: [BabyBear; NUM_LIMBS] =
-            biguint_to_limbs(a.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32);
-        tester.write(data_as, address1 as usize, a_limbs);
-        let b_limbs: [BabyBear; NUM_LIMBS] =
-            biguint_to_limbs(b.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32);
-        tester.write(data_as, address2 as usize, b_limbs);
+        for i in (0..NUM_LIMBS).step_by(BLOCK_SIZE) {
+            tester.write::<BLOCK_SIZE>(
+                data_as,
+                address1 as usize + i,
+                a_limbs[i..i + BLOCK_SIZE].try_into().unwrap(),
+            );
+            tester.write::<BLOCK_SIZE>(
+                data_as,
+                address2 as usize + i,
+                b_limbs[i..i + BLOCK_SIZE].try_into().unwrap(),
+            );
+        }
 
         let instruction = Instruction::from_isize(
-            VmOpcode::from_usize(chip.0.core.air.offset + op),
+            VmOpcode::from_usize(offset + op),
             addr_ptr3 as isize,
             addr_ptr1 as isize,
             addr_ptr2 as isize,
             ptr_as as isize,
             data_as as isize,
         );
-        tester.execute(&mut chip, &instruction);
+        tester.execute(harness, &instruction);
+
+        let expected_limbs: Vec<F> = biguint_to_limbs_vec(&expected_answer, NUM_LIMBS)
+            .into_iter()
+            .map(F::from_canonical_u8)
+            .collect();
 
-        let expected_limbs = biguint_to_limbs::<NUM_LIMBS>(expected_answer, LIMB_BITS);
-        for (i, expected) in expected_limbs.into_iter().enumerate() {
-            let address = address3 as usize + i;
-            let read_val = tester.read_cell(data_as, address);
-            assert_eq!(BabyBear::from_canonical_u32(expected), read_val);
+        for i in (0..NUM_LIMBS).step_by(BLOCK_SIZE) {
+            let read_vals = tester.read::<BLOCK_SIZE>(data_as, address3 as usize + i);
+            let expected_limbs: [F; BLOCK_SIZE] =
+                expected_limbs[i..i + BLOCK_SIZE].try_into().unwrap();
+            assert_eq!(read_vals, expected_limbs);
         }
     }
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
 
-    tester.simple_test().expect("Verification failed");
-}
+    fn run_addsub_test<const BLOCKS: usize, const BLOCK_SIZE: usize, const NUM_LIMBS: usize>(
+        opcode_offset: usize,
+        modulus: BigUint,
+        num_ops: usize,
+    ) {
+        let mut rng = create_seeded_rng();
+        let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
+        let offset = Rv32ModularArithmeticOpcode::CLASS_OFFSET + opcode_offset;
+        let config = ExprBuilderConfig {
+            modulus: modulus.clone(),
+            num_limbs: NUM_LIMBS,
+            limb_bits: LIMB_BITS,
+        };
 
-#[test]
-fn test_coord_muldiv() {
-    let opcode_offset = 0;
-    let modulus = secp256k1_coord_prime();
-    test_muldiv(opcode_offset, modulus);
-}
+        let (mut harness, bitwise) =
+            create_test_chip::<MatrixRecordArena<F>, BLOCKS, BLOCK_SIZE>(&tester, config, offset);
+
+        for i in 0..num_ops {
+            set_and_execute_addsub::<_, BLOCKS, BLOCK_SIZE, NUM_LIMBS>(
+                &mut tester,
+                &mut harness,
+                &mut rng,
+                &modulus,
+                i == 0,
+                offset,
+            );
+        }
 
-#[test]
-fn test_scalar_muldiv() {
-    let opcode_offset = 4;
-    let modulus = secp256k1_scalar_prime();
-    test_muldiv(opcode_offset, modulus);
-}
+        let tester = tester
+            .build()
+            .load(harness)
+            .load_periphery(bitwise)
+            .finalize();
+        tester.simple_test().expect("Verification failed");
+    }
 
-fn test_muldiv(opcode_offset: usize, modulus: BigUint) {
-    let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-    let config = ExprBuilderConfig {
-        modulus: modulus.clone(),
-        num_limbs: NUM_LIMBS,
-        limb_bits: LIMB_BITS,
-    };
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-    // doing 1xNUM_LIMBS reads and writes
-    let adapter = Rv32VecHeapAdapterChip::<F, 2, 1, 1, BLOCK_SIZE, BLOCK_SIZE>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-        tester.address_bits(),
-        bitwise_chip.clone(),
-    );
-    let mut chip = ModularMulDivChip::new(
-        adapter,
-        config,
-        Rv32ModularArithmeticOpcode::CLASS_OFFSET + opcode_offset,
-        tester.range_checker(),
-        tester.offline_memory_mutex_arc(),
-    );
-    let mut rng = create_seeded_rng();
-    let num_tests = 50;
-    let mut all_ops = vec![MUL_LOCAL + 2];
-    let mut all_a = vec![modulus.clone()];
-    let mut all_b = vec![BigUint::zero()];
-
-    // First loop: generate all random test data.
-    for _ in 0..num_tests {
-        let a_digits: Vec<_> = (0..NUM_LIMBS)
-            .map(|_| rng.gen_range(0..(1 << LIMB_BITS)))
-            .collect();
-        let mut a = BigUint::new(a_digits.clone());
-        let b_digits: Vec<_> = (0..NUM_LIMBS)
-            .map(|_| rng.gen_range(0..(1 << LIMB_BITS)))
-            .collect();
-        let mut b = BigUint::new(b_digits.clone());
+    #[test]
+    fn test_modular_addsub_1x32_small() {
+        run_addsub_test::<1, 32, 32>(
+            0,
+            BigUint::from_str("357686312646216567629137").unwrap(),
+            50,
+        );
+    }
+
+    #[test]
+    fn test_modular_addsub_1x32_secp256k1() {
+        run_addsub_test::<1, 32, 32>(0, secp256k1_coord_prime(), 50);
+        run_addsub_test::<1, 32, 32>(4, secp256k1_scalar_prime(), 50);
+    }
 
-        // let op = rng.gen_range(2..4); // 2 for mul, 3 for div
-        let op = MUL_LOCAL;
-        a %= &modulus;
-        b %= &modulus;
+    #[test]
+    fn test_modular_addsub_1x32_bn254() {
+        run_addsub_test::<1, 32, 32>(0, BN254_MODULUS.clone(), 50);
+    }
 
-        all_ops.push(op);
-        all_a.push(a);
-        all_b.push(b);
+    #[test]
+    fn test_modular_addsub_3x16_bls12_381() {
+        run_addsub_test::<3, 16, 48>(0, BLS12_381_MODULUS.clone(), 50);
     }
-    // Second loop: actually run the tests.
-    for i in 0..=num_tests {
-        let op = all_ops[i];
-        let a = all_a[i].clone();
-        let b = all_b[i].clone();
-        if i > 0 {
-            // if not setup
-            assert!(a < modulus);
-            assert!(b < modulus);
+
+    #[test_case(0, secp256k1_coord_prime(), 50)]
+    #[test_case(4, secp256k1_scalar_prime(), 50)]
+    fn dense_record_arena_test(opcode_offset: usize, modulus: BigUint, num_ops: usize) {
+        let mut rng = create_seeded_rng();
+        let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
+        let config = ExprBuilderConfig {
+            modulus: modulus.clone(),
+            num_limbs: 32,
+            limb_bits: LIMB_BITS,
+        };
+        let offset = Rv32ModularArithmeticOpcode::CLASS_OFFSET + opcode_offset;
+
+        let (mut sparse_harness, bitwise) =
+            create_test_chip::<MatrixRecordArena<F>, 1, 32>(&tester, config.clone(), offset);
+
+        {
+            // doing 1x32 reads and writes (BLOCKS = 1, BLOCK_SIZE = 32)
+            let mut dense_harness =
+                create_test_chip::<DenseRecordArena, 1, 32>(&tester, config, offset).0;
+
+            for i in 0..num_ops {
+                set_and_execute_addsub::<_, 1, 32, 32>(
+                    &mut tester,
+                    &mut dense_harness,
+                    &mut rng,
+                    &modulus,
+                    i == 0,
+                    offset,
+                );
+            }
+
+            type Record<'a> = (
+                &'a mut Rv32VecHeapAdapterRecord<2, 1, 1, 32, 32>,
+                FieldExpressionCoreRecordMut<'a>,
+            );
+            let mut record_interpreter = dense_harness.arena.get_record_seeker::<Record, _>();
+            record_interpreter.transfer_to_matrix_arena(
+                &mut sparse_harness.arena,
+                dense_harness.executor.get_record_layout::<F>(),
+            );
         }
+
+        let tester = tester
+            .build()
+            .load(sparse_harness)
+            .load_periphery(bitwise)
+            .finalize();
+        tester.simple_test().expect("Verification failed");
+    }
+}
+
+#[cfg(test)]
+mod muldiv_tests {
+    use super::*;
+
+    const MUL_LOCAL: usize = Rv32ModularArithmeticOpcode::MUL as usize;
+    type Harness<const BLOCKS: usize, const BLOCK_SIZE: usize> = TestChipHarness<
+        F,
+        ModularExecutor<BLOCKS, BLOCK_SIZE>,
+        ModularAir<BLOCKS, BLOCK_SIZE>,
+        ModularChip<F, BLOCKS, BLOCK_SIZE>,
+    >;
+
+    fn create_test_chip<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+        tester: &VmChipTestBuilder<F>,
+        config: ExprBuilderConfig,
+        offset: usize,
+    ) -> (
+        Harness<BLOCKS, BLOCK_SIZE>,
+        (
+            BitwiseOperationLookupAir<RV32_CELL_BITS>,
+            SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+        ),
+    ) {
+        let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
+        let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+            bitwise_bus,
+        ));
+
+        let air = get_modular_muldiv_air(
+            tester.execution_bridge(),
+            tester.memory_bridge(),
+            config.clone(),
+            tester.range_checker().bus(),
+            bitwise_bus,
+            tester.address_bits(),
+            offset,
+        );
+
+        let executor = get_modular_muldiv_step(
+            config.clone(),
+            tester.range_checker().bus(),
+            tester.address_bits(),
+            offset,
+        );
+
+        let chip = get_modular_muldiv_chip(
+            config,
+            tester.memory_helper(),
+            tester.range_checker(),
+            bitwise_chip.clone(),
+            tester.address_bits(),
+        );
+        let harness = Harness::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
+
+        (harness, (bitwise_chip.air, bitwise_chip))
+    }
+
+    fn set_and_execute_muldiv<
+        const BLOCKS: usize,
+        const BLOCK_SIZE: usize,
+        const NUM_LIMBS: usize,
+    >(
+        tester: &mut VmChipTestBuilder<F>,
+        harness: &mut Harness<BLOCKS, BLOCK_SIZE>,
+        rng: &mut StdRng,
+        modulus: &BigUint,
+        is_setup: bool,
+        offset: usize,
+    ) {
+        let (a, b, op) = if is_setup {
+            (modulus.clone(), BigUint::zero(), MUL_LOCAL + 2)
+        } else {
+            let a = generate_random_biguint(modulus);
+            let b = generate_random_biguint(modulus);
+
+            let op = rng.gen_range(0..2) + MUL_LOCAL; // 0 for mul, 1 for div
+
+            (a, b, op)
+        };
+
         let expected_answer = match op - MUL_LOCAL {
-            0 => (&a * &b) % &modulus,
-            1 => (&a * b.modinv(&modulus).unwrap()) % &modulus,
-            2 => a.clone() % &modulus,
+            0 => (&a * &b) % modulus,
+            1 => (&a * b.modinv(modulus).unwrap()) % modulus,
+            2 => a.clone() % modulus,
             _ => panic!(),
         };
 
@@ -254,317 +402,453 @@ fn test_muldiv(opcode_offset: usize, modulus: BigUint) {
         // 1. address_ptr which stores the actual address
         // 2. actual address which stores the biguint limbs
         // The write of result r is done in the chip.
-        let ptr_as = 1;
+        let ptr_as = RV32_REGISTER_AS as usize;
         let addr_ptr1 = 0;
         let addr_ptr2 = 12;
         let addr_ptr3 = 24;
 
-        let data_as = 2;
-        let address1 = 0;
-        let address2 = 128;
-        let address3 = 256;
+        let data_as = RV32_MEMORY_AS as usize;
+        let address1 = gen_pointer(rng, BLOCK_SIZE) as u32;
+        let address2 = gen_pointer(rng, BLOCK_SIZE) as u32;
+        let address3 = gen_pointer(rng, BLOCK_SIZE) as u32;
 
-        write_ptr_reg(&mut tester, ptr_as, addr_ptr1, address1);
-        write_ptr_reg(&mut tester, ptr_as, addr_ptr2, address2);
-        write_ptr_reg(&mut tester, ptr_as, addr_ptr3, address3);
+        write_ptr_reg(tester, ptr_as, addr_ptr1, address1);
+        write_ptr_reg(tester, ptr_as, addr_ptr2, address2);
+        write_ptr_reg(tester, ptr_as, addr_ptr3, address3);
+
+        let a_limbs: Vec<F> = biguint_to_limbs_vec(&a, NUM_LIMBS)
+            .into_iter()
+            .map(F::from_canonical_u8)
+            .collect();
+        let b_limbs: Vec<F> = biguint_to_limbs_vec(&b, NUM_LIMBS)
+            .into_iter()
+            .map(F::from_canonical_u8)
+            .collect();
 
-        let a_limbs: [BabyBear; NUM_LIMBS] =
-            biguint_to_limbs(a.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32);
-        tester.write(data_as, address1 as usize, a_limbs);
-        let b_limbs: [BabyBear; NUM_LIMBS] =
-            biguint_to_limbs(b.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32);
-        tester.write(data_as, address2 as usize, b_limbs);
+        for i in (0..NUM_LIMBS).step_by(BLOCK_SIZE) {
+            tester.write::<BLOCK_SIZE>(
+                data_as,
+                address1 as usize + i,
+                a_limbs[i..i + BLOCK_SIZE].try_into().unwrap(),
+            );
+            tester.write::<BLOCK_SIZE>(
+                data_as,
+                address2 as usize + i,
+                b_limbs[i..i + BLOCK_SIZE].try_into().unwrap(),
+            );
+        }
 
         let instruction = Instruction::from_isize(
-            VmOpcode::from_usize(chip.0.core.air.offset + op),
+            VmOpcode::from_usize(offset + op),
             addr_ptr3 as isize,
             addr_ptr1 as isize,
             addr_ptr2 as isize,
             ptr_as as isize,
             data_as as isize,
         );
-        tester.execute(&mut chip, &instruction);
+        tester.execute(harness, &instruction);
 
-        let expected_limbs = biguint_to_limbs::<NUM_LIMBS>(expected_answer, LIMB_BITS);
-        for (i, expected) in expected_limbs.into_iter().enumerate() {
-            let address = address3 as usize + i;
-            let read_val = tester.read_cell(data_as, address);
-            assert_eq!(BabyBear::from_canonical_u32(expected), read_val);
+        let expected_limbs: Vec<F> = biguint_to_limbs_vec(&expected_answer, NUM_LIMBS)
+            .into_iter()
+            .map(F::from_canonical_u8)
+            .collect();
+
+        for i in (0..NUM_LIMBS).step_by(BLOCK_SIZE) {
+            let read_vals = tester.read::<BLOCK_SIZE>(data_as, address3 as usize + i);
+            let expected_limbs: [F; BLOCK_SIZE] =
+                expected_limbs[i..i + BLOCK_SIZE].try_into().unwrap();
+            assert_eq!(read_vals, expected_limbs);
         }
     }
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
 
-    tester.simple_test().expect("Verification failed");
-}
-
-fn test_is_equal<const NUM_LANES: usize, const LANE_SIZE: usize, const TOTAL_LIMBS: usize>(
-    opcode_offset: usize,
-    modulus: BigUint,
-    num_tests: usize,
-) {
-    let mut rng = create_seeded_rng();
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<LIMB_BITS>::new(bitwise_bus);
-
-    let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-    let mut chip = ModularIsEqualChip::<F, NUM_LANES, LANE_SIZE, TOTAL_LIMBS>::new(
-        Rv32IsEqualModAdapterChip::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-            tester.address_bits(),
-            bitwise_chip.clone(),
-        ),
-        ModularIsEqualCoreChip::new(modulus.clone(), bitwise_chip.clone(), opcode_offset),
-        tester.offline_memory_mutex_arc(),
-    );
+    fn run_test_muldiv<const BLOCKS: usize, const BLOCK_SIZE: usize, const NUM_LIMBS: usize>(
+        opcode_offset: usize, // added to Rv32ModularArithmeticOpcode::CLASS_OFFSET below
+        modulus: BigUint,
+        num_ops: usize, // total instructions executed; the first one is the setup op
+    ) {
+        let mut rng = create_seeded_rng();
+        let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
+        let config = ExprBuilderConfig {
+            modulus: modulus.clone(),
+            num_limbs: NUM_LIMBS,
+            limb_bits: LIMB_BITS,
+        };
+        let offset = Rv32ModularArithmeticOpcode::CLASS_OFFSET + opcode_offset;
+
+        let (mut harness, bitwise) =
+            create_test_chip::<BLOCKS, BLOCK_SIZE>(&tester, config, offset);
+
+        for i in 0..num_ops {
+            set_and_execute_muldiv::<BLOCKS, BLOCK_SIZE, NUM_LIMBS>(
+                &mut tester,
+                &mut harness,
+                &mut rng,
+                &modulus,
+                i == 0, // is_setup: only the first op
+                offset,
+            );
+        }
+        let tester = tester
+            .build()
+            .load(harness)
+            .load_periphery(bitwise) // the bitwise (air, chip) pair returned by create_test_chip
+            .finalize();
 
-    {
-        let vec = big_uint_to_limbs(&modulus, LIMB_BITS);
-        let modulus_limbs: [F; TOTAL_LIMBS] = std::array::from_fn(|i| {
-            if i < vec.len() {
-                F::from_canonical_usize(vec[i])
-            } else {
-                F::ZERO
-            }
-        });
+        tester.simple_test().expect("Verification failed");
+    }
 
-        let setup_instruction = rv32_write_heap_default::<TOTAL_LIMBS>(
-            &mut tester,
-            vec![modulus_limbs],
-            vec![[F::ZERO; TOTAL_LIMBS]],
-            opcode_offset + Rv32ModularArithmeticOpcode::SETUP_ISEQ as usize,
+    #[test]
+    fn test_modular_muldiv_1x32_small() {
+        run_test_muldiv::<1, 32, 32>( // BLOCKS = 1, BLOCK_SIZE = 32, NUM_LIMBS = 32
+            0, // opcode_offset
+            BigUint::from_str("357686312646216567629137").unwrap(),
+            50, // num_ops
         );
-        tester.execute(&mut chip, &setup_instruction);
     }
-    for _ in 0..num_tests {
-        let b = generate_field_element::<TOTAL_LIMBS, LIMB_BITS>(&modulus, &mut rng);
-        let c = if rng.gen_bool(0.5) {
-            b
-        } else {
-            generate_field_element::<TOTAL_LIMBS, LIMB_BITS>(&modulus, &mut rng)
-        };
 
-        let instruction = rv32_write_heap_default::<TOTAL_LIMBS>(
-            &mut tester,
-            vec![b.map(F::from_canonical_u32)],
-            vec![c.map(F::from_canonical_u32)],
-            opcode_offset + Rv32ModularArithmeticOpcode::IS_EQ as usize,
-        );
-        tester.execute(&mut chip, &instruction);
+    #[test]
+    fn test_modular_muldiv_1x32_secp256k1() {
+        run_test_muldiv::<1, 32, 32>(0, secp256k1_coord_prime(), 50); // opcode_offset 0, 50 ops
+        run_test_muldiv::<1, 32, 32>(4, secp256k1_scalar_prime(), 50); // opcode_offset 4, 50 ops
     }
 
-    // Special case where b == c are close to the prime
-    let b_vec = big_uint_to_limbs(&modulus, LIMB_BITS);
-    let mut b = from_fn(|i| if i < b_vec.len() { b_vec[i] as u32 } else { 0 });
-    b[0] -= 1;
-    let instruction = rv32_write_heap_default::<TOTAL_LIMBS>(
-        &mut tester,
-        vec![b.map(F::from_canonical_u32)],
-        vec![b.map(F::from_canonical_u32)],
-        opcode_offset + Rv32ModularArithmeticOpcode::IS_EQ as usize,
-    );
-    tester.execute(&mut chip, &instruction);
-
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
-    tester.simple_test().expect("Verification failed");
-}
+    #[test]
+    fn test_modular_muldiv_1x32_bn254() {
+        run_test_muldiv::<1, 32, 32>(0, BN254_MODULUS.clone(), 50); // opcode_offset 0, 50 ops
+    }
 
-#[test]
-fn test_modular_is_equal_1x32() {
-    test_is_equal::<1, 32, 32>(17, secp256k1_coord_prime(), 100);
+    #[test]
+    fn test_modular_muldiv_3x16_bls12_381() {
+        run_test_muldiv::<3, 16, 48>(0, BLS12_381_MODULUS.clone(), 50); // BLOCKS = 3, BLOCK_SIZE = 16, NUM_LIMBS = 48
+    }
 }
 
-#[test]
-fn test_modular_is_equal_3x16() {
-    test_is_equal::<3, 16, 48>(17, BLS12_381_MODULUS.clone(), 100);
-}
+#[cfg(test)]
+mod is_equal_tests {
+    use openvm_rv32_adapters::{
+        Rv32IsEqualModAdapterAir, Rv32IsEqualModAdapterExecutor, Rv32IsEqualModAdapterFiller,
+    };
+    use openvm_stark_backend::{
+        p3_air::BaseAir,
+        p3_matrix::{
+            dense::{DenseMatrix, RowMajorMatrix},
+            Matrix,
+        },
+        utils::disable_debug_builder,
+        verifier::VerificationError,
+    };
 
-// Wrapper chip for testing a bad setup row
-type BadModularIsEqualChip<
-    F,
-    const NUM_LANES: usize,
-    const LANE_SIZE: usize,
-    const TOTAL_LIMBS: usize,
-> = VmChipWrapper<
-    F,
-    Rv32IsEqualModAdapterChip<F, 2, NUM_LANES, LANE_SIZE, TOTAL_LIMBS>,
-    BadModularIsEqualCoreChip<TOTAL_LIMBS, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>,
->;
-
-// Wrapper chip for testing a bad setup row
-struct BadModularIsEqualCoreChip<
-    const READ_LIMBS: usize,
-    const WRITE_LIMBS: usize,
-    const LIMB_BITS: usize,
-> {
-    chip: ModularIsEqualCoreChip<READ_LIMBS, WRITE_LIMBS, LIMB_BITS>,
-}
+    use super::*;
+
+    type Harness<const NUM_LANES: usize, const LANE_SIZE: usize, const TOTAL_LIMBS: usize> =
+        TestChipHarness< // bundles the is-equal executor, AIR and chip for these tests
+            F,
+            VmModularIsEqualExecutor<NUM_LANES, LANE_SIZE, TOTAL_LIMBS>,
+            ModularIsEqualAir<NUM_LANES, LANE_SIZE, TOTAL_LIMBS>,
+            ModularIsEqualChip<F, NUM_LANES, LANE_SIZE, TOTAL_LIMBS>,
+        >;
+
+    fn create_test_chips<
+        const NUM_LANES: usize,
+        const LANE_SIZE: usize,
+        const TOTAL_LIMBS: usize,
+    >(
+        tester: &mut VmChipTestBuilder<F>,
+        modulus: &BigUint, // used by the core AIR
+        modulus_limbs: [u8; TOTAL_LIMBS], // passed to both the executor and the filler
+        offset: usize, // passed to the core AIR, the executor and the filler
+    ) -> (
+        Harness<NUM_LANES, LANE_SIZE, TOTAL_LIMBS>,
+        (
+            BitwiseOperationLookupAir<LIMB_BITS>,
+            SharedBitwiseOperationLookupChip<LIMB_BITS>,
+        ),
+    ) {
+        let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS); // given to both AIRs
+        let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<LIMB_BITS>::new(bitwise_bus));
+
+        let air = ModularIsEqualAir::new(
+            Rv32IsEqualModAdapterAir::new(
+                tester.execution_bridge(),
+                tester.memory_bridge(),
+                bitwise_bus,
+                tester.address_bits(),
+            ),
+            ModularIsEqualCoreAir::new(modulus.clone(), bitwise_bus, offset),
+        );
+        let executor = VmModularIsEqualExecutor::new(
+            Rv32IsEqualModAdapterExecutor::new(tester.address_bits()),
+            offset,
+            modulus_limbs,
+        );
+        let chip = ModularIsEqualChip::<F, NUM_LANES, LANE_SIZE, TOTAL_LIMBS>::new(
+            ModularIsEqualFiller::new(
+                Rv32IsEqualModAdapterFiller::new(tester.address_bits(), bitwise_chip.clone()),
+                offset,
+                modulus_limbs,
+                bitwise_chip.clone(), // same bitwise chip as the adapter filler above
+            ),
+            tester.memory_helper(),
+        );
+        let harness = Harness::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
 
-impl<const READ_LIMBS: usize, const WRITE_LIMBS: usize, const LIMB_BITS: usize>
-    BadModularIsEqualCoreChip<READ_LIMBS, WRITE_LIMBS, LIMB_BITS>
-{
-    pub fn new(
-        modulus: BigUint,
-        bitwise_lookup_chip: SharedBitwiseOperationLookupChip<LIMB_BITS>,
+        (harness, (bitwise_chip.air, bitwise_chip)) // callers pass the (air, chip) pair to load_periphery
+    }
+
+    #[allow(clippy::too_many_arguments)]
+    fn set_and_execute_is_equal<
+        const NUM_LANES: usize,
+        const LANE_SIZE: usize,
+        const TOTAL_LIMBS: usize,
+    >(
+        tester: &mut VmChipTestBuilder<F>,
+        harness: &mut Harness<NUM_LANES, LANE_SIZE, TOTAL_LIMBS>,
+        rng: &mut StdRng,
+        modulus: &BigUint,
         offset: usize,
-    ) -> Self {
-        Self {
-            chip: ModularIsEqualCoreChip::new(modulus, bitwise_lookup_chip, offset),
-        }
+        modulus_limbs: [F; TOTAL_LIMBS], // only used as the first operand of the setup instruction
+        is_setup: bool, // true: issue SETUP_ISEQ with (modulus_limbs, zeros); b and c are ignored
+        b: Option<[F; TOTAL_LIMBS]>, // None: use a random field element
+        c: Option<[F; TOTAL_LIMBS]>, // None: reuse b with probability 0.5, else a random element
+    ) {
+        let instruction = if is_setup {
+            rv32_write_heap_default::<TOTAL_LIMBS>(
+                tester,
+                vec![modulus_limbs],
+                vec![[F::ZERO; TOTAL_LIMBS]],
+                offset + Rv32ModularArithmeticOpcode::SETUP_ISEQ as usize,
+            )
+        } else {
+            let b = b.unwrap_or( // NOTE: unwrap_or is eager, so rng advances even when b is Some
+                generate_field_element::<TOTAL_LIMBS, LIMB_BITS>(modulus, rng)
+                    .map(F::from_canonical_u32),
+            );
+            let c = c.unwrap_or(if rng.gen_bool(0.5) { // also evaluated eagerly, even when c is Some
+                b
+            } else {
+                generate_field_element::<TOTAL_LIMBS, LIMB_BITS>(modulus, rng)
+                    .map(F::from_canonical_u32)
+            });
+
+            rv32_write_heap_default::<TOTAL_LIMBS>(
+                tester,
+                vec![b],
+                vec![c],
+                offset + Rv32ModularArithmeticOpcode::IS_EQ as usize,
+            )
+        };
+        tester.execute(harness, &instruction);
     }
-}
 
-impl<
-        F: PrimeField32,
-        I: VmAdapterInterface<F>,
-        const READ_LIMBS: usize,
-        const WRITE_LIMBS: usize,
-        const LIMB_BITS: usize,
-    > VmCoreChip<F, I> for BadModularIsEqualCoreChip<READ_LIMBS, WRITE_LIMBS, LIMB_BITS>
-where
-    I::Reads: Into<[[F; READ_LIMBS]; 2]>,
-    I::Writes: From<[[F; WRITE_LIMBS]; 1]>,
-{
-    type Record = ModularIsEqualCoreRecord<F, READ_LIMBS>;
-    type Air = ModularIsEqualCoreAir<READ_LIMBS, WRITE_LIMBS, LIMB_BITS>;
-
-    #[allow(clippy::type_complexity)]
-    fn execute_instruction(
-        &self,
-        instruction: &Instruction<F>,
-        from_pc: u32,
-        reads: I::Reads,
-    ) -> Result<(AdapterRuntimeContext<F, I>, Self::Record)> {
-        // Override the b_diff_idx to be out of bounds.
-        // This will cause lt_marker to be all zeros except a 2.
-        // There was a bug in this case which allowed b to be less than N.
-        self.chip.execute_instruction(instruction, from_pc, reads)
+    //////////////////////////////////////////////////////////////////////////////////////
+    // POSITIVE TESTS
+    //
+    // Randomly generate computations and execute, ensuring that the generated trace
+    // passes all constraints.
+    //////////////////////////////////////////////////////////////////////////////////////
+
+    #[test]
+    fn test_modular_is_equal_1x32() {
+        test_is_equal::<1, 32, 32>(17, secp256k1_coord_prime(), 100); // opcode_offset 17, num_tests 100
     }
 
-    fn get_opcode_name(&self, opcode: usize) -> String {
-        <ModularIsEqualCoreChip<READ_LIMBS, WRITE_LIMBS, LIMB_BITS> as VmCoreChip<F, I>>::get_opcode_name(&self.chip, opcode)
+    #[test]
+    fn test_modular_is_equal_3x16() {
+        test_is_equal::<3, 16, 48>(17, BLS12_381_MODULUS.clone(), 100); // opcode_offset 17, num_tests 100
     }
 
-    fn generate_trace_row(&self, row_slice: &mut [F], record: Self::Record) {
-        <ModularIsEqualCoreChip<READ_LIMBS, WRITE_LIMBS, LIMB_BITS> as VmCoreChip<F, I>>::generate_trace_row(&self.chip, row_slice, record.clone());
-        let row_slice: &mut ModularIsEqualCoreCols<_, READ_LIMBS> = row_slice.borrow_mut();
-        // decide which bug to test based on b[0]
-        if record.b[0] == F::ONE {
-            // test the constraint that c_lt_mark = 2 when is_setup = 1
-            row_slice.c_lt_mark = F::ONE;
-            row_slice.lt_marker = [F::ZERO; READ_LIMBS];
-            row_slice.lt_marker[READ_LIMBS - 1] = F::ONE;
-            row_slice.c_lt_diff =
-                F::from_canonical_u32(self.chip.air.modulus_limbs[READ_LIMBS - 1])
-                    - record.c[READ_LIMBS - 1];
-            row_slice.b_lt_diff =
-                F::from_canonical_u32(self.chip.air.modulus_limbs[READ_LIMBS - 1])
-                    - record.b[READ_LIMBS - 1];
-        } else if record.b[0] == F::from_canonical_u32(2) {
-            // test the constraint that b[i] = N[i] for all i when prefix_sum is not 1 or
-            // lt_marker_sum - is_setup
-            row_slice.c_lt_mark = F::from_canonical_u8(2);
-            row_slice.lt_marker = [F::ZERO; READ_LIMBS];
-            row_slice.lt_marker[READ_LIMBS - 1] = F::from_canonical_u8(2);
-            row_slice.c_lt_diff =
-                F::from_canonical_u32(self.chip.air.modulus_limbs[READ_LIMBS - 1])
-                    - record.c[READ_LIMBS - 1];
-        } else if record.b[0] == F::from_canonical_u32(3) {
-            // test the constraint that sum_i lt_marker[i] = 2 when is_setup = 1
-            row_slice.c_lt_mark = F::from_canonical_u8(2);
-            row_slice.lt_marker = [F::ZERO; READ_LIMBS];
-            row_slice.lt_marker[READ_LIMBS - 1] = F::from_canonical_u8(2);
-            row_slice.lt_marker[0] = F::ONE;
-            row_slice.b_lt_diff =
-                F::from_canonical_u32(self.chip.air.modulus_limbs[0]) - record.b[0];
-            row_slice.c_lt_diff =
-                F::from_canonical_u32(self.chip.air.modulus_limbs[READ_LIMBS - 1])
-                    - record.c[READ_LIMBS - 1];
+    fn test_is_equal<const NUM_LANES: usize, const LANE_SIZE: usize, const TOTAL_LIMBS: usize>(
+        opcode_offset: usize,
+        modulus: BigUint,
+        num_tests: usize, // loop iterations; the first is the setup, one extra special case follows
+    ) {
+        let mut rng = create_seeded_rng();
+        let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
+
+        let modulus_limbs: [u8; TOTAL_LIMBS] = biguint_to_limbs_vec(&modulus, TOTAL_LIMBS)
+            .try_into()
+            .unwrap();
+
+        let (mut harness, bitwise) = create_test_chips::<NUM_LANES, LANE_SIZE, TOTAL_LIMBS>(
+            &mut tester,
+            &modulus,
+            modulus_limbs,
+            opcode_offset,
+        );
+
+        let modulus_limbs = modulus_limbs.map(F::from_canonical_u8); // shadow the u8 limbs with field elements
+
+        for i in 0..num_tests {
+            set_and_execute_is_equal(
+                &mut tester,
+                &mut harness,
+                &mut rng,
+                &modulus,
+                opcode_offset,
+                modulus_limbs,
+                i == 0, // the first test is a setup test
+                None,
+                None,
+            );
         }
-    }
 
-    fn air(&self) -> &Self::Air {
-        <ModularIsEqualCoreChip<READ_LIMBS, WRITE_LIMBS, LIMB_BITS> as VmCoreChip<F, I>>::air(
-            &self.chip,
-        )
+        // Special case where b == c are close to the prime
+        let mut b = modulus_limbs;
+        b[0] -= F::ONE; // presumably limb 0 is the least-significant limb, giving modulus - 1
+        set_and_execute_is_equal(
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            &modulus,
+            opcode_offset,
+            modulus_limbs,
+            false,
+            Some(b),
+            Some(b), // c == b
+        );
+
+        let tester = tester
+            .build()
+            .load(harness)
+            .load_periphery(bitwise)
+            .finalize();
+        tester.simple_test().expect("Verification failed");
     }
-}
 
-// Test that passes the wrong modulus in the setup instruction.
-// This proof should fail to verify.
-fn test_is_equal_setup_bad<
-    const NUM_LANES: usize,
-    const LANE_SIZE: usize,
-    const TOTAL_LIMBS: usize,
->(
-    opcode_offset: usize,
-    modulus: BigUint,
-    b_val: u32, /* used to select which bug to test. currently only 1, 2, and 3 are supported
-                 * (because there are three bugs to test) */
-) {
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<LIMB_BITS>::new(bitwise_bus);
-
-    let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-    let mut chip = BadModularIsEqualChip::<F, NUM_LANES, LANE_SIZE, TOTAL_LIMBS>::new(
-        Rv32IsEqualModAdapterChip::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-            tester.address_bits(),
-            bitwise_chip.clone(),
-        ),
-        BadModularIsEqualCoreChip::new(modulus.clone(), bitwise_chip.clone(), opcode_offset),
-        tester.offline_memory_mutex_arc(),
-    );
-
-    let mut b_limbs = [F::ZERO; TOTAL_LIMBS];
-    b_limbs[0] = F::from_canonical_u32(b_val);
-    let setup_instruction = rv32_write_heap_default::<TOTAL_LIMBS>(
-        &mut tester,
-        vec![b_limbs],
-        vec![[F::ZERO; TOTAL_LIMBS]],
-        opcode_offset + Rv32ModularArithmeticOpcode::SETUP_ISEQ as usize,
-    );
-    tester.execute(&mut chip, &setup_instruction);
-
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
-    tester.simple_test().expect("Verification failed");
-}
+    //////////////////////////////////////////////////////////////////////////////////////
+    // NEGATIVE TESTS
+    //
+    // Given a fake trace of a single operation, set up a chip and run the test. We replace
+    // part of the trace and check that the chip throws the expected error.
+    //////////////////////////////////////////////////////////////////////////////////////
+
+    /// Runs one of 3 negative cases on a pranked setup row, selected by `test_case` (1, 2 or 3).
+    fn run_negative_is_equal_test<
+        const NUM_LANES: usize,
+        const LANE_SIZE: usize,
+        const READ_LIMBS: usize,
+    >(
+        modulus: BigUint,
+        opcode_offset: usize,
+        test_case: usize, // 1, 2 or 3; the same value is also written into b[0] of the row
+        expected_error: VerificationError, // error the verifier must report for the pranked trace
+    ) {
+        let mut rng = create_seeded_rng();
+        let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
+
+        let modulus_limbs: [u8; READ_LIMBS] = biguint_to_limbs_vec(&modulus, READ_LIMBS)
+            .try_into()
+            .unwrap();
+
+        let (mut harness, bitwise) = create_test_chips::<NUM_LANES, LANE_SIZE, READ_LIMBS>(
+            &mut tester,
+            &modulus,
+            modulus_limbs,
+            opcode_offset,
+        );
 
-#[should_panic]
-#[test]
-fn test_modular_is_equal_setup_bad_1_1x32() {
-    test_is_equal_setup_bad::<1, 32, 32>(17, secp256k1_coord_prime(), 1);
-}
+        let modulus_limbs = modulus_limbs.map(F::from_canonical_u8);
 
-#[should_panic]
-#[test]
-fn test_modular_is_equal_setup_bad_2_1x32_2() {
-    test_is_equal_setup_bad::<1, 32, 32>(17, secp256k1_coord_prime(), 2);
-}
+        set_and_execute_is_equal( // execute a single setup instruction; its row is pranked below
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            &modulus,
+            opcode_offset,
+            modulus_limbs,
+            true,
+            None,
+            None,
+        );
 
-#[should_panic]
-#[test]
-fn test_modular_is_equal_setup_bad_3_1x32() {
-    test_is_equal_setup_bad::<1, 32, 32>(17, secp256k1_coord_prime(), 3);
-}
+        let adapter_width = BaseAir::<F>::width(&harness.air.adapter); // core columns follow the adapter columns
+        let modify_trace = |trace: &mut DenseMatrix<F>| {
+            let mut trace_row = trace.row_slice(0).to_vec(); // copy of row 0 (presumably the setup row)
+            let cols: &mut ModularIsEqualCoreCols<_, READ_LIMBS> =
+                trace_row.split_at_mut(adapter_width).1.borrow_mut();
+            if test_case == 1 {
+                // test the constraint that c_lt_mark = 2 when is_setup = 1
+                cols.b[0] = F::from_canonical_u32(1);
+                cols.c_lt_mark = F::ONE;
+                cols.lt_marker = [F::ZERO; READ_LIMBS];
+                cols.lt_marker[READ_LIMBS - 1] = F::ONE;
+                cols.c_lt_diff = modulus_limbs[READ_LIMBS - 1] - cols.c[READ_LIMBS - 1];
+                cols.b_lt_diff = modulus_limbs[READ_LIMBS - 1] - cols.b[READ_LIMBS - 1];
+            } else if test_case == 2 {
+                // test the constraint that b[i] = N[i] for all i when prefix_sum is not 1 or
+                // lt_marker_sum - is_setup
+                cols.b[0] = F::from_canonical_u32(2);
+                cols.c_lt_mark = F::from_canonical_u8(2);
+                cols.lt_marker = [F::ZERO; READ_LIMBS];
+                cols.lt_marker[READ_LIMBS - 1] = F::from_canonical_u8(2);
+                cols.c_lt_diff = modulus_limbs[READ_LIMBS - 1] - cols.c[READ_LIMBS - 1];
+            } else if test_case == 3 {
+                // test the constraint that sum_i lt_marker[i] = 2 when is_setup = 1
+                cols.b[0] = F::from_canonical_u32(3);
+                cols.c_lt_mark = F::from_canonical_u8(2);
+                cols.lt_marker = [F::ZERO; READ_LIMBS];
+                cols.lt_marker[READ_LIMBS - 1] = F::from_canonical_u8(2);
+                cols.lt_marker[0] = F::ONE;
+                cols.b_lt_diff = modulus_limbs[0] - cols.b[0];
+                cols.c_lt_diff = modulus_limbs[READ_LIMBS - 1] - cols.c[READ_LIMBS - 1];
+            } // any other test_case leaves the copied row unmodified
+            *trace = RowMajorMatrix::new(trace_row, trace.width()); // replace the trace with the single copied row
+        };
 
-#[should_panic]
-#[test]
-fn test_modular_is_equal_setup_bad_1_3x16() {
-    test_is_equal_setup_bad::<3, 16, 48>(17, BLS12_381_MODULUS.clone(), 1);
-}
+        disable_debug_builder();
+        let tester = tester
+            .build()
+            .load_and_prank_trace(harness, modify_trace)
+            .load_periphery(bitwise)
+            .finalize();
+        tester.simple_test_with_expected_error(expected_error);
+    }
 
-#[should_panic]
-#[test]
-fn test_modular_is_equal_setup_bad_2_3x16() {
-    test_is_equal_setup_bad::<3, 16, 48>(17, BLS12_381_MODULUS.clone(), 2);
-}
+    #[test]
+    fn negative_test_modular_is_equal_1x32() {
+        run_negative_is_equal_test::<1, 32, 32>(
+            secp256k1_coord_prime(),
+            17, // opcode_offset
+            1, // test_case
+            VerificationError::OodEvaluationMismatch,
+        );
+
+        run_negative_is_equal_test::<1, 32, 32>(
+            secp256k1_coord_prime(),
+            17, // opcode_offset
+            2, // test_case
+            VerificationError::OodEvaluationMismatch,
+        );
+
+        run_negative_is_equal_test::<1, 32, 32>(
+            secp256k1_coord_prime(),
+            17, // opcode_offset
+            3, // test_case
+            VerificationError::OodEvaluationMismatch,
+        );
+    }
+
+    #[test]
+    fn negative_test_modular_is_equal_3x16() {
+        run_negative_is_equal_test::<3, 16, 48>(
+            BLS12_381_MODULUS.clone(),
+            17, // opcode_offset
+            1, // test_case
+            VerificationError::OodEvaluationMismatch,
+        );
+
+        run_negative_is_equal_test::<3, 16, 48>(
+            BLS12_381_MODULUS.clone(),
+            17, // opcode_offset
+            2, // test_case
+            VerificationError::OodEvaluationMismatch,
+        );
 
-#[should_panic]
-#[test]
-fn test_modular_is_equal_setup_bad_3_3x16() {
-    test_is_equal_setup_bad::<3, 16, 48>(17, BLS12_381_MODULUS.clone(), 3);
+        run_negative_is_equal_test::<3, 16, 48>(
+            BLS12_381_MODULUS.clone(),
+            17, // opcode_offset
+            3, // test_case
+            VerificationError::OodEvaluationMismatch,
+        );
+    }
 }
diff --git a/extensions/algebra/circuit/src/modular_extension.rs b/extensions/algebra/circuit/src/modular_extension.rs
index 99632d6ce3..8946daa9c3 100644
--- a/extensions/algebra/circuit/src/modular_extension.rs
+++ b/extensions/algebra/circuit/src/modular_extension.rs
@@ -1,28 +1,50 @@
-use derive_more::derive::From;
+use std::{array, sync::Arc};
+
 use num_bigint::{BigUint, RandBigInt};
 use num_traits::{FromPrimitive, One};
 use openvm_algebra_transpiler::{ModularPhantom, Rv32ModularArithmeticOpcode};
 use openvm_circuit::{
     self,
-    arch::{SystemPort, VmExtension, VmInventory, VmInventoryBuilder, VmInventoryError},
-    system::phantom::PhantomChip,
+    arch::{
+        AirInventory, AirInventoryError, ChipInventory, ChipInventoryError, ExecutionBridge,
+        ExecutorInventoryBuilder, ExecutorInventoryError, RowMajorMatrixArena, VmCircuitExtension,
+        VmExecutionExtension, VmProverExtension,
+    },
+    system::{memory::SharedMemoryHelper, SystemPort},
 };
-use openvm_circuit_derive::{AnyEnum, InstructionExecutor};
-use openvm_circuit_primitives::bitwise_op_lookup::{
-    BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
+use openvm_circuit_derive::{AnyEnum, Executor, MeteredExecutor, PreflightExecutor};
+use openvm_circuit_primitives::{
+    bigint::utils::big_uint_to_limbs,
+    bitwise_op_lookup::{
+        BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
+        SharedBitwiseOperationLookupChip,
+    },
+    var_range::VariableRangeCheckerBus,
 };
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
 use openvm_instructions::{LocalOpcode, PhantomDiscriminant, VmOpcode};
 use openvm_mod_circuit_builder::ExprBuilderConfig;
-use openvm_rv32_adapters::{Rv32IsEqualModAdapterChip, Rv32VecHeapAdapterChip};
-use openvm_stark_backend::p3_field::PrimeField32;
+use openvm_rv32_adapters::{
+    Rv32IsEqualModAdapterAir, Rv32IsEqualModAdapterExecutor, Rv32IsEqualModAdapterFiller,
+};
+use openvm_stark_backend::{
+    config::{StarkGenericConfig, Val},
+    p3_field::PrimeField32,
+    prover::cpu::{CpuBackend, CpuDevice},
+};
+use openvm_stark_sdk::engine::StarkEngine;
 use rand::Rng;
 use serde::{Deserialize, Serialize};
 use serde_with::{serde_as, DisplayFromStr};
 use strum::EnumCount;
 
-use crate::modular_chip::{
-    ModularAddSubChip, ModularIsEqualChip, ModularIsEqualCoreChip, ModularMulDivChip,
+use crate::{
+    modular_chip::{
+        get_modular_addsub_air, get_modular_addsub_chip, get_modular_addsub_step,
+        get_modular_muldiv_air, get_modular_muldiv_chip, get_modular_muldiv_step, ModularAir,
+        ModularExecutor, ModularIsEqualAir, ModularIsEqualChip, ModularIsEqualCoreAir,
+        ModularIsEqualFiller, VmModularIsEqualExecutor,
+    },
+    AlgebraCpuProverExt,
 };
 
 #[serde_as]
@@ -46,205 +68,415 @@ impl ModularExtension {
     }
 }
 
-#[derive(ChipUsageGetter, Chip, InstructionExecutor, AnyEnum, From)]
-pub enum ModularExtensionExecutor<F: PrimeField32> {
+#[derive(Clone, AnyEnum, Executor, MeteredExecutor, PreflightExecutor)]
+pub enum ModularExtensionExecutor {
     // 32 limbs prime
-    ModularAddSubRv32_32(ModularAddSubChip<F, 1, 32>),
-    ModularMulDivRv32_32(ModularMulDivChip<F, 1, 32>),
-    ModularIsEqualRv32_32(ModularIsEqualChip<F, 1, 32, 32>),
+    ModularAddSubRv32_32(ModularExecutor<1, 32>), // ModularAddSub
+    ModularMulDivRv32_32(ModularExecutor<1, 32>), // ModularMulDiv
+    ModularIsEqualRv32_32(VmModularIsEqualExecutor<1, 32, 32>), // ModularIsEqual
     // 48 limbs prime
-    ModularAddSubRv32_48(ModularAddSubChip<F, 3, 16>),
-    ModularMulDivRv32_48(ModularMulDivChip<F, 3, 16>),
-    ModularIsEqualRv32_48(ModularIsEqualChip<F, 3, 16, 48>),
-}
-
-#[derive(ChipUsageGetter, Chip, AnyEnum, From)]
-pub enum ModularExtensionPeriphery<F: PrimeField32> {
-    BitwiseOperationLookup(SharedBitwiseOperationLookupChip<8>),
-    // We put this only to get the <F> generic to work
-    Phantom(PhantomChip<F>),
+    ModularAddSubRv32_48(ModularExecutor<3, 16>), // ModularAddSub
+    ModularMulDivRv32_48(ModularExecutor<3, 16>), // ModularMulDiv
+    ModularIsEqualRv32_48(VmModularIsEqualExecutor<3, 16, 48>), // ModularIsEqual
 }
 
-impl<F: PrimeField32> VmExtension<F> for ModularExtension {
-    type Executor = ModularExtensionExecutor<F>;
-    type Periphery = ModularExtensionPeriphery<F>;
+impl<F: PrimeField32> VmExecutionExtension<F> for ModularExtension {
+    type Executor = ModularExtensionExecutor;
 
-    fn build(
+    fn extend_execution(
         &self,
-        builder: &mut VmInventoryBuilder<F>,
-    ) -> Result<VmInventory<Self::Executor, Self::Periphery>, VmInventoryError> {
-        let mut inventory = VmInventory::new();
-        let SystemPort {
-            execution_bus,
-            program_bus,
-            memory_bridge,
-        } = builder.system_port();
-        let range_checker = builder.system_base().range_checker_chip.clone();
-        let bitwise_lu_chip = if let Some(&chip) = builder
-            .find_chip::<SharedBitwiseOperationLookupChip<8>>()
-            .first()
-        {
-            chip.clone()
-        } else {
-            let bitwise_lu_bus = BitwiseOperationLookupBus::new(builder.new_bus_idx());
-            let chip = SharedBitwiseOperationLookupChip::new(bitwise_lu_bus);
-            inventory.add_periphery_chip(chip.clone());
-            chip
-        };
-        let offline_memory = builder.system_base().offline_memory();
-        let address_bits = builder.system_config().memory_config.pointer_max_bits;
-
-        let addsub_opcodes = (Rv32ModularArithmeticOpcode::ADD as usize)
-            ..=(Rv32ModularArithmeticOpcode::SETUP_ADDSUB as usize);
-        let muldiv_opcodes = (Rv32ModularArithmeticOpcode::MUL as usize)
-            ..=(Rv32ModularArithmeticOpcode::SETUP_MULDIV as usize);
-        let iseq_opcodes = (Rv32ModularArithmeticOpcode::IS_EQ as usize)
-            ..=(Rv32ModularArithmeticOpcode::SETUP_ISEQ as usize);
-
+        inventory: &mut ExecutorInventoryBuilder<F, ModularExtensionExecutor>,
+    ) -> Result<(), ExecutorInventoryError> {
+        let pointer_max_bits = inventory.pointer_max_bits();
+        // TODO: get the real range checker bus from `ExecutorInventoryBuilder`; this one is a placeholder
+        let dummy_range_checker_bus = VariableRangeCheckerBus::new(u16::MAX, 16);
         for (i, modulus) in self.supported_moduli.iter().enumerate() {
             // determine the number of bytes needed to represent a prime field element
             let bytes = modulus.bits().div_ceil(8);
             let start_offset =
                 Rv32ModularArithmeticOpcode::CLASS_OFFSET + i * Rv32ModularArithmeticOpcode::COUNT;
-
-            let config32 = ExprBuilderConfig {
-                modulus: modulus.clone(),
-                num_limbs: 32,
-                limb_bits: 8,
-            };
-            let config48 = ExprBuilderConfig {
-                modulus: modulus.clone(),
-                num_limbs: 48,
-                limb_bits: 8,
-            };
-            let adapter_chip_32 = Rv32VecHeapAdapterChip::new(
-                execution_bus,
-                program_bus,
-                memory_bridge,
-                address_bits,
-                bitwise_lu_chip.clone(),
-            );
-            let adapter_chip_48 = Rv32VecHeapAdapterChip::new(
-                execution_bus,
-                program_bus,
-                memory_bridge,
-                address_bits,
-                bitwise_lu_chip.clone(),
-            );
-
+            let modulus_limbs = big_uint_to_limbs(modulus, 8);
             if bytes <= 32 {
-                let addsub_chip = ModularAddSubChip::new(
-                    adapter_chip_32.clone(),
-                    config32.clone(),
+                let config = ExprBuilderConfig {
+                    modulus: modulus.clone(),
+                    num_limbs: 32,
+                    limb_bits: 8,
+                };
+                let addsub = get_modular_addsub_step(
+                    config.clone(),
+                    dummy_range_checker_bus,
+                    pointer_max_bits,
                     start_offset,
-                    range_checker.clone(),
-                    offline_memory.clone(),
                 );
+
                 inventory.add_executor(
-                    ModularExtensionExecutor::ModularAddSubRv32_32(addsub_chip),
-                    addsub_opcodes
-                        .clone()
+                    ModularExtensionExecutor::ModularAddSubRv32_32(addsub),
+                    ((Rv32ModularArithmeticOpcode::ADD as usize)
+                        ..=(Rv32ModularArithmeticOpcode::SETUP_ADDSUB as usize))
                         .map(|x| VmOpcode::from_usize(x + start_offset)),
                 )?;
-                let muldiv_chip = ModularMulDivChip::new(
-                    adapter_chip_32.clone(),
-                    config32.clone(),
+
+                let muldiv = get_modular_muldiv_step(
+                    config,
+                    dummy_range_checker_bus,
+                    pointer_max_bits,
                     start_offset,
-                    range_checker.clone(),
-                    offline_memory.clone(),
                 );
+
                 inventory.add_executor(
-                    ModularExtensionExecutor::ModularMulDivRv32_32(muldiv_chip),
-                    muldiv_opcodes
-                        .clone()
+                    ModularExtensionExecutor::ModularMulDivRv32_32(muldiv),
+                    ((Rv32ModularArithmeticOpcode::MUL as usize)
+                        ..=(Rv32ModularArithmeticOpcode::SETUP_MULDIV as usize))
                         .map(|x| VmOpcode::from_usize(x + start_offset)),
                 )?;
-                let isequal_chip = ModularIsEqualChip::new(
-                    Rv32IsEqualModAdapterChip::new(
-                        execution_bus,
-                        program_bus,
-                        memory_bridge,
-                        address_bits,
-                        bitwise_lu_chip.clone(),
-                    ),
-                    ModularIsEqualCoreChip::new(
-                        modulus.clone(),
-                        bitwise_lu_chip.clone(),
-                        start_offset,
-                    ),
-                    offline_memory.clone(),
+
+                let modulus_limbs = array::from_fn(|i| {
+                    if i < modulus_limbs.len() {
+                        modulus_limbs[i] as u8
+                    } else {
+                        0
+                    }
+                });
+
+                let is_eq = VmModularIsEqualExecutor::new(
+                    Rv32IsEqualModAdapterExecutor::new(pointer_max_bits),
+                    start_offset,
+                    modulus_limbs,
                 );
+
                 inventory.add_executor(
-                    ModularExtensionExecutor::ModularIsEqualRv32_32(isequal_chip),
-                    iseq_opcodes
-                        .clone()
+                    ModularExtensionExecutor::ModularIsEqualRv32_32(is_eq),
+                    ((Rv32ModularArithmeticOpcode::IS_EQ as usize)
+                        ..=(Rv32ModularArithmeticOpcode::SETUP_ISEQ as usize))
                         .map(|x| VmOpcode::from_usize(x + start_offset)),
                 )?;
             } else if bytes <= 48 {
-                let addsub_chip = ModularAddSubChip::new(
-                    adapter_chip_48.clone(),
-                    config48.clone(),
+                let config = ExprBuilderConfig {
+                    modulus: modulus.clone(),
+                    num_limbs: 48,
+                    limb_bits: 8,
+                };
+                let addsub = get_modular_addsub_step(
+                    config.clone(),
+                    dummy_range_checker_bus,
+                    pointer_max_bits,
                     start_offset,
-                    range_checker.clone(),
-                    offline_memory.clone(),
                 );
+
                 inventory.add_executor(
-                    ModularExtensionExecutor::ModularAddSubRv32_48(addsub_chip),
-                    addsub_opcodes
-                        .clone()
+                    ModularExtensionExecutor::ModularAddSubRv32_48(addsub),
+                    ((Rv32ModularArithmeticOpcode::ADD as usize)
+                        ..=(Rv32ModularArithmeticOpcode::SETUP_ADDSUB as usize))
                         .map(|x| VmOpcode::from_usize(x + start_offset)),
                 )?;
-                let muldiv_chip = ModularMulDivChip::new(
-                    adapter_chip_48.clone(),
-                    config48.clone(),
+
+                let muldiv = get_modular_muldiv_step(
+                    config,
+                    dummy_range_checker_bus,
+                    pointer_max_bits,
                     start_offset,
-                    range_checker.clone(),
-                    offline_memory.clone(),
                 );
+
                 inventory.add_executor(
-                    ModularExtensionExecutor::ModularMulDivRv32_48(muldiv_chip),
-                    muldiv_opcodes
-                        .clone()
+                    ModularExtensionExecutor::ModularMulDivRv32_48(muldiv),
+                    ((Rv32ModularArithmeticOpcode::MUL as usize)
+                        ..=(Rv32ModularArithmeticOpcode::SETUP_MULDIV as usize))
                         .map(|x| VmOpcode::from_usize(x + start_offset)),
                 )?;
-                let isequal_chip = ModularIsEqualChip::new(
-                    Rv32IsEqualModAdapterChip::new(
-                        execution_bus,
-                        program_bus,
-                        memory_bridge,
-                        address_bits,
-                        bitwise_lu_chip.clone(),
-                    ),
-                    ModularIsEqualCoreChip::new(
-                        modulus.clone(),
-                        bitwise_lu_chip.clone(),
-                        start_offset,
-                    ),
-                    offline_memory.clone(),
+
+                let modulus_limbs = array::from_fn(|i| {
+                    if i < modulus_limbs.len() {
+                        modulus_limbs[i] as u8
+                    } else {
+                        0
+                    }
+                });
+
+                let is_eq = VmModularIsEqualExecutor::new(
+                    Rv32IsEqualModAdapterExecutor::new(pointer_max_bits),
+                    start_offset,
+                    modulus_limbs,
                 );
+
                 inventory.add_executor(
-                    ModularExtensionExecutor::ModularIsEqualRv32_48(isequal_chip),
-                    iseq_opcodes
-                        .clone()
+                    ModularExtensionExecutor::ModularIsEqualRv32_48(is_eq),
+                    ((Rv32ModularArithmeticOpcode::IS_EQ as usize)
+                        ..=(Rv32ModularArithmeticOpcode::SETUP_ISEQ as usize))
                         .map(|x| VmOpcode::from_usize(x + start_offset)),
                 )?;
             } else {
                 panic!("Modulus too large");
             }
         }
+
         let non_qr_hint_sub_ex = phantom::NonQrHintSubEx::new(self.supported_moduli.clone());
-        builder.add_phantom_sub_executor(
+        inventory.add_phantom_sub_executor(
             non_qr_hint_sub_ex.clone(),
             PhantomDiscriminant(ModularPhantom::HintNonQr as u16),
         )?;
 
         let sqrt_hint_sub_ex = phantom::SqrtHintSubEx::new(non_qr_hint_sub_ex);
-        builder.add_phantom_sub_executor(
+        inventory.add_phantom_sub_executor(
             sqrt_hint_sub_ex,
             PhantomDiscriminant(ModularPhantom::HintSqrt as u16),
         )?;
 
-        Ok(inventory)
+        Ok(())
+    }
+}
+
+impl<SC: StarkGenericConfig> VmCircuitExtension<SC> for ModularExtension {
+    fn extend_circuit(&self, inventory: &mut AirInventory<SC>) -> Result<(), AirInventoryError> {
+        let SystemPort {
+            execution_bus,
+            program_bus,
+            memory_bridge,
+        } = inventory.system().port();
+
+        let exec_bridge = ExecutionBridge::new(execution_bus, program_bus);
+        let range_checker_bus = inventory.range_checker().bus;
+        let pointer_max_bits = inventory.pointer_max_bits();
+
+        let bitwise_lu = {
+            // Bind first: this borrow of `inventory` must end before the `else` branch mutates it
+            let existing_air = inventory.find_air::<BitwiseOperationLookupAir<8>>().next();
+            if let Some(air) = existing_air {
+                air.bus // reuse the bus of the lookup AIR that is already present
+            } else { // none found: allocate a new bus and add the lookup AIR
+                let bus = BitwiseOperationLookupBus::new(inventory.new_bus_idx());
+                let air = BitwiseOperationLookupAir::<8>::new(bus);
+                inventory.add_air(air);
+                air.bus
+            }
+        };
+        for (i, modulus) in self.supported_moduli.iter().enumerate() {
+            // determine the number of bytes needed to represent a prime field element
+            let bytes = modulus.bits().div_ceil(8);
+            let start_offset =
+                Rv32ModularArithmeticOpcode::CLASS_OFFSET + i * Rv32ModularArithmeticOpcode::COUNT;
+
+            if bytes <= 32 { // modulus fits in 32 limbs of 8 bits
+                let config = ExprBuilderConfig {
+                    modulus: modulus.clone(),
+                    num_limbs: 32,
+                    limb_bits: 8,
+                };
+
+                let addsub = get_modular_addsub_air::<1, 32>(
+                    exec_bridge,
+                    memory_bridge,
+                    config.clone(),
+                    range_checker_bus,
+                    bitwise_lu,
+                    pointer_max_bits,
+                    start_offset,
+                );
+                inventory.add_air(addsub);
+
+                let muldiv = get_modular_muldiv_air::<1, 32>(
+                    exec_bridge,
+                    memory_bridge,
+                    config,
+                    range_checker_bus,
+                    bitwise_lu,
+                    pointer_max_bits,
+                    start_offset,
+                );
+                inventory.add_air(muldiv);
+
+                let is_eq = ModularIsEqualAir::<1, 32, 32>::new(
+                    Rv32IsEqualModAdapterAir::new(
+                        exec_bridge,
+                        memory_bridge,
+                        bitwise_lu,
+                        pointer_max_bits,
+                    ),
+                    ModularIsEqualCoreAir::new(modulus.clone(), bitwise_lu, start_offset),
+                );
+                inventory.add_air(is_eq);
+            } else if bytes <= 48 { // modulus fits in 48 limbs of 8 bits
+                let config = ExprBuilderConfig {
+                    modulus: modulus.clone(),
+                    num_limbs: 48,
+                    limb_bits: 8,
+                };
+
+                let addsub = get_modular_addsub_air::<3, 16>(
+                    exec_bridge,
+                    memory_bridge,
+                    config.clone(),
+                    range_checker_bus,
+                    bitwise_lu,
+                    pointer_max_bits,
+                    start_offset,
+                );
+                inventory.add_air(addsub);
+
+                let muldiv = get_modular_muldiv_air::<3, 16>(
+                    exec_bridge,
+                    memory_bridge,
+                    config,
+                    range_checker_bus,
+                    bitwise_lu,
+                    pointer_max_bits,
+                    start_offset,
+                );
+                inventory.add_air(muldiv);
+
+                let is_eq = ModularIsEqualAir::<3, 16, 48>::new(
+                    Rv32IsEqualModAdapterAir::new(
+                        exec_bridge,
+                        memory_bridge,
+                        bitwise_lu,
+                        pointer_max_bits,
+                    ),
+                    ModularIsEqualCoreAir::new(modulus.clone(), bitwise_lu, start_offset),
+                );
+                inventory.add_air(is_eq);
+            } else { // more than 48 bytes: not supported
+                panic!("Modulus too large");
+            }
+        }
+
+        Ok(())
+    }
+}
+
+// This implementation is specific to CpuBackend because the lookup chips (VariableRangeChecker,
+// BitwiseOperationLookupChip) are specific to CpuBackend.
+impl<E, SC, RA> VmProverExtension<E, RA, ModularExtension> for AlgebraCpuProverExt
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    RA: RowMajorMatrixArena<Val<SC>>,
+    Val<SC>: PrimeField32,
+{
+    fn extend_prover(
+        &self,
+        extension: &ModularExtension,
+        inventory: &mut ChipInventory<SC, RA, CpuBackend<SC>>,
+    ) -> Result<(), ChipInventoryError> {
+        let range_checker = inventory.range_checker()?.clone();
+        let timestamp_max_bits = inventory.timestamp_max_bits();
+        let pointer_max_bits = inventory.airs().pointer_max_bits();
+        let mem_helper = SharedMemoryHelper::new(range_checker.clone(), timestamp_max_bits);
+        let bitwise_lu = {
+            let existing_chip = inventory
+                .find_chip::<SharedBitwiseOperationLookupChip<8>>()
+                .next();
+            if let Some(chip) = existing_chip {
+                chip.clone() // share the lookup chip that is already present
+            } else { // none found: build one on the bus of the next AIR
+                let air: &BitwiseOperationLookupAir<8> = inventory.next_air()?;
+                let chip = Arc::new(BitwiseOperationLookupChip::new(air.bus));
+                inventory.add_periphery_chip(chip.clone());
+                chip
+            }
+        };
+        for (i, modulus) in extension.supported_moduli.iter().enumerate() {
+            // determine the number of bytes needed to represent a prime field element
+            let bytes = modulus.bits().div_ceil(8);
+            let start_offset =
+                Rv32ModularArithmeticOpcode::CLASS_OFFSET + i * Rv32ModularArithmeticOpcode::COUNT;
+
+            let modulus_limbs = big_uint_to_limbs(modulus, 8);
+
+            if bytes <= 32 { // modulus fits in 32 limbs of 8 bits
+                let config = ExprBuilderConfig {
+                    modulus: modulus.clone(),
+                    num_limbs: 32,
+                    limb_bits: 8,
+                };
+
+                inventory.next_air::<ModularAir<1, 32>>()?; // presumably the addsub AIR; TODO confirm
+                let addsub = get_modular_addsub_chip::<Val<SC>, 1, 32>(
+                    config.clone(),
+                    mem_helper.clone(),
+                    range_checker.clone(),
+                    bitwise_lu.clone(),
+                    pointer_max_bits,
+                );
+                inventory.add_executor_chip(addsub);
+
+                inventory.next_air::<ModularAir<1, 32>>()?; // presumably the muldiv AIR; TODO confirm
+                let muldiv = get_modular_muldiv_chip::<Val<SC>, 1, 32>(
+                    config,
+                    mem_helper.clone(),
+                    range_checker.clone(),
+                    bitwise_lu.clone(),
+                    pointer_max_bits,
+                );
+                inventory.add_executor_chip(muldiv);
+
+                let modulus_limbs = array::from_fn(|i| { // limbs as `u8`, zero-padded to array length
+                    if i < modulus_limbs.len() {
+                        modulus_limbs[i] as u8
+                    } else {
+                        0
+                    }
+                });
+                inventory.next_air::<ModularIsEqualAir<1, 32, 32>>()?;
+                let is_eq = ModularIsEqualChip::<Val<SC>, 1, 32, 32>::new(
+                    ModularIsEqualFiller::new(
+                        Rv32IsEqualModAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()),
+                        start_offset,
+                        modulus_limbs,
+                        bitwise_lu.clone(),
+                    ),
+                    mem_helper.clone(),
+                );
+                inventory.add_executor_chip(is_eq);
+            } else if bytes <= 48 { // modulus fits in 48 limbs of 8 bits
+                let config = ExprBuilderConfig {
+                    modulus: modulus.clone(),
+                    num_limbs: 48,
+                    limb_bits: 8,
+                };
+
+                inventory.next_air::<ModularAir<3, 16>>()?; // presumably the addsub AIR; TODO confirm
+                let addsub = get_modular_addsub_chip::<Val<SC>, 3, 16>(
+                    config.clone(),
+                    mem_helper.clone(),
+                    range_checker.clone(),
+                    bitwise_lu.clone(),
+                    pointer_max_bits,
+                );
+                inventory.add_executor_chip(addsub);
+
+                inventory.next_air::<ModularAir<3, 16>>()?; // presumably the muldiv AIR; TODO confirm
+                let muldiv = get_modular_muldiv_chip::<Val<SC>, 3, 16>(
+                    config,
+                    mem_helper.clone(),
+                    range_checker.clone(),
+                    bitwise_lu.clone(),
+                    pointer_max_bits,
+                );
+                inventory.add_executor_chip(muldiv);
+
+                let modulus_limbs = array::from_fn(|i| { // limbs as `u8`, zero-padded to array length
+                    if i < modulus_limbs.len() {
+                        modulus_limbs[i] as u8
+                    } else {
+                        0
+                    }
+                });
+                inventory.next_air::<ModularIsEqualAir<3, 16, 48>>()?;
+                let is_eq = ModularIsEqualChip::<Val<SC>, 3, 16, 48>::new(
+                    ModularIsEqualFiller::new(
+                        Rv32IsEqualModAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()),
+                        start_offset,
+                        modulus_limbs,
+                        bitwise_lu.clone(),
+                    ),
+                    mem_helper.clone(),
+                );
+                inventory.add_executor_chip(is_eq);
+            } else { // more than 48 bytes: not supported
+                panic!("Modulus too large");
+            }
+        }
+
+        Ok(())
     }
 }
 
@@ -258,10 +490,10 @@ pub(crate) mod phantom {
     use num_bigint::BigUint;
     use openvm_circuit::{
         arch::{PhantomSubExecutor, Streams},
-        system::memory::MemoryController,
+        system::memory::online::GuestMemory,
     };
     use openvm_instructions::{riscv::RV32_MEMORY_AS, PhantomDiscriminant};
-    use openvm_rv32im_circuit::adapters::unsafe_read_rv32_register;
+    use openvm_rv32im_circuit::adapters::read_rv32_register;
     use openvm_stark_backend::p3_field::PrimeField32;
     use rand::{rngs::StdRng, SeedableRng};
 
@@ -282,12 +514,13 @@ pub(crate) mod phantom {
     // Note that non_qr is fixed for each modulus.
     impl<F: PrimeField32> PhantomSubExecutor<F> for SqrtHintSubEx {
         fn phantom_execute(
-            &mut self,
-            memory: &MemoryController<F>,
+            &self,
+            memory: &GuestMemory,
             streams: &mut Streams<F>,
+            _: &mut StdRng,
             _: PhantomDiscriminant,
-            a: F,
-            _: F,
+            a: u32,
+            _: u32,
             c_upper: u16,
         ) -> eyre::Result<()> {
             let mod_idx = c_upper as usize;
@@ -306,15 +539,12 @@ pub(crate) mod phantom {
                 bail!("Modulus too large")
             };
 
-            let rs1 = unsafe_read_rv32_register(memory, a);
-            let mut x_limbs: Vec<u8> = Vec::with_capacity(num_limbs);
-            for i in 0..num_limbs {
-                let limb = memory.unsafe_read_cell(
-                    F::from_canonical_u32(RV32_MEMORY_AS),
-                    F::from_canonical_u32(rs1 + i as u32),
-                );
-                x_limbs.push(limb.as_canonical_u32() as u8);
-            }
+            let rs1 = read_rv32_register(memory, a);
+            // SAFETY:
+            // - `RV32_MEMORY_AS` consists of `u8`s
+            // - `RV32_MEMORY_AS` is in bounds; NOTE(review): confirm `rs1 + num_limbs` is too
+            let x_limbs: Vec<u8> =
+                unsafe { memory.memory.get_slice((RV32_MEMORY_AS, rs1), num_limbs) }.to_vec();
             let x = BigUint::from_bytes_le(&x_limbs);
 
             let (success, sqrt) = match mod_sqrt(&x, modulus, &self.non_qrs[mod_idx]) {
@@ -372,12 +602,13 @@ pub(crate) mod phantom {
 
     impl<F: PrimeField32> PhantomSubExecutor<F> for NonQrHintSubEx {
         fn phantom_execute(
-            &mut self,
-            _: &MemoryController<F>,
+            &self,
+            _: &GuestMemory,
             streams: &mut Streams<F>,
+            _: &mut StdRng,
             _: PhantomDiscriminant,
-            _: F,
-            _: F,
+            _: u32,
+            _: u32,
             c_upper: u16,
         ) -> eyre::Result<()> {
             let mod_idx = c_upper as usize;
diff --git a/extensions/algebra/moduli-macros/src/lib.rs b/extensions/algebra/moduli-macros/src/lib.rs
index fc30341195..0dc7128588 100644
--- a/extensions/algebra/moduli-macros/src/lib.rs
+++ b/extensions/algebra/moduli-macros/src/lib.rs
@@ -965,7 +965,6 @@ pub fn moduli_init(input: TokenStream) -> TokenStream {
     let ModuliDefine { items } = parse_macro_input!(input as ModuliDefine);
 
     let mut externs = Vec::new();
-    let mut openvm_section = Vec::new();
 
     // List of all modular limbs in one (that is, with a compile-time known size) array.
     let mut two_modular_limbs_flattened_list = Vec::<u8>::new();
@@ -976,8 +975,6 @@ pub fn moduli_init(input: TokenStream) -> TokenStream {
 
     for (mod_idx, item) in items.into_iter().enumerate() {
         let modulus = item.value();
-        println!("[init] modulus #{} = {}", mod_idx, modulus);
-
         let modulus_bytes = string_to_bytes(&modulus);
         let mut limbs = modulus_bytes.len();
         let mut block_size = 32;
@@ -1012,31 +1009,11 @@ pub fn moduli_init(input: TokenStream) -> TokenStream {
             .collect::<Vec<_>>()
             .join("");
 
-        let serialized_modulus =
-            core::iter::once(1) // 1 for "modulus"
-                .chain(core::iter::once(mod_idx as u8)) // mod_idx is u8 for now (can make it u32), because we don't know the order of
-                // variables in the elf
-                .chain((modulus_bytes.len() as u32).to_le_bytes().iter().copied())
-                .chain(modulus_bytes.iter().copied())
-                .collect::<Vec<_>>();
-        let serialized_name = syn::Ident::new(
-            &format!("OPENVM_SERIALIZED_MODULUS_{}", mod_idx),
-            span.into(),
-        );
-        let serialized_len = serialized_modulus.len();
         let setup_extern_func = syn::Ident::new(
             &format!("moduli_setup_extern_func_{}", modulus_hex),
             span.into(),
         );
 
-        openvm_section.push(quote::quote_spanned! { span.into() =>
-            #[cfg(target_os = "zkvm")]
-            #[link_section = ".openvm"]
-            #[no_mangle]
-            #[used]
-            static #serialized_name: [u8; #serialized_len] = [#(#serialized_modulus),*];
-        });
-
         for op_type in ["add", "sub", "mul", "div"] {
             let func_name = syn::Ident::new(
                 &format!("{}_extern_func_{}", op_type, modulus_hex),
@@ -1126,19 +1103,12 @@ pub fn moduli_init(input: TokenStream) -> TokenStream {
             extern "C" fn #setup_extern_func() {
                 #[cfg(target_os = "zkvm")]
                 {
-                    let mut ptr = 0;
-                    assert_eq!(super::#serialized_name[ptr], 1);
-                    ptr += 1;
-                    assert_eq!(super::#serialized_name[ptr], #mod_idx as u8);
-                    ptr += 1;
-                    assert_eq!(super::#serialized_name[ptr..ptr+4].iter().rev().fold(0, |acc, &x| acc * 256 + x as usize), #limbs);
-                    ptr += 4;
-                    let remaining = &super::#serialized_name[ptr..];
-
                     // To avoid importing #struct_name, we create a placeholder struct with the same size and alignment.
                     #[repr(C, align(#block_size))]
                     struct AlignedPlaceholder([u8; #limbs]);
 
+                    const MODULUS_BYTES: AlignedPlaceholder = AlignedPlaceholder([#(#modulus_bytes),*]);
+
                     // We are going to use the numeric representation of the `rs2` register to distinguish the chip to setup.
                     // The transpiler will transform this instruction, based on whether `rs2` is `x0`, `x1` or `x2`, into a `SETUP_ADDSUB`, `SETUP_MULDIV` or `SETUP_ISEQ` instruction.
                     let mut uninit: core::mem::MaybeUninit<AlignedPlaceholder> = core::mem::MaybeUninit::uninit();
@@ -1149,7 +1119,7 @@ pub fn moduli_init(input: TokenStream) -> TokenStream {
                             + #mod_idx
                                 * (::openvm_algebra_guest::ModArithBaseFunct7::MODULAR_ARITHMETIC_MAX_KINDS as usize),
                         rd = In uninit.as_mut_ptr(),
-                        rs1 = In remaining.as_ptr(),
+                        rs1 = In MODULUS_BYTES.0.as_ptr(),
                         rs2 = Const "x0" // will be parsed as 0 and therefore transpiled to SETUP_ADDMOD
                     );
                     openvm::platform::custom_insn_r!(
@@ -1159,7 +1129,7 @@ pub fn moduli_init(input: TokenStream) -> TokenStream {
                             + #mod_idx
                                 * (::openvm_algebra_guest::ModArithBaseFunct7::MODULAR_ARITHMETIC_MAX_KINDS as usize),
                         rd = In uninit.as_mut_ptr(),
-                        rs1 = In remaining.as_ptr(),
+                        rs1 = In MODULUS_BYTES.0.as_ptr(),
                         rs2 = Const "x1" // will be parsed as 1 and therefore transpiled to SETUP_MULDIV
                     );
                     unsafe {
@@ -1172,7 +1142,7 @@ pub fn moduli_init(input: TokenStream) -> TokenStream {
                                 + #mod_idx
                                     * (::openvm_algebra_guest::ModArithBaseFunct7::MODULAR_ARITHMETIC_MAX_KINDS as usize),
                             rd = InOut tmp,
-                            rs1 = In remaining.as_ptr(),
+                            rs1 = In MODULUS_BYTES.0.as_ptr(),
                             rs2 = Const "x2" // will be parsed as 2 and therefore transpiled to SETUP_ISEQ
                         );
                         // rd = inout(reg) is necessary because this instruction will write to `rd` register
@@ -1185,7 +1155,6 @@ pub fn moduli_init(input: TokenStream) -> TokenStream {
     let total_limbs_cnt = two_modular_limbs_flattened_list.len();
     let cnt_limbs_list_len = limb_list_borders.len();
     TokenStream::from(quote::quote_spanned! { span.into() =>
-        #(#openvm_section)*
         #[allow(non_snake_case)]
         #[cfg(target_os = "zkvm")]
         mod openvm_intrinsics_ffi {
diff --git a/extensions/algebra/tests/src/lib.rs b/extensions/algebra/tests/src/lib.rs
index 181f592544..6931c1608a 100644
--- a/extensions/algebra/tests/src/lib.rs
+++ b/extensions/algebra/tests/src/lib.rs
@@ -5,10 +5,11 @@ mod tests {
     use eyre::Result;
     use num_bigint::BigUint;
     use openvm_algebra_circuit::{
-        Fp2Extension, ModularExtension, Rv32ModularConfig, Rv32ModularWithFp2Config,
+        Fp2Extension, Rv32ModularConfig, Rv32ModularCpuBuilder, Rv32ModularWithFp2Config,
+        Rv32ModularWithFp2CpuBuilder,
     };
     use openvm_algebra_transpiler::{Fp2TranspilerExtension, ModularTranspilerExtension};
-    use openvm_circuit::{arch::SystemConfig, utils::air_test};
+    use openvm_circuit::utils::{air_test, test_system_config};
     use openvm_ecc_circuit::SECP256K1_CONFIG;
     use openvm_instructions::exe::VmExe;
     use openvm_rv32im_transpiler::{
@@ -20,11 +21,27 @@ mod tests {
 
     type F = BabyBear;
 
+    #[cfg(test)]
+    fn test_rv32modular_config(moduli: Vec<BigUint>) -> Rv32ModularConfig {
+        let mut cfg = Rv32ModularConfig::new(moduli);
+        cfg.system = test_system_config(); // replace the system config with the test one
+        cfg
+    }
+
+    #[cfg(test)]
+    fn test_rv32modularwithfp2_config(
+        moduli_with_names: Vec<(String, BigUint)>,
+    ) -> Rv32ModularWithFp2Config {
+        let mut cfg = Rv32ModularWithFp2Config::new(moduli_with_names);
+        *cfg.as_mut() = test_system_config(); // overwrite the value exposed via `as_mut()`
+        cfg
+    }
+
     #[test]
     fn test_moduli_setup() -> Result<()> {
         let moduli = ["4002409555221667393417789825735904156556882819939007885332058136124031650490837864442687629129015664037894272559787", "1000000000000000003", "2305843009213693951"]
             .map(|s| BigUint::from_str(s).unwrap());
-        let config = Rv32ModularConfig::new(moduli.to_vec());
+        let config = test_rv32modular_config(moduli.to_vec());
         let elf = build_example_program_at_path(get_programs_dir!(), "moduli_setup", &config)?;
         let openvm_exe = VmExe::from_elf(
             elf,
@@ -35,13 +52,13 @@ mod tests {
                 .with_extension(ModularTranspilerExtension),
         )?;
 
-        air_test(config, openvm_exe);
+        air_test(Rv32ModularCpuBuilder, config, openvm_exe);
         Ok(())
     }
 
     #[test]
     fn test_modular() -> Result<()> {
-        let config = Rv32ModularConfig::new(vec![SECP256K1_CONFIG.modulus.clone()]);
+        let config = test_rv32modular_config(vec![SECP256K1_CONFIG.modulus.clone()]);
         let elf = build_example_program_at_path(get_programs_dir!(), "little", &config)?;
         let openvm_exe = VmExe::from_elf(
             elf,
@@ -51,13 +68,13 @@ mod tests {
                 .with_extension(Rv32IoTranspilerExtension)
                 .with_extension(ModularTranspilerExtension),
         )?;
-        air_test(config, openvm_exe);
+        air_test(Rv32ModularCpuBuilder, config, openvm_exe);
         Ok(())
     }
 
     #[test]
     fn test_complex_two_moduli() -> Result<()> {
-        let config = Rv32ModularWithFp2Config::new(vec![
+        let config = test_rv32modularwithfp2_config(vec![
             (
                 "Complex1".to_string(),
                 BigUint::from_str("998244353").unwrap(),
@@ -78,18 +95,14 @@ mod tests {
                 .with_extension(Fp2TranspilerExtension)
                 .with_extension(ModularTranspilerExtension),
         )?;
-        air_test(config, openvm_exe);
+        air_test(Rv32ModularWithFp2CpuBuilder, config, openvm_exe);
         Ok(())
     }
 
     #[test]
     fn test_complex_redundant_modulus() -> Result<()> {
         let config = Rv32ModularWithFp2Config {
-            system: SystemConfig::default().with_continuations(),
-            base: Default::default(),
-            mul: Default::default(),
-            io: Default::default(),
-            modular: ModularExtension::new(vec![
+            modular: test_rv32modular_config(vec![
                 BigUint::from_str("998244353").unwrap(),
                 BigUint::from_str("1000000007").unwrap(),
                 BigUint::from_str("1000000009").unwrap(),
@@ -114,13 +127,13 @@ mod tests {
                 .with_extension(Fp2TranspilerExtension)
                 .with_extension(ModularTranspilerExtension),
         )?;
-        air_test(config, openvm_exe);
+        air_test(Rv32ModularWithFp2CpuBuilder, config, openvm_exe);
         Ok(())
     }
 
     #[test]
     fn test_complex() -> Result<()> {
-        let config = Rv32ModularWithFp2Config::new(vec![(
+        let config = test_rv32modularwithfp2_config(vec![(
             "Complex".to_string(),
             SECP256K1_CONFIG.modulus.clone(),
         )]);
@@ -134,14 +147,14 @@ mod tests {
                 .with_extension(Fp2TranspilerExtension)
                 .with_extension(ModularTranspilerExtension),
         )?;
-        air_test(config, openvm_exe);
+        air_test(Rv32ModularWithFp2CpuBuilder, config, openvm_exe);
         Ok(())
     }
 
     #[test]
     #[should_panic]
     fn test_invalid_setup() {
-        let config = Rv32ModularConfig::new(vec![
+        let config = test_rv32modular_config(vec![
             BigUint::from_str("998244353").unwrap(),
             BigUint::from_str("1000000007").unwrap(),
         ]);
@@ -163,12 +176,12 @@ mod tests {
                 .with_extension(ModularTranspilerExtension),
         )
         .unwrap();
-        air_test(config, openvm_exe);
+        air_test(Rv32ModularCpuBuilder, config, openvm_exe);
     }
 
     #[test]
     fn test_sqrt() -> Result<()> {
-        let config = Rv32ModularConfig::new(vec![SECP256K1_CONFIG.modulus.clone()]);
+        let config = test_rv32modular_config(vec![SECP256K1_CONFIG.modulus.clone()]);
         let elf = build_example_program_at_path(get_programs_dir!(), "sqrt", &config)?;
         let openvm_exe = VmExe::from_elf(
             elf,
@@ -178,7 +191,7 @@ mod tests {
                 .with_extension(Rv32IoTranspilerExtension)
                 .with_extension(ModularTranspilerExtension),
         )?;
-        air_test(config, openvm_exe);
+        air_test(Rv32ModularCpuBuilder, config, openvm_exe);
         Ok(())
     }
 }
diff --git a/extensions/bigint/circuit/Cargo.toml b/extensions/bigint/circuit/Cargo.toml
index 09d68a9d1b..aa9114c34a 100644
--- a/extensions/bigint/circuit/Cargo.toml
+++ b/extensions/bigint/circuit/Cargo.toml
@@ -29,6 +29,8 @@ serde.workspace = true
 openvm-stark-sdk = { workspace = true }
 openvm-circuit = { workspace = true, features = ["test-utils"] }
 openvm-rv32-adapters = { workspace = true, features = ["test-utils"] }
+test-case.workspace = true
+alloy-primitives = { version = "1.2.1" }
 
 [features]
 default = ["parallel", "jemalloc"]
diff --git a/extensions/bigint/circuit/src/base_alu.rs b/extensions/bigint/circuit/src/base_alu.rs
new file mode 100644
index 0000000000..6444f601e1
--- /dev/null
+++ b/extensions/bigint/circuit/src/base_alu.rs
@@ -0,0 +1,239 @@
+use std::{
+    borrow::{Borrow, BorrowMut},
+    mem::transmute,
+};
+
+use openvm_bigint_transpiler::Rv32BaseAlu256Opcode;
+use openvm_circuit::{arch::*, system::memory::online::GuestMemory};
+use openvm_circuit_primitives_derive::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS},
+    LocalOpcode,
+};
+use openvm_rv32_adapters::Rv32HeapAdapterExecutor;
+use openvm_rv32im_circuit::BaseAluExecutor;
+use openvm_rv32im_transpiler::BaseAluOpcode;
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use crate::{Rv32BaseAlu256Executor, INT256_NUM_LIMBS};
+
+type AdapterExecutor = Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>;
+
+impl Rv32BaseAlu256Executor {
+    pub fn new(adapter: AdapterExecutor, offset: usize) -> Self {
+        Self(BaseAluExecutor::new(adapter, offset)) // thin wrapper around `BaseAluExecutor`
+    }
+}
+
+#[derive(AlignedBytesBorrow)]
+#[repr(C)] // pin the field layout, as the branch pre-compute structs do
+struct BaseAluPreCompute {
+    a: u8, // register pointer holding the destination address
+    b: u8, // register pointer holding the first operand's address
+    c: u8, // register pointer holding the second operand's address
+}
+impl<F: PrimeField32> Executor<F> for Rv32BaseAlu256Executor {
+    fn pre_compute_size(&self) -> usize {
+        std::mem::size_of::<BaseAluPreCompute>()
+    }
+
+    fn pre_compute<Ctx>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: ExecutionCtxTrait,
+    {
+        // Decode the operands into the caller's buffer, then pick the handler for the opcode.
+        let operands: &mut BaseAluPreCompute = data.borrow_mut();
+        let handler = match self.pre_compute_impl(pc, inst, operands)? {
+            BaseAluOpcode::ADD => execute_e1_impl::<_, _, AddOp>,
+            BaseAluOpcode::SUB => execute_e1_impl::<_, _, SubOp>,
+            BaseAluOpcode::XOR => execute_e1_impl::<_, _, XorOp>,
+            BaseAluOpcode::OR => execute_e1_impl::<_, _, OrOp>,
+            BaseAluOpcode::AND => execute_e1_impl::<_, _, AndOp>,
+        };
+        Ok(handler)
+    }
+}
+
+impl<F: PrimeField32> MeteredExecutor<F> for Rv32BaseAlu256Executor {
+    fn metered_pre_compute_size(&self) -> usize {
+        std::mem::size_of::<E2PreCompute<BaseAluPreCompute>>()
+    }
+
+    fn metered_pre_compute<Ctx>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        // Record the chip index first, then decode the operands into the nested payload.
+        let slot: &mut E2PreCompute<BaseAluPreCompute> = data.borrow_mut();
+        slot.chip_idx = chip_idx as u32;
+        let handler = match self.pre_compute_impl(pc, inst, &mut slot.data)? {
+            BaseAluOpcode::ADD => execute_e2_impl::<_, _, AddOp>,
+            BaseAluOpcode::SUB => execute_e2_impl::<_, _, SubOp>,
+            BaseAluOpcode::XOR => execute_e2_impl::<_, _, XorOp>,
+            BaseAluOpcode::OR => execute_e2_impl::<_, _, OrOp>,
+            BaseAluOpcode::AND => execute_e2_impl::<_, _, AndOp>,
+        };
+        Ok(handler)
+    }
+}
+
+#[inline(always)]
+unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait, OP: AluOp>(
+    pre_compute: &BaseAluPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let lhs_ptr = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.b as u32);
+    let rhs_ptr = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.c as u32);
+    let dst_ptr = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.a as u32);
+    let lhs = vm_state.vm_read::<u8, INT256_NUM_LIMBS>(RV32_MEMORY_AS, u32::from_le_bytes(lhs_ptr));
+    let rhs = vm_state.vm_read::<u8, INT256_NUM_LIMBS>(RV32_MEMORY_AS, u32::from_le_bytes(rhs_ptr));
+    let result = OP::compute(lhs, rhs); // both operands are read before the result is written
+    vm_state.vm_write(RV32_MEMORY_AS, u32::from_le_bytes(dst_ptr), &result);
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+}
+
+unsafe fn execute_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait, OP: AluOp>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let operands: &BaseAluPreCompute = pre_compute.borrow(); // view the raw buffer as operands
+    execute_e12_impl::<F, CTX, OP>(operands, vm_state);
+}
+
+unsafe fn execute_e2_impl<F: PrimeField32, CTX: MeteredExecutionCtxTrait, OP: AluOp>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let metered: &E2PreCompute<BaseAluPreCompute> = pre_compute.borrow();
+    // Report a height change of 1 for this instruction's chip to the metering
+    // context, then run the execution body shared with the unmetered path.
+    vm_state.ctx.on_height_change(metered.chip_idx as usize, 1);
+    execute_e12_impl::<F, CTX, OP>(&metered.data, vm_state);
+}
+
+impl Rv32BaseAlu256Executor {
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut BaseAluPreCompute,
+    ) -> Result<BaseAluOpcode, StaticProgramError> {
+        // Operand `d` must name the register address space and operand `e`
+        // the memory address space; any other combination is rejected as an
+        // invalid instruction at `pc`.
+        let reg_as = inst.d.as_canonical_u32();
+        let mem_as = inst.e.as_canonical_u32();
+        if reg_as != RV32_REGISTER_AS || mem_as != RV32_MEMORY_AS {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+
+        // Keep only the low byte of each operand; the execute functions use
+        // these bytes as register-file pointers.
+        data.a = inst.a.as_canonical_u32() as u8;
+        data.b = inst.b.as_canonical_u32() as u8;
+        data.c = inst.c.as_canonical_u32() as u8;
+
+        // Translate the global opcode into an index relative to this opcode
+        // class, then into the matching `BaseAluOpcode` variant.
+        let opcode = &inst.opcode;
+        let local_idx = opcode.local_opcode_idx(Rv32BaseAlu256Opcode::CLASS_OFFSET);
+        let local_opcode = BaseAluOpcode::from_usize(local_idx);
+        Ok(local_opcode)
+    }
+}
+
+trait AluOp { // one ALU operation, selected at compile time through a marker type
+    fn compute(rs1: [u8; INT256_NUM_LIMBS], rs2: [u8; INT256_NUM_LIMBS]) -> [u8; INT256_NUM_LIMBS];
+}
+struct AddOp; // marker used for `BaseAluOpcode::ADD`
+struct SubOp; // marker used for `BaseAluOpcode::SUB`
+struct XorOp; // marker used for `BaseAluOpcode::XOR`
+struct OrOp; // marker used for `BaseAluOpcode::OR`
+struct AndOp; // marker used for `BaseAluOpcode::AND`
+impl AluOp for AddOp {
+    #[inline(always)]
+    fn compute(rs1: [u8; INT256_NUM_LIMBS], rs2: [u8; INT256_NUM_LIMBS]) -> [u8; INT256_NUM_LIMBS] {
+        // Wrapping 256-bit add: ripple the carry upward from word 0.
+        let lhs: [u64; 4] = unsafe { transmute(rs1) };
+        let rhs: [u64; 4] = unsafe { transmute(rs2) };
+        let mut sum = [0u64; 4];
+        let mut carry = false;
+        for i in 0..4 {
+            let (partial, c1) = lhs[i].overflowing_add(rhs[i]);
+            let (total, c2) = partial.overflowing_add(carry as u64);
+            sum[i] = total;
+            carry = c1 || c2;
+        }
+        unsafe { transmute(sum) }
+    }
+}
+impl AluOp for SubOp {
+    #[inline(always)]
+    fn compute(rs1: [u8; INT256_NUM_LIMBS], rs2: [u8; INT256_NUM_LIMBS]) -> [u8; INT256_NUM_LIMBS] {
+        // Wrapping 256-bit subtract: ripple the borrow upward from word 0.
+        let lhs: [u64; 4] = unsafe { transmute(rs1) };
+        let rhs: [u64; 4] = unsafe { transmute(rs2) };
+        let mut diff = [0u64; 4];
+        let mut borrow = false;
+        for i in 0..4 {
+            let (partial, b1) = lhs[i].overflowing_sub(rhs[i]);
+            let (total, b2) = partial.overflowing_sub(borrow as u64);
+            diff[i] = total;
+            borrow = b1 || b2;
+        }
+        unsafe { transmute(diff) }
+    }
+}
+impl AluOp for XorOp {
+    #[inline(always)]
+    fn compute(rs1: [u8; INT256_NUM_LIMBS], rs2: [u8; INT256_NUM_LIMBS]) -> [u8; INT256_NUM_LIMBS] {
+        let lhs: [u64; 4] = unsafe { transmute(rs1) };
+        let rhs: [u64; 4] = unsafe { transmute(rs2) };
+        let mut out = [0u64; 4];
+        // Each result word depends only on the matching pair of input words.
+        for (dst, (a, b)) in out.iter_mut().zip(lhs.iter().zip(rhs.iter())) {
+            *dst = a ^ b;
+        }
+        unsafe { transmute(out) }
+    }
+}
+impl AluOp for OrOp {
+    #[inline(always)]
+    fn compute(rs1: [u8; INT256_NUM_LIMBS], rs2: [u8; INT256_NUM_LIMBS]) -> [u8; INT256_NUM_LIMBS] {
+        let lhs: [u64; 4] = unsafe { transmute(rs1) };
+        let rhs: [u64; 4] = unsafe { transmute(rs2) };
+        let mut out = [0u64; 4];
+        // Each result word depends only on the matching pair of input words.
+        for (dst, (a, b)) in out.iter_mut().zip(lhs.iter().zip(rhs.iter())) {
+            *dst = a | b;
+        }
+        unsafe { transmute(out) }
+    }
+}
+impl AluOp for AndOp {
+    #[inline(always)]
+    fn compute(rs1: [u8; INT256_NUM_LIMBS], rs2: [u8; INT256_NUM_LIMBS]) -> [u8; INT256_NUM_LIMBS] {
+        let lhs: [u64; 4] = unsafe { transmute(rs1) };
+        let rhs: [u64; 4] = unsafe { transmute(rs2) };
+        let mut out = [0u64; 4];
+        // Each result word depends only on the matching pair of input words.
+        for (dst, (a, b)) in out.iter_mut().zip(lhs.iter().zip(rhs.iter())) {
+            *dst = a & b;
+        }
+        unsafe { transmute(out) }
+    }
+}
diff --git a/extensions/bigint/circuit/src/branch_eq.rs b/extensions/bigint/circuit/src/branch_eq.rs
new file mode 100644
index 0000000000..eab11ae362
--- /dev/null
+++ b/extensions/bigint/circuit/src/branch_eq.rs
@@ -0,0 +1,170 @@
+use std::borrow::{Borrow, BorrowMut};
+
+use openvm_bigint_transpiler::Rv32BranchEqual256Opcode;
+use openvm_circuit::{arch::*, system::memory::online::GuestMemory};
+use openvm_circuit_primitives_derive::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS},
+    LocalOpcode,
+};
+use openvm_rv32_adapters::Rv32HeapBranchAdapterExecutor;
+use openvm_rv32im_circuit::BranchEqualExecutor;
+use openvm_rv32im_transpiler::BranchEqualOpcode;
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use crate::{Rv32BranchEqual256Executor, INT256_NUM_LIMBS};
+
+type AdapterExecutor = Rv32HeapBranchAdapterExecutor<2, INT256_NUM_LIMBS>;
+
+impl Rv32BranchEqual256Executor {
+    pub fn new(adapter_step: AdapterExecutor, offset: usize, pc_step: u32) -> Self {
+        Self(BranchEqualExecutor::new(adapter_step, offset, pc_step)) // thin wrapper, args forwarded
+    }
+}
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct BranchEqPreCompute {
+    imm: isize, // signed offset added to `pc` when the branch is taken
+    a: u8, // register pointer holding the first operand's address
+    b: u8, // register pointer holding the second operand's address
+}
+
+impl<F: PrimeField32> Executor<F> for Rv32BranchEqual256Executor {
+    fn pre_compute_size(&self) -> usize {
+        std::mem::size_of::<BranchEqPreCompute>()
+    }
+
+    fn pre_compute<Ctx>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: ExecutionCtxTrait,
+    {
+        // Decode into the caller's buffer; the const flag is `true` for BNE, `false` for BEQ.
+        let operands: &mut BranchEqPreCompute = data.borrow_mut();
+        let handler = match self.pre_compute_impl(pc, inst, operands)? {
+            BranchEqualOpcode::BEQ => execute_e1_impl::<_, _, false>,
+            BranchEqualOpcode::BNE => execute_e1_impl::<_, _, true>,
+        };
+        Ok(handler)
+    }
+}
+
+impl<F: PrimeField32> MeteredExecutor<F> for Rv32BranchEqual256Executor {
+    fn metered_pre_compute_size(&self) -> usize {
+        std::mem::size_of::<E2PreCompute<BranchEqPreCompute>>()
+    }
+
+    fn metered_pre_compute<Ctx>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        // Record the chip index first, then decode the operands into the nested payload.
+        let slot: &mut E2PreCompute<BranchEqPreCompute> = data.borrow_mut();
+        slot.chip_idx = chip_idx as u32;
+        let handler = match self.pre_compute_impl(pc, inst, &mut slot.data)? {
+            BranchEqualOpcode::BEQ => execute_e2_impl::<_, _, false>,
+            BranchEqualOpcode::BNE => execute_e2_impl::<_, _, true>,
+        };
+        Ok(handler)
+    }
+}
+
+#[inline(always)]
+unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait, const IS_NE: bool>(
+    pre_compute: &BranchEqPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let lhs_ptr = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.a as u32);
+    let rhs_ptr = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.b as u32);
+    let lhs = vm_state.vm_read::<u8, INT256_NUM_LIMBS>(RV32_MEMORY_AS, u32::from_le_bytes(lhs_ptr));
+    let rhs = vm_state.vm_read::<u8, INT256_NUM_LIMBS>(RV32_MEMORY_AS, u32::from_le_bytes(rhs_ptr));
+    // With `IS_NE == false` the branch is taken on equality, otherwise on inequality.
+    let taken = u256_eq(lhs, rhs) != IS_NE;
+    vm_state.pc = if taken {
+        (vm_state.pc as isize + pre_compute.imm) as u32
+    } else {
+        vm_state.pc.wrapping_add(DEFAULT_PC_STEP)
+    };
+    vm_state.instret += 1;
+}
+
+unsafe fn execute_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait, const IS_NE: bool>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let operands: &BranchEqPreCompute = pre_compute.borrow(); // view the raw buffer as operands
+    execute_e12_impl::<F, CTX, IS_NE>(operands, vm_state);
+}
+
+unsafe fn execute_e2_impl<F: PrimeField32, CTX: MeteredExecutionCtxTrait, const IS_NE: bool>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let metered: &E2PreCompute<BranchEqPreCompute> = pre_compute.borrow();
+    // Report a height change of 1 for this instruction's chip to the metering
+    // context, then run the execution body shared with the unmetered path.
+    vm_state.ctx.on_height_change(metered.chip_idx as usize, 1);
+    execute_e12_impl::<F, CTX, IS_NE>(&metered.data, vm_state);
+}
+
+impl Rv32BranchEqual256Executor {
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut BranchEqPreCompute,
+    ) -> Result<BranchEqualOpcode, StaticProgramError> {
+        // `c` carries the branch offset as a field element: values closer to
+        // the field order than to zero are decoded as negative offsets.
+        let raw = inst.c.as_canonical_u32();
+        let wrapped = F::ORDER_U32 - raw;
+        let imm = if wrapped < raw {
+            -(wrapped as isize)
+        } else {
+            raw as isize
+        };
+
+        // Operand `d` must name the register address space and operand `e`
+        // the memory address space; anything else is an invalid instruction.
+        let reg_as = inst.d.as_canonical_u32();
+        let mem_as = inst.e.as_canonical_u32();
+        if reg_as != RV32_REGISTER_AS || mem_as != RV32_MEMORY_AS {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+
+        // Operands `a` and `b` are kept as their low byte only.
+        data.imm = imm;
+        data.a = inst.a.as_canonical_u32() as u8;
+        data.b = inst.b.as_canonical_u32() as u8;
+
+        // Translate the global opcode into this class's local opcode.
+        let opcode = &inst.opcode;
+        let local_idx = opcode.local_opcode_idx(Rv32BranchEqual256Opcode::CLASS_OFFSET);
+        let local_opcode = BranchEqualOpcode::from_usize(local_idx);
+        Ok(local_opcode)
+    }
+}
+
+/// Returns `true` iff `rs1` and `rs2` hold the same 256-bit value.
+///
+/// Equality does not depend on how the bytes are grouped into words, so the
+/// two arrays are compared directly rather than being transmuted to
+/// `[u64; 4]` and looped over. This removes the `unsafe` blocks and gives
+/// the same answer for every input.
+#[inline(always)]
+fn u256_eq(rs1: [u8; INT256_NUM_LIMBS], rs2: [u8; INT256_NUM_LIMBS]) -> bool {
+    rs1 == rs2
+}
diff --git a/extensions/bigint/circuit/src/branch_lt.rs b/extensions/bigint/circuit/src/branch_lt.rs
new file mode 100644
index 0000000000..7a701d4812
--- /dev/null
+++ b/extensions/bigint/circuit/src/branch_lt.rs
@@ -0,0 +1,198 @@
+use std::borrow::{Borrow, BorrowMut};
+
+use openvm_bigint_transpiler::Rv32BranchLessThan256Opcode;
+use openvm_circuit::{arch::*, system::memory::online::GuestMemory};
+use openvm_circuit_primitives_derive::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS},
+    LocalOpcode,
+};
+use openvm_rv32_adapters::Rv32HeapBranchAdapterExecutor;
+use openvm_rv32im_circuit::BranchLessThanExecutor;
+use openvm_rv32im_transpiler::BranchLessThanOpcode;
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use crate::{
+    common::{i256_lt, u256_lt},
+    Rv32BranchLessThan256Executor, INT256_NUM_LIMBS,
+};
+
+type AdapterExecutor = Rv32HeapBranchAdapterExecutor<2, INT256_NUM_LIMBS>;
+
+impl Rv32BranchLessThan256Executor {
+    pub fn new(adapter: AdapterExecutor, offset: usize) -> Self {
+        Self(BranchLessThanExecutor::new(adapter, offset)) // thin wrapper, args forwarded
+    }
+}
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct BranchLtPreCompute {
+    imm: isize, // signed offset added to `pc` when the branch is taken
+    a: u8, // register pointer holding the first operand's address
+    b: u8, // register pointer holding the second operand's address
+}
+
+impl<F: PrimeField32> Executor<F> for Rv32BranchLessThan256Executor {
+    fn pre_compute_size(&self) -> usize {
+        std::mem::size_of::<BranchLtPreCompute>()
+    }
+
+    fn pre_compute<Ctx>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: ExecutionCtxTrait,
+    {
+        // Decode the operands into the caller's buffer, then pick the handler for the opcode.
+        let operands: &mut BranchLtPreCompute = data.borrow_mut();
+        let handler = match self.pre_compute_impl(pc, inst, operands)? {
+            BranchLessThanOpcode::BLT => execute_e1_impl::<_, _, BltOp>,
+            BranchLessThanOpcode::BLTU => execute_e1_impl::<_, _, BltuOp>,
+            BranchLessThanOpcode::BGE => execute_e1_impl::<_, _, BgeOp>,
+            BranchLessThanOpcode::BGEU => execute_e1_impl::<_, _, BgeuOp>,
+        };
+        Ok(handler)
+    }
+}
+
+impl<F: PrimeField32> MeteredExecutor<F> for Rv32BranchLessThan256Executor {
+    fn metered_pre_compute_size(&self) -> usize {
+        std::mem::size_of::<E2PreCompute<BranchLtPreCompute>>()
+    }
+
+    fn metered_pre_compute<Ctx>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        // Record the chip index first, then decode the operands into the nested payload.
+        let slot: &mut E2PreCompute<BranchLtPreCompute> = data.borrow_mut();
+        slot.chip_idx = chip_idx as u32;
+        let handler = match self.pre_compute_impl(pc, inst, &mut slot.data)? {
+            BranchLessThanOpcode::BLT => execute_e2_impl::<_, _, BltOp>,
+            BranchLessThanOpcode::BLTU => execute_e2_impl::<_, _, BltuOp>,
+            BranchLessThanOpcode::BGE => execute_e2_impl::<_, _, BgeOp>,
+            BranchLessThanOpcode::BGEU => execute_e2_impl::<_, _, BgeuOp>,
+        };
+        Ok(handler)
+    }
+}
+
+#[inline(always)]
+unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait, OP: BranchLessThanOp>(
+    pre_compute: &BranchLtPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let lhs_ptr = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.a as u32);
+    let rhs_ptr = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.b as u32);
+    let lhs = vm_state.vm_read::<u8, INT256_NUM_LIMBS>(RV32_MEMORY_AS, u32::from_le_bytes(lhs_ptr));
+    let rhs = vm_state.vm_read::<u8, INT256_NUM_LIMBS>(RV32_MEMORY_AS, u32::from_le_bytes(rhs_ptr));
+    // `OP::compute` returning `true` means the branch is taken.
+    vm_state.pc = if OP::compute(lhs, rhs) {
+        (vm_state.pc as isize + pre_compute.imm) as u32
+    } else {
+        vm_state.pc.wrapping_add(DEFAULT_PC_STEP)
+    };
+    vm_state.instret += 1;
+}
+
+unsafe fn execute_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait, OP: BranchLessThanOp>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let operands: &BranchLtPreCompute = pre_compute.borrow(); // view the raw buffer as operands
+    execute_e12_impl::<F, CTX, OP>(operands, vm_state);
+}
+
+unsafe fn execute_e2_impl<F: PrimeField32, CTX: MeteredExecutionCtxTrait, OP: BranchLessThanOp>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let metered: &E2PreCompute<BranchLtPreCompute> = pre_compute.borrow();
+    // Report a height change of 1 for this instruction's chip to the metering
+    // context, then run the execution body shared with the unmetered path.
+    vm_state.ctx.on_height_change(metered.chip_idx as usize, 1);
+    execute_e12_impl::<F, CTX, OP>(&metered.data, vm_state);
+}
+
+impl Rv32BranchLessThan256Executor {
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut BranchLtPreCompute,
+    ) -> Result<BranchLessThanOpcode, StaticProgramError> {
+        // `c` carries the branch offset as a field element: values closer to
+        // the field order than to zero are decoded as negative offsets.
+        let raw = inst.c.as_canonical_u32();
+        let wrapped = F::ORDER_U32 - raw;
+        let imm = if wrapped < raw {
+            -(wrapped as isize)
+        } else {
+            raw as isize
+        };
+
+        // Operand `d` must name the register address space and operand `e`
+        // the memory address space; anything else is an invalid instruction.
+        let reg_as = inst.d.as_canonical_u32();
+        let mem_as = inst.e.as_canonical_u32();
+        if reg_as != RV32_REGISTER_AS || mem_as != RV32_MEMORY_AS {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+
+        // Operands `a` and `b` are kept as their low byte only.
+        data.imm = imm;
+        data.a = inst.a.as_canonical_u32() as u8;
+        data.b = inst.b.as_canonical_u32() as u8;
+
+        // Translate the global opcode into this class's local opcode.
+        let opcode = &inst.opcode;
+        let local_idx = opcode.local_opcode_idx(Rv32BranchLessThan256Opcode::CLASS_OFFSET);
+        let local_opcode = BranchLessThanOpcode::from_usize(local_idx);
+        Ok(local_opcode)
+    }
+}
+
+trait BranchLessThanOp { // one branch condition, selected at compile time through a marker type
+    fn compute(rs1: [u8; INT256_NUM_LIMBS], rs2: [u8; INT256_NUM_LIMBS]) -> bool; // `true` = taken
+}
+struct BltOp; // marker used for `BranchLessThanOpcode::BLT`
+struct BltuOp; // marker used for `BranchLessThanOpcode::BLTU`
+struct BgeOp; // marker used for `BranchLessThanOpcode::BGE`
+struct BgeuOp; // marker used for `BranchLessThanOpcode::BGEU`
+
+impl BranchLessThanOp for BltOp {
+    #[inline(always)]
+    fn compute(rs1: [u8; INT256_NUM_LIMBS], rs2: [u8; INT256_NUM_LIMBS]) -> bool {
+        i256_lt(rs1, rs2) // signed `rs1 < rs2`
+    }
+}
+impl BranchLessThanOp for BltuOp {
+    #[inline(always)]
+    fn compute(rs1: [u8; INT256_NUM_LIMBS], rs2: [u8; INT256_NUM_LIMBS]) -> bool {
+        u256_lt(rs1, rs2) // unsigned `rs1 < rs2`
+    }
+}
+impl BranchLessThanOp for BgeOp {
+    #[inline(always)]
+    fn compute(rs1: [u8; INT256_NUM_LIMBS], rs2: [u8; INT256_NUM_LIMBS]) -> bool {
+        !i256_lt(rs1, rs2) // signed `rs1 >= rs2`, as the negation of signed less-than
+    }
+}
+impl BranchLessThanOp for BgeuOp {
+    #[inline(always)]
+    fn compute(rs1: [u8; INT256_NUM_LIMBS], rs2: [u8; INT256_NUM_LIMBS]) -> bool {
+        !u256_lt(rs1, rs2) // unsigned `rs1 >= rs2`, as the negation of unsigned less-than
+    }
+}
diff --git a/extensions/bigint/circuit/src/common.rs b/extensions/bigint/circuit/src/common.rs
new file mode 100644
index 0000000000..14c49ce68c
--- /dev/null
+++ b/extensions/bigint/circuit/src/common.rs
@@ -0,0 +1,66 @@
+use crate::{INT256_NUM_LIMBS, RV32_CELL_BITS};
+
+#[inline(always)]
+pub(crate) fn u256_lt(rs1: [u8; INT256_NUM_LIMBS], rs2: [u8; INT256_NUM_LIMBS]) -> bool {
+    let lhs: [u64; 4] = unsafe { std::mem::transmute(rs1) };
+    let rhs: [u64; 4] = unsafe { std::mem::transmute(rs2) };
+    // The highest-indexed word that differs decides the order; if no word
+    // differs the values are equal, hence not less-than.
+    match (0..4).rev().find(|&i| lhs[i] != rhs[i]) {
+        Some(i) => lhs[i] < rhs[i],
+        None => false,
+    }
+}
+
+#[inline(always)]
+pub(crate) fn i256_lt(rs1: [u8; INT256_NUM_LIMBS], rs2: [u8; INT256_NUM_LIMBS]) -> bool {
+    // Sign flags: `true` when bit `RV32_CELL_BITS - 1` of the last byte is set (negative).
+    let lhs_neg = rs1[INT256_NUM_LIMBS - 1] >> (RV32_CELL_BITS - 1) == 1;
+    let rhs_neg = rs2[INT256_NUM_LIMBS - 1] >> (RV32_CELL_BITS - 1) == 1;
+    let lhs: [u64; 4] = unsafe { std::mem::transmute(rs1) };
+    let rhs: [u64; 4] = unsafe { std::mem::transmute(rs2) };
+    // Unsigned order of the highest differing word, flipped when exactly one
+    // operand is negative; equal inputs are never less-than.
+    match (0..4).rev().find(|&i| lhs[i] != rhs[i]) {
+        Some(i) => (lhs[i] < rhs[i]) ^ (lhs_neg != rhs_neg),
+        None => false,
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use alloy_primitives::{I256, U256};
+    use rand::{prelude::StdRng, Rng, SeedableRng};
+
+    use crate::{
+        common::{i256_lt, u256_lt},
+        INT256_NUM_LIMBS,
+    };
+
+    #[test]
+    fn test_u256_lt() {
+        let mut rng = StdRng::from_seed([42; 32]);
+        for _ in 0..10000 {
+            // Draw the operands as raw 64-bit words, `a` first and then `b`.
+            let words_a = rng.gen::<[u64; 4]>();
+            let words_b = rng.gen::<[u64; 4]>();
+            let expected = U256::from_limbs(words_a) < U256::from_limbs(words_b);
+            let bytes_a: [u8; INT256_NUM_LIMBS] = unsafe { std::mem::transmute(words_a) };
+            let bytes_b: [u8; INT256_NUM_LIMBS] = unsafe { std::mem::transmute(words_b) };
+            assert_eq!(u256_lt(bytes_a, bytes_b), expected);
+        }
+    }
+    #[test]
+    fn test_i256_lt() {
+        let mut rng = StdRng::from_seed([42; 32]);
+        for _ in 0..10000 {
+            // Draw the operands as raw 64-bit words, `a` first and then `b`.
+            let words_a = rng.gen::<[u64; 4]>();
+            let words_b = rng.gen::<[u64; 4]>();
+            let expected = I256::from_limbs(words_a) < I256::from_limbs(words_b);
+            let bytes_a: [u8; INT256_NUM_LIMBS] = unsafe { std::mem::transmute(words_a) };
+            let bytes_b: [u8; INT256_NUM_LIMBS] = unsafe { std::mem::transmute(words_b) };
+            assert_eq!(i256_lt(bytes_a, bytes_b), expected);
+        }
+    }
+}
diff --git a/extensions/bigint/circuit/src/extension.rs b/extensions/bigint/circuit/src/extension.rs
index 390b79cc63..1bbd9c80d7 100644
--- a/extensions/bigint/circuit/src/extension.rs
+++ b/extensions/bigint/circuit/src/extension.rs
@@ -1,3 +1,5 @@
+use std::sync::Arc;
+
 use derive_more::derive::From;
 use openvm_bigint_transpiler::{
     Rv32BaseAlu256Opcode, Rv32BranchEqual256Opcode, Rv32BranchLessThan256Opcode,
@@ -5,56 +7,35 @@ use openvm_bigint_transpiler::{
 };
 use openvm_circuit::{
     arch::{
-        InitFileGenerator, SystemConfig, SystemPort, VmExtension, VmInventory, VmInventoryBuilder,
-        VmInventoryError,
+        AirInventory, AirInventoryError, ChipInventory, ChipInventoryError, ExecutionBridge,
+        ExecutorInventoryBuilder, ExecutorInventoryError, RowMajorMatrixArena, VmCircuitExtension,
+        VmExecutionExtension, VmProverExtension,
     },
-    system::phantom::PhantomChip,
+    system::{memory::SharedMemoryHelper, SystemPort},
 };
-use openvm_circuit_derive::{AnyEnum, InstructionExecutor, VmConfig};
+use openvm_circuit_derive::{AnyEnum, Executor, MeteredExecutor, PreflightExecutor};
 use openvm_circuit_primitives::{
-    bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
-    range_tuple::{RangeTupleCheckerBus, SharedRangeTupleCheckerChip},
+    bitwise_op_lookup::{
+        BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
+        SharedBitwiseOperationLookupChip,
+    },
+    range_tuple::{
+        RangeTupleCheckerAir, RangeTupleCheckerBus, RangeTupleCheckerChip,
+        SharedRangeTupleCheckerChip,
+    },
 };
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
 use openvm_instructions::{program::DEFAULT_PC_STEP, LocalOpcode};
-use openvm_rv32im_circuit::{
-    Rv32I, Rv32IExecutor, Rv32IPeriphery, Rv32Io, Rv32IoExecutor, Rv32IoPeriphery, Rv32M,
-    Rv32MExecutor, Rv32MPeriphery,
+use openvm_stark_backend::{
+    config::{StarkGenericConfig, Val},
+    engine::StarkEngine,
+    p3_field::PrimeField32,
+    prover::cpu::{CpuBackend, CpuDevice},
 };
-use openvm_stark_backend::p3_field::PrimeField32;
 use serde::{Deserialize, Serialize};
 
 use crate::*;
 
-#[derive(Clone, Debug, VmConfig, derive_new::new, Serialize, Deserialize)]
-pub struct Int256Rv32Config {
-    #[system]
-    pub system: SystemConfig,
-    #[extension]
-    pub rv32i: Rv32I,
-    #[extension]
-    pub rv32m: Rv32M,
-    #[extension]
-    pub io: Rv32Io,
-    #[extension]
-    pub bigint: Int256,
-}
-
-// Default implementation uses no init file
-impl InitFileGenerator for Int256Rv32Config {}
-
-impl Default for Int256Rv32Config {
-    fn default() -> Self {
-        Self {
-            system: SystemConfig::default().with_continuations(),
-            rv32i: Rv32I,
-            rv32m: Rv32M::default(),
-            io: Rv32Io,
-            bigint: Int256::default(),
-        }
-    }
-}
-
+// =================================== VM Extension Implementation =================================
 #[derive(Clone, Copy, Debug, Serialize, Deserialize)]
 pub struct Int256 {
     #[serde(default = "default_range_tuple_checker_sizes")]
@@ -73,172 +54,272 @@ fn default_range_tuple_checker_sizes() -> [u32; 2] {
     [1 << 8, 32 * (1 << 8)]
 }
 
-#[derive(ChipUsageGetter, Chip, InstructionExecutor, From, AnyEnum)]
-pub enum Int256Executor<F: PrimeField32> {
-    BaseAlu256(Rv32BaseAlu256Chip<F>),
-    LessThan256(Rv32LessThan256Chip<F>),
-    BranchEqual256(Rv32BranchEqual256Chip<F>),
-    BranchLessThan256(Rv32BranchLessThan256Chip<F>),
-    Multiplication256(Rv32Multiplication256Chip<F>),
-    Shift256(Rv32Shift256Chip<F>),
+#[derive(Clone, From, AnyEnum, Executor, MeteredExecutor, PreflightExecutor)]
+pub enum Int256Executor {
+    BaseAlu256(Rv32BaseAlu256Executor),
+    LessThan256(Rv32LessThan256Executor),
+    BranchEqual256(Rv32BranchEqual256Executor),
+    BranchLessThan256(Rv32BranchLessThan256Executor),
+    Multiplication256(Rv32Multiplication256Executor),
+    Shift256(Rv32Shift256Executor),
 }
 
-#[derive(From, ChipUsageGetter, Chip, AnyEnum)]
-pub enum Int256Periphery<F: PrimeField32> {
-    BitwiseOperationLookup(SharedBitwiseOperationLookupChip<8>),
-    /// Only needed for multiplication extension
-    RangeTupleChecker(SharedRangeTupleCheckerChip<2>),
-    Phantom(PhantomChip<F>),
-}
+impl<F: PrimeField32> VmExecutionExtension<F> for Int256 {
+    type Executor = Int256Executor;
 
-impl<F: PrimeField32> VmExtension<F> for Int256 {
-    type Executor = Int256Executor<F>;
-    type Periphery = Int256Periphery<F>;
-
-    fn build(
+    fn extend_execution(
         &self,
-        builder: &mut VmInventoryBuilder<F>,
-    ) -> Result<VmInventory<Self::Executor, Self::Periphery>, VmInventoryError> {
-        let mut inventory = VmInventory::new();
+        inventory: &mut ExecutorInventoryBuilder<F, Int256Executor>,
+    ) -> Result<(), ExecutorInventoryError> {
+        let pointer_max_bits = inventory.pointer_max_bits();
+
+        let alu = Rv32BaseAlu256Executor::new(
+            Rv32HeapAdapterExecutor::new(pointer_max_bits),
+            Rv32BaseAlu256Opcode::CLASS_OFFSET,
+        );
+        inventory.add_executor(alu, Rv32BaseAlu256Opcode::iter().map(|x| x.global_opcode()))?;
+
+        let lt = Rv32LessThan256Executor::new(
+            Rv32HeapAdapterExecutor::new(pointer_max_bits),
+            Rv32LessThan256Opcode::CLASS_OFFSET,
+        );
+        inventory.add_executor(lt, Rv32LessThan256Opcode::iter().map(|x| x.global_opcode()))?;
+
+        let beq = Rv32BranchEqual256Executor::new(
+            Rv32HeapBranchAdapterExecutor::new(pointer_max_bits),
+            Rv32BranchEqual256Opcode::CLASS_OFFSET,
+            DEFAULT_PC_STEP,
+        );
+        inventory.add_executor(
+            beq,
+            Rv32BranchEqual256Opcode::iter().map(|x| x.global_opcode()),
+        )?;
+
+        let blt = Rv32BranchLessThan256Executor::new(
+            Rv32HeapBranchAdapterExecutor::new(pointer_max_bits),
+            Rv32BranchLessThan256Opcode::CLASS_OFFSET,
+        );
+        inventory.add_executor(
+            blt,
+            Rv32BranchLessThan256Opcode::iter().map(|x| x.global_opcode()),
+        )?;
+
+        let mult = Rv32Multiplication256Executor::new(
+            Rv32HeapAdapterExecutor::new(pointer_max_bits),
+            Rv32Mul256Opcode::CLASS_OFFSET,
+        );
+        inventory.add_executor(mult, Rv32Mul256Opcode::iter().map(|x| x.global_opcode()))?;
+
+        let shift = Rv32Shift256Executor::new(
+            Rv32HeapAdapterExecutor::new(pointer_max_bits),
+            Rv32Shift256Opcode::CLASS_OFFSET,
+        );
+        inventory.add_executor(shift, Rv32Shift256Opcode::iter().map(|x| x.global_opcode()))?;
+
+        Ok(())
+    }
+}
+
+impl<SC: StarkGenericConfig> VmCircuitExtension<SC> for Int256 {
+    fn extend_circuit(&self, inventory: &mut AirInventory<SC>) -> Result<(), AirInventoryError> {
         let SystemPort {
             execution_bus,
             program_bus,
             memory_bridge,
-        } = builder.system_port();
-        let range_checker_chip = builder.system_base().range_checker_chip.clone();
-        let bitwise_lu_chip = if let Some(&chip) = builder
-            .find_chip::<SharedBitwiseOperationLookupChip<8>>()
-            .first()
-        {
-            chip.clone()
-        } else {
-            let bitwise_lu_bus = BitwiseOperationLookupBus::new(builder.new_bus_idx());
-            let chip = SharedBitwiseOperationLookupChip::new(bitwise_lu_bus);
-            inventory.add_periphery_chip(chip.clone());
-            chip
+        } = inventory.system().port();
+
+        let exec_bridge = ExecutionBridge::new(execution_bus, program_bus);
+        let range_checker = inventory.range_checker().bus;
+        let pointer_max_bits = inventory.pointer_max_bits();
+
+        let bitwise_lu = {
+            // Look up first, then mutate: a trick to get around Rust's borrow rules
+            let existing_air = inventory.find_air::<BitwiseOperationLookupAir<8>>().next();
+            if let Some(air) = existing_air {
+                air.bus
+            } else {
+                let bus = BitwiseOperationLookupBus::new(inventory.new_bus_idx());
+                let air = BitwiseOperationLookupAir::<8>::new(bus);
+                inventory.add_air(air);
+                air.bus
+            }
         };
-        let offline_memory = builder.system_base().offline_memory();
-        let address_bits = builder.system_config().memory_config.pointer_max_bits;
-
-        let range_tuple_chip = if let Some(chip) = builder
-            .find_chip::<SharedRangeTupleCheckerChip<2>>()
-            .into_iter()
-            .find(|c| {
-                c.bus().sizes[0] >= self.range_tuple_checker_sizes[0]
-                    && c.bus().sizes[1] >= self.range_tuple_checker_sizes[1]
-            }) {
-            chip.clone()
-        } else {
-            let range_tuple_bus =
-                RangeTupleCheckerBus::new(builder.new_bus_idx(), self.range_tuple_checker_sizes);
-            let chip = SharedRangeTupleCheckerChip::new(range_tuple_bus);
-            inventory.add_periphery_chip(chip.clone());
-            chip
+
+        let range_tuple_checker = {
+            let existing_air = inventory.find_air::<RangeTupleCheckerAir<2>>().find(|c| {
+                c.bus.sizes[0] >= self.range_tuple_checker_sizes[0]
+                    && c.bus.sizes[1] >= self.range_tuple_checker_sizes[1]
+            });
+            if let Some(air) = existing_air {
+                air.bus
+            } else {
+                let bus = RangeTupleCheckerBus::new(
+                    inventory.new_bus_idx(),
+                    self.range_tuple_checker_sizes,
+                );
+                let air = RangeTupleCheckerAir { bus };
+                inventory.add_air(air);
+                air.bus
+            }
         };
 
-        let base_alu_chip = Rv32BaseAlu256Chip::new(
-            Rv32HeapAdapterChip::new(
-                execution_bus,
-                program_bus,
-                memory_bridge,
-                address_bits,
-                bitwise_lu_chip.clone(),
-            ),
-            BaseAluCoreChip::new(bitwise_lu_chip.clone(), Rv32BaseAlu256Opcode::CLASS_OFFSET),
-            offline_memory.clone(),
+        let alu = Rv32BaseAlu256Air::new(
+            Rv32HeapAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits),
+            BaseAluCoreAir::new(bitwise_lu, Rv32BaseAlu256Opcode::CLASS_OFFSET),
         );
-        inventory.add_executor(
-            base_alu_chip,
-            Rv32BaseAlu256Opcode::iter().map(|x| x.global_opcode()),
-        )?;
+        inventory.add_air(alu);
+
+        let lt = Rv32LessThan256Air::new(
+            Rv32HeapAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits),
+            LessThanCoreAir::new(bitwise_lu, Rv32LessThan256Opcode::CLASS_OFFSET),
+        );
+        inventory.add_air(lt);
+
+        let beq = Rv32BranchEqual256Air::new(
+            Rv32HeapBranchAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits),
+            BranchEqualCoreAir::new(Rv32BranchEqual256Opcode::CLASS_OFFSET, DEFAULT_PC_STEP),
+        );
+        inventory.add_air(beq);
+
+        let blt = Rv32BranchLessThan256Air::new(
+            Rv32HeapBranchAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits),
+            BranchLessThanCoreAir::new(bitwise_lu, Rv32BranchLessThan256Opcode::CLASS_OFFSET),
+        );
+        inventory.add_air(blt);
+
+        let mult = Rv32Multiplication256Air::new(
+            Rv32HeapAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits),
+            MultiplicationCoreAir::new(range_tuple_checker, Rv32Mul256Opcode::CLASS_OFFSET),
+        );
+        inventory.add_air(mult);
+
+        let shift = Rv32Shift256Air::new(
+            Rv32HeapAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu, pointer_max_bits),
+            ShiftCoreAir::new(bitwise_lu, range_checker, Rv32Shift256Opcode::CLASS_OFFSET),
+        );
+        inventory.add_air(shift);
+
+        Ok(())
+    }
+}
+
+pub struct Int256CpuProverExt;
+// This implementation is specific to CpuBackend because the lookup chips (VariableRangeChecker,
+// BitwiseOperationLookupChip) are specific to CpuBackend.
+impl<E, SC, RA> VmProverExtension<E, RA, Int256> for Int256CpuProverExt
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    RA: RowMajorMatrixArena<Val<SC>>,
+    Val<SC>: PrimeField32,
+{
+    fn extend_prover(
+        &self,
+        extension: &Int256,
+        inventory: &mut ChipInventory<SC, RA, CpuBackend<SC>>,
+    ) -> Result<(), ChipInventoryError> {
+        let range_checker = inventory.range_checker()?.clone();
+        let timestamp_max_bits = inventory.timestamp_max_bits();
+        let mem_helper = SharedMemoryHelper::new(range_checker.clone(), timestamp_max_bits);
+        let pointer_max_bits = inventory.airs().config().memory_config.pointer_max_bits;
+
+        let bitwise_lu = {
+            let existing_chip = inventory
+                .find_chip::<SharedBitwiseOperationLookupChip<8>>()
+                .next();
+            if let Some(chip) = existing_chip {
+                chip.clone()
+            } else {
+                let air: &BitwiseOperationLookupAir<8> = inventory.next_air()?;
+                let chip = Arc::new(BitwiseOperationLookupChip::new(air.bus));
+                inventory.add_periphery_chip(chip.clone());
+                chip
+            }
+        };
+
+        let range_tuple_checker = {
+            let existing_chip = inventory
+                .find_chip::<SharedRangeTupleCheckerChip<2>>()
+                .find(|c| {
+                    c.bus().sizes[0] >= extension.range_tuple_checker_sizes[0]
+                        && c.bus().sizes[1] >= extension.range_tuple_checker_sizes[1]
+                });
+            if let Some(chip) = existing_chip {
+                chip.clone()
+            } else {
+                let air: &RangeTupleCheckerAir<2> = inventory.next_air()?;
+                let chip = SharedRangeTupleCheckerChip::new(RangeTupleCheckerChip::new(air.bus));
+                inventory.add_periphery_chip(chip.clone());
+                chip
+            }
+        };
 
-        let less_than_chip = Rv32LessThan256Chip::new(
-            Rv32HeapAdapterChip::new(
-                execution_bus,
-                program_bus,
-                memory_bridge,
-                address_bits,
-                bitwise_lu_chip.clone(),
+        inventory.next_air::<Rv32BaseAlu256Air>()?;
+        let alu = Rv32BaseAlu256Chip::new(
+            BaseAluFiller::new(
+                Rv32HeapAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()),
+                bitwise_lu.clone(),
+                Rv32BaseAlu256Opcode::CLASS_OFFSET,
             ),
-            LessThanCoreChip::new(bitwise_lu_chip.clone(), Rv32LessThan256Opcode::CLASS_OFFSET),
-            offline_memory.clone(),
+            mem_helper.clone(),
         );
-        inventory.add_executor(
-            less_than_chip,
-            Rv32LessThan256Opcode::iter().map(|x| x.global_opcode()),
-        )?;
+        inventory.add_executor_chip(alu);
 
-        let branch_equal_chip = Rv32BranchEqual256Chip::new(
-            Rv32HeapBranchAdapterChip::new(
-                execution_bus,
-                program_bus,
-                memory_bridge,
-                address_bits,
-                bitwise_lu_chip.clone(),
+        inventory.next_air::<Rv32LessThan256Air>()?;
+        let lt = Rv32LessThan256Chip::new(
+            LessThanFiller::new(
+                Rv32HeapAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()),
+                bitwise_lu.clone(),
+                Rv32LessThan256Opcode::CLASS_OFFSET,
             ),
-            BranchEqualCoreChip::new(Rv32BranchEqual256Opcode::CLASS_OFFSET, DEFAULT_PC_STEP),
-            offline_memory.clone(),
+            mem_helper.clone(),
         );
-        inventory.add_executor(
-            branch_equal_chip,
-            Rv32BranchEqual256Opcode::iter().map(|x| x.global_opcode()),
-        )?;
+        inventory.add_executor_chip(lt);
 
-        let branch_less_than_chip = Rv32BranchLessThan256Chip::new(
-            Rv32HeapBranchAdapterChip::new(
-                execution_bus,
-                program_bus,
-                memory_bridge,
-                address_bits,
-                bitwise_lu_chip.clone(),
+        inventory.next_air::<Rv32BranchEqual256Air>()?;
+        let beq = Rv32BranchEqual256Chip::new(
+            BranchEqualFiller::new(
+                Rv32HeapBranchAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()),
+                Rv32BranchEqual256Opcode::CLASS_OFFSET,
+                DEFAULT_PC_STEP,
             ),
-            BranchLessThanCoreChip::new(
-                bitwise_lu_chip.clone(),
+            mem_helper.clone(),
+        );
+        inventory.add_executor_chip(beq);
+
+        inventory.next_air::<Rv32BranchLessThan256Air>()?;
+        let blt = Rv32BranchLessThan256Chip::new(
+            BranchLessThanFiller::new(
+                Rv32HeapBranchAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()),
+                bitwise_lu.clone(),
                 Rv32BranchLessThan256Opcode::CLASS_OFFSET,
             ),
-            offline_memory.clone(),
+            mem_helper.clone(),
         );
-        inventory.add_executor(
-            branch_less_than_chip,
-            Rv32BranchLessThan256Opcode::iter().map(|x| x.global_opcode()),
-        )?;
+        inventory.add_executor_chip(blt);
 
-        let multiplication_chip = Rv32Multiplication256Chip::new(
-            Rv32HeapAdapterChip::new(
-                execution_bus,
-                program_bus,
-                memory_bridge,
-                address_bits,
-                bitwise_lu_chip.clone(),
+        inventory.next_air::<Rv32Multiplication256Air>()?;
+        let mult = Rv32Multiplication256Chip::new(
+            MultiplicationFiller::new(
+                Rv32HeapAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()),
+                range_tuple_checker.clone(),
+                Rv32Mul256Opcode::CLASS_OFFSET,
             ),
-            MultiplicationCoreChip::new(range_tuple_chip, Rv32Mul256Opcode::CLASS_OFFSET),
-            offline_memory.clone(),
+            mem_helper.clone(),
         );
-        inventory.add_executor(
-            multiplication_chip,
-            Rv32Mul256Opcode::iter().map(|x| x.global_opcode()),
-        )?;
+        inventory.add_executor_chip(mult);
 
-        let shift_chip = Rv32Shift256Chip::new(
-            Rv32HeapAdapterChip::new(
-                execution_bus,
-                program_bus,
-                memory_bridge,
-                address_bits,
-                bitwise_lu_chip.clone(),
-            ),
-            ShiftCoreChip::new(
-                bitwise_lu_chip.clone(),
-                range_checker_chip,
+        inventory.next_air::<Rv32Shift256Air>()?;
+        let shift = Rv32Shift256Chip::new(
+            ShiftFiller::new(
+                Rv32HeapAdapterFiller::new(pointer_max_bits, bitwise_lu.clone()),
+                bitwise_lu.clone(),
+                range_checker.clone(),
                 Rv32Shift256Opcode::CLASS_OFFSET,
             ),
-            offline_memory.clone(),
+            mem_helper.clone(),
         );
-        inventory.add_executor(
-            shift_chip,
-            Rv32Shift256Opcode::iter().map(|x| x.global_opcode()),
-        )?;
-
-        Ok(inventory)
+        inventory.add_executor_chip(shift);
+        Ok(())
     }
 }
diff --git a/extensions/bigint/circuit/src/less_than.rs b/extensions/bigint/circuit/src/less_than.rs
new file mode 100644
index 0000000000..e153a6221e
--- /dev/null
+++ b/extensions/bigint/circuit/src/less_than.rs
@@ -0,0 +1,157 @@
+use std::borrow::{Borrow, BorrowMut};
+
+use openvm_bigint_transpiler::Rv32LessThan256Opcode;
+use openvm_circuit::{arch::*, system::memory::online::GuestMemory};
+use openvm_circuit_primitives_derive::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS},
+    LocalOpcode,
+};
+use openvm_rv32_adapters::Rv32HeapAdapterExecutor;
+use openvm_rv32im_circuit::LessThanExecutor;
+use openvm_rv32im_transpiler::LessThanOpcode;
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use crate::{common, Rv32LessThan256Executor, INT256_NUM_LIMBS};
+
+type AdapterExecutor = Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>;
+
+impl Rv32LessThan256Executor {
+    pub fn new(adapter: AdapterExecutor, offset: usize) -> Self {
+        Self(LessThanExecutor::new(adapter, offset))
+    }
+}
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct LessThanPreCompute {
+    a: u8,
+    b: u8,
+    c: u8,
+}
+
+impl<F: PrimeField32> Executor<F> for Rv32LessThan256Executor {
+    fn pre_compute_size(&self) -> usize {
+        size_of::<LessThanPreCompute>()
+    }
+
+    fn pre_compute<Ctx>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: ExecutionCtxTrait,
+    {
+        // Fill the pre-compute record in place, then pick the handler matching the opcode.
+        let record = BorrowMut::<LessThanPreCompute>::borrow_mut(data);
+        let handler: ExecuteFunc<F, Ctx> = match self.pre_compute_impl(pc, inst, record)? {
+            LessThanOpcode::SLT => execute_e1_impl::<_, _, false>,
+            LessThanOpcode::SLTU => execute_e1_impl::<_, _, true>,
+        };
+        Ok(handler)
+    }
+}
+
+impl<F: PrimeField32> MeteredExecutor<F> for Rv32LessThan256Executor {
+    fn metered_pre_compute_size(&self) -> usize {
+        size_of::<E2PreCompute<LessThanPreCompute>>()
+    }
+
+    fn metered_pre_compute<Ctx>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        let slot = BorrowMut::<E2PreCompute<LessThanPreCompute>>::borrow_mut(data);
+        slot.chip_idx = chip_idx as u32;
+        let func: ExecuteFunc<F, Ctx> = match self.pre_compute_impl(pc, inst, &mut slot.data)? {
+            LessThanOpcode::SLT => execute_e2_impl::<_, _, false>,
+            LessThanOpcode::SLTU => execute_e2_impl::<_, _, true>,
+        };
+        Ok(func)
+    }
+}
+
+#[inline(always)]
+unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait, const IS_U256: bool>(
+    pre_compute: &LessThanPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    // The register cells at b, c and a hold little-endian pointers into RV32_MEMORY_AS.
+    let lhs_ptr = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.b as u32);
+    let rhs_ptr = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.c as u32);
+    let dst_ptr = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.a as u32);
+    let lhs = vm_state.vm_read::<u8, INT256_NUM_LIMBS>(RV32_MEMORY_AS, u32::from_le_bytes(lhs_ptr));
+    let rhs = vm_state.vm_read::<u8, INT256_NUM_LIMBS>(RV32_MEMORY_AS, u32::from_le_bytes(rhs_ptr));
+    let is_less = match IS_U256 {
+        true => common::u256_lt(lhs, rhs),
+        false => common::i256_lt(lhs, rhs),
+    };
+    let mut out = [0u8; INT256_NUM_LIMBS];
+    out[0] = u8::from(is_less);
+    vm_state.vm_write(RV32_MEMORY_AS, u32::from_le_bytes(dst_ptr), &out);
+    // Advance the pc by one instruction step and bump `instret`.
+    vm_state.pc += DEFAULT_PC_STEP;
+    vm_state.instret += 1;
+}
+
+unsafe fn execute_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait, const IS_U256: bool>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let record = Borrow::<LessThanPreCompute>::borrow(pre_compute);
+    execute_e12_impl::<F, CTX, IS_U256>(record, vm_state)
+}
+
+unsafe fn execute_e2_impl<F: PrimeField32, CTX: MeteredExecutionCtxTrait, const IS_U256: bool>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    // Report a height change of 1 for this chip, then run the shared handler.
+    let e2 = Borrow::<E2PreCompute<LessThanPreCompute>>::borrow(pre_compute);
+    let chip_idx = e2.chip_idx as usize;
+    vm_state.ctx.on_height_change(chip_idx, 1);
+    execute_e12_impl::<F, CTX, IS_U256>(&e2.data, vm_state);
+}
+
+impl Rv32LessThan256Executor {
+    /// Validates `inst`, stores its `a`/`b`/`c` operands in `data` and returns the local
+    /// opcode. Fails with `InvalidInstruction(pc)` unless `d` is the register address space
+    /// and `e` is the memory address space.
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut LessThanPreCompute,
+    ) -> Result<LessThanOpcode, StaticProgramError> {
+        // Reject any address-space pair other than (register, memory) for `d` and `e`.
+        // No other combination is handled by this executor's pre-compute path.
+        let d_as = inst.d.as_canonical_u32();
+        let e_as = inst.e.as_canonical_u32();
+        if !(d_as == RV32_REGISTER_AS && e_as == RV32_MEMORY_AS) {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+
+        // The record keeps one byte per operand, so each field is truncated to `u8`.
+        let low_byte = |operand: &F| operand.as_canonical_u32() as u8;
+        *data = LessThanPreCompute {
+            a: low_byte(&inst.a),
+            b: low_byte(&inst.b),
+            c: low_byte(&inst.c),
+        };
+
+        // Map the global opcode onto the local `LessThanOpcode` via the class offset.
+        let local_idx = inst.opcode.local_opcode_idx(Rv32LessThan256Opcode::CLASS_OFFSET);
+        Ok(LessThanOpcode::from_usize(local_idx))
+    }
+}
diff --git a/extensions/bigint/circuit/src/lib.rs b/extensions/bigint/circuit/src/lib.rs
index 295ef73db2..0dd5a5b4d4 100644
--- a/extensions/bigint/circuit/src/lib.rs
+++ b/extensions/bigint/circuit/src/lib.rs
@@ -1,49 +1,230 @@
-use openvm_circuit::{self, arch::VmChipWrapper};
-use openvm_rv32_adapters::{Rv32HeapAdapterChip, Rv32HeapBranchAdapterChip};
+use openvm_circuit::{
+    self,
+    arch::{
+        AirInventory, ChipInventoryError, InitFileGenerator, MatrixRecordArena, SystemConfig,
+        VmAirWrapper, VmBuilder, VmChipComplex, VmChipWrapper, VmProverExtension,
+    },
+    system::{SystemChipInventory, SystemCpuBuilder, SystemExecutor},
+};
+use openvm_circuit_derive::{PreflightExecutor, VmConfig};
+use openvm_rv32_adapters::{
+    Rv32HeapAdapterAir, Rv32HeapAdapterExecutor, Rv32HeapAdapterFiller, Rv32HeapBranchAdapterAir,
+    Rv32HeapBranchAdapterExecutor, Rv32HeapBranchAdapterFiller,
+};
 use openvm_rv32im_circuit::{
     adapters::{INT256_NUM_LIMBS, RV32_CELL_BITS},
-    BaseAluCoreChip, BranchEqualCoreChip, BranchLessThanCoreChip, LessThanCoreChip,
-    MultiplicationCoreChip, ShiftCoreChip,
+    BaseAluCoreAir, BaseAluExecutor, BaseAluFiller, BranchEqualCoreAir, BranchEqualExecutor,
+    BranchEqualFiller, BranchLessThanCoreAir, BranchLessThanExecutor, BranchLessThanFiller,
+    LessThanCoreAir, LessThanExecutor, LessThanFiller, MultiplicationCoreAir,
+    MultiplicationExecutor, MultiplicationFiller, Rv32I, Rv32IExecutor, Rv32ImCpuProverExt, Rv32Io,
+    Rv32IoExecutor, Rv32M, Rv32MExecutor, ShiftCoreAir, ShiftExecutor, ShiftFiller,
 };
+use openvm_stark_backend::{
+    config::{StarkGenericConfig, Val},
+    engine::StarkEngine,
+    p3_field::PrimeField32,
+    prover::cpu::{CpuBackend, CpuDevice},
+};
+use serde::{Deserialize, Serialize};
 
 mod extension;
 pub use extension::*;
 
+mod base_alu;
+mod branch_eq;
+mod branch_lt;
+pub(crate) mod common;
+mod less_than;
+mod mult;
+mod shift;
 #[cfg(test)]
 mod tests;
 
+/// BaseAlu256
+pub type Rv32BaseAlu256Air = VmAirWrapper<
+    Rv32HeapAdapterAir<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>,
+    BaseAluCoreAir<INT256_NUM_LIMBS, RV32_CELL_BITS>,
+>;
+#[derive(Clone, PreflightExecutor)]
+pub struct Rv32BaseAlu256Executor(
+    BaseAluExecutor<
+        Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>,
+        INT256_NUM_LIMBS,
+        RV32_CELL_BITS,
+    >,
+);
 pub type Rv32BaseAlu256Chip<F> = VmChipWrapper<
     F,
-    Rv32HeapAdapterChip<F, 2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>,
-    BaseAluCoreChip<INT256_NUM_LIMBS, RV32_CELL_BITS>,
+    BaseAluFiller<
+        Rv32HeapAdapterFiller<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>,
+        INT256_NUM_LIMBS,
+        RV32_CELL_BITS,
+    >,
 >;
 
+/// LessThan256
+pub type Rv32LessThan256Air = VmAirWrapper<
+    Rv32HeapAdapterAir<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>,
+    LessThanCoreAir<INT256_NUM_LIMBS, RV32_CELL_BITS>,
+>;
+#[derive(Clone, PreflightExecutor)]
+pub struct Rv32LessThan256Executor(
+    LessThanExecutor<
+        Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>,
+        INT256_NUM_LIMBS,
+        RV32_CELL_BITS,
+    >,
+);
 pub type Rv32LessThan256Chip<F> = VmChipWrapper<
     F,
-    Rv32HeapAdapterChip<F, 2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>,
-    LessThanCoreChip<INT256_NUM_LIMBS, RV32_CELL_BITS>,
+    LessThanFiller<
+        Rv32HeapAdapterFiller<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>,
+        INT256_NUM_LIMBS,
+        RV32_CELL_BITS,
+    >,
 >;
 
+/// Multiplication256
+pub type Rv32Multiplication256Air = VmAirWrapper<
+    Rv32HeapAdapterAir<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>,
+    MultiplicationCoreAir<INT256_NUM_LIMBS, RV32_CELL_BITS>,
+>;
+#[derive(Clone, PreflightExecutor)]
+pub struct Rv32Multiplication256Executor(
+    MultiplicationExecutor<
+        Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>,
+        INT256_NUM_LIMBS,
+        RV32_CELL_BITS,
+    >,
+);
 pub type Rv32Multiplication256Chip<F> = VmChipWrapper<
     F,
-    Rv32HeapAdapterChip<F, 2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>,
-    MultiplicationCoreChip<INT256_NUM_LIMBS, RV32_CELL_BITS>,
+    MultiplicationFiller<
+        Rv32HeapAdapterFiller<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>,
+        INT256_NUM_LIMBS,
+        RV32_CELL_BITS,
+    >,
 >;
 
+/// Shift256
+pub type Rv32Shift256Air = VmAirWrapper<
+    Rv32HeapAdapterAir<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>,
+    ShiftCoreAir<INT256_NUM_LIMBS, RV32_CELL_BITS>,
+>;
+#[derive(Clone, PreflightExecutor)]
+pub struct Rv32Shift256Executor(
+    ShiftExecutor<
+        Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>,
+        INT256_NUM_LIMBS,
+        RV32_CELL_BITS,
+    >,
+);
 pub type Rv32Shift256Chip<F> = VmChipWrapper<
     F,
-    Rv32HeapAdapterChip<F, 2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>,
-    ShiftCoreChip<INT256_NUM_LIMBS, RV32_CELL_BITS>,
+    ShiftFiller<
+        Rv32HeapAdapterFiller<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>,
+        INT256_NUM_LIMBS,
+        RV32_CELL_BITS,
+    >,
 >;
 
+/// BranchEqual256
+pub type Rv32BranchEqual256Air = VmAirWrapper<
+    Rv32HeapBranchAdapterAir<2, INT256_NUM_LIMBS>,
+    BranchEqualCoreAir<INT256_NUM_LIMBS>,
+>;
+#[derive(Clone, PreflightExecutor)]
+pub struct Rv32BranchEqual256Executor(
+    BranchEqualExecutor<Rv32HeapBranchAdapterExecutor<2, INT256_NUM_LIMBS>, INT256_NUM_LIMBS>,
+);
 pub type Rv32BranchEqual256Chip<F> = VmChipWrapper<
     F,
-    Rv32HeapBranchAdapterChip<F, 2, INT256_NUM_LIMBS>,
-    BranchEqualCoreChip<INT256_NUM_LIMBS>,
+    BranchEqualFiller<Rv32HeapBranchAdapterFiller<2, INT256_NUM_LIMBS>, INT256_NUM_LIMBS>,
 >;
 
+/// BranchLessThan256
+pub type Rv32BranchLessThan256Air = VmAirWrapper<
+    Rv32HeapBranchAdapterAir<2, INT256_NUM_LIMBS>,
+    BranchLessThanCoreAir<INT256_NUM_LIMBS, RV32_CELL_BITS>,
+>;
+#[derive(Clone, PreflightExecutor)]
+pub struct Rv32BranchLessThan256Executor(
+    BranchLessThanExecutor<
+        Rv32HeapBranchAdapterExecutor<2, INT256_NUM_LIMBS>,
+        INT256_NUM_LIMBS,
+        RV32_CELL_BITS,
+    >,
+);
 pub type Rv32BranchLessThan256Chip<F> = VmChipWrapper<
     F,
-    Rv32HeapBranchAdapterChip<F, 2, INT256_NUM_LIMBS>,
-    BranchLessThanCoreChip<INT256_NUM_LIMBS, RV32_CELL_BITS>,
+    BranchLessThanFiller<
+        Rv32HeapBranchAdapterFiller<2, INT256_NUM_LIMBS>,
+        INT256_NUM_LIMBS,
+        RV32_CELL_BITS,
+    >,
 >;
+
+#[derive(Clone, Debug, VmConfig, derive_new::new, Serialize, Deserialize)]
+pub struct Int256Rv32Config {
+    #[config(executor = "SystemExecutor<F>")]
+    pub system: SystemConfig,
+    #[extension]
+    pub rv32i: Rv32I,
+    #[extension]
+    pub rv32m: Rv32M,
+    #[extension]
+    pub io: Rv32Io,
+    #[extension]
+    pub bigint: Int256,
+}
+
+// Default implementation uses no init file
+impl InitFileGenerator for Int256Rv32Config {}
+
+impl Default for Int256Rv32Config {
+    fn default() -> Self {
+        Self {
+            system: SystemConfig::default(),
+            rv32i: Rv32I,
+            rv32m: Rv32M::default(),
+            io: Rv32Io,
+            bigint: Int256::default(),
+        }
+    }
+}
+
+#[derive(Clone)]
+pub struct Int256Rv32CpuBuilder;
+
+impl<E, SC> VmBuilder<E> for Int256Rv32CpuBuilder
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    Val<SC>: PrimeField32,
+{
+    type VmConfig = Int256Rv32Config;
+    type SystemChipInventory = SystemChipInventory<SC>;
+    type RecordArena = MatrixRecordArena<Val<SC>>;
+
+    fn create_chip_complex(
+        &self,
+        config: &Int256Rv32Config,
+        circuit: AirInventory<SC>,
+    ) -> Result<
+        VmChipComplex<SC, Self::RecordArena, E::PB, Self::SystemChipInventory>,
+        ChipInventoryError,
+    > {
+        let mut chip_complex =
+            VmBuilder::<E>::create_chip_complex(&SystemCpuBuilder, &config.system, circuit)?;
+        let inventory = &mut chip_complex.inventory;
+        VmProverExtension::<E, _, _>::extend_prover(&Rv32ImCpuProverExt, &config.rv32i, inventory)?;
+        VmProverExtension::<E, _, _>::extend_prover(&Rv32ImCpuProverExt, &config.rv32m, inventory)?;
+        VmProverExtension::<E, _, _>::extend_prover(&Rv32ImCpuProverExt, &config.io, inventory)?;
+        VmProverExtension::<E, _, _>::extend_prover(
+            &Int256CpuProverExt,
+            &config.bigint,
+            inventory,
+        )?;
+        Ok(chip_complex)
+    }
+}
diff --git a/extensions/bigint/circuit/src/mult.rs b/extensions/bigint/circuit/src/mult.rs
new file mode 100644
index 0000000000..566c049264
--- /dev/null
+++ b/extensions/bigint/circuit/src/mult.rs
@@ -0,0 +1,184 @@
+use std::borrow::{Borrow, BorrowMut};
+
+use openvm_bigint_transpiler::Rv32Mul256Opcode;
+use openvm_circuit::{arch::*, system::memory::online::GuestMemory};
+use openvm_circuit_primitives_derive::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS},
+    LocalOpcode,
+};
+use openvm_rv32_adapters::Rv32HeapAdapterExecutor;
+use openvm_rv32im_circuit::MultiplicationExecutor;
+use openvm_rv32im_transpiler::MulOpcode;
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use crate::{Rv32Multiplication256Executor, INT256_NUM_LIMBS};
+
+type AdapterExecutor = Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>;
+
+impl Rv32Multiplication256Executor {
+    /// Wraps a `MultiplicationExecutor` built from `adapter` and `offset`.
+    ///
+    /// NOTE(review): `offset` is presumably the opcode class offset
+    /// (`Rv32Mul256Opcode::CLASS_OFFSET`) — confirm against callers.
+    pub fn new(adapter: AdapterExecutor, offset: usize) -> Self {
+        Self(MultiplicationExecutor::new(adapter, offset))
+    }
+}
+
+/// Pre-decoded operands of one 256-bit multiply instruction.
+///
+/// Each field is the matching instruction operand truncated to `u8`. At
+/// execution time it is used as an address in the register address space, and
+/// the 4-byte value read there is the memory pointer of the 256-bit operand.
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct MultPreCompute {
+    // Register holding the pointer the product is written to.
+    a: u8,
+    // Register holding the pointer to the first factor.
+    b: u8,
+    // Register holding the pointer to the second factor.
+    c: u8,
+}
+
+impl<F: PrimeField32> Executor<F> for Rv32Multiplication256Executor {
+    /// Size in bytes of a `MultPreCompute`.
+    fn pre_compute_size(&self) -> usize {
+        std::mem::size_of::<MultPreCompute>()
+    }
+
+    /// Decodes `inst` into the caller-provided `data` buffer (viewed as a
+    /// `MultPreCompute`) and returns the unmetered execution handler.
+    ///
+    /// Any error reported by `pre_compute_impl` is propagated.
+    fn pre_compute<Ctx>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: ExecutionCtxTrait,
+    {
+        let decoded: &mut MultPreCompute = data.borrow_mut();
+        self.pre_compute_impl(pc, inst, decoded)?;
+        Ok(execute_e1_impl)
+    }
+}
+
+impl<F: PrimeField32> MeteredExecutor<F> for Rv32Multiplication256Executor {
+    /// Size in bytes of an `E2PreCompute<MultPreCompute>`.
+    fn metered_pre_compute_size(&self) -> usize {
+        std::mem::size_of::<E2PreCompute<MultPreCompute>>()
+    }
+
+    /// Stores `chip_idx` (narrowed to `u32`) and the decoded operands of
+    /// `inst` in `data`, then returns the metered execution handler.
+    ///
+    /// Any error reported by `pre_compute_impl` is propagated.
+    fn metered_pre_compute<Ctx>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        let metered: &mut E2PreCompute<MultPreCompute> = data.borrow_mut();
+        metered.chip_idx = chip_idx as u32;
+        self.pre_compute_impl(pc, inst, &mut metered.data)?;
+        Ok(execute_e2_impl)
+    }
+}
+
+/// Shared body of the metered and unmetered handlers: one 256-bit multiply.
+///
+/// Reads the three pointer registers (`b`, `c`, then `a`), loads both
+/// `INT256_NUM_LIMBS`-byte operands from memory, writes `u256_mul` of them to
+/// the destination pointer, then advances `pc` by `DEFAULT_PC_STEP` and
+/// increments `instret`.
+#[inline(always)]
+unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait>(
+    pre_compute: &MultPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let rs1_ptr = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.b as u32);
+    let rs2_ptr = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.c as u32);
+    let rd_ptr = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.a as u32);
+    let rs1 = vm_state.vm_read::<u8, INT256_NUM_LIMBS>(RV32_MEMORY_AS, u32::from_le_bytes(rs1_ptr));
+    let rs2 = vm_state.vm_read::<u8, INT256_NUM_LIMBS>(RV32_MEMORY_AS, u32::from_le_bytes(rs2_ptr));
+    let rd = u256_mul(rs1, rs2);
+    vm_state.vm_write(RV32_MEMORY_AS, u32::from_le_bytes(rd_ptr), &rd);
+
+    // Wrapping add, as in the 256-bit shift executor: a plain `+=` would panic
+    // on overflow in builds with overflow checks enabled.
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+}
+
+/// Unmetered handler: views the raw pre-compute bytes as a `MultPreCompute`
+/// and runs the shared multiply body.
+unsafe fn execute_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let decoded: &MultPreCompute = pre_compute.borrow();
+    execute_e12_impl::<F, CTX>(decoded, vm_state);
+}
+
+/// Metered handler: reports a height change of 1 for the recorded chip index
+/// to the context, then runs the shared multiply body.
+unsafe fn execute_e2_impl<F: PrimeField32, CTX: MeteredExecutionCtxTrait>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let metered: &E2PreCompute<MultPreCompute> = pre_compute.borrow();
+    let chip = metered.chip_idx as usize;
+    vm_state.ctx.on_height_change(chip, 1);
+    execute_e12_impl::<F, CTX>(&metered.data, vm_state);
+}
+
+impl Rv32Multiplication256Executor {
+    /// Validates `inst` and stores its `a`/`b`/`c` operands, truncated to `u8`,
+    /// in `data`.
+    ///
+    /// Returns `StaticProgramError::InvalidInstruction(pc)` unless operand `d`
+    /// equals `RV32_REGISTER_AS` and operand `e` equals `RV32_MEMORY_AS`.
+    /// Panics if the decoded local opcode is not `MulOpcode::MUL`.
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut MultPreCompute,
+    ) -> Result<(), StaticProgramError> {
+        let register_space = inst.d.as_canonical_u32();
+        let memory_space = inst.e.as_canonical_u32();
+        if register_space != RV32_REGISTER_AS || memory_space != RV32_MEMORY_AS {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+
+        let local_idx = inst
+            .opcode
+            .local_opcode_idx(Rv32Mul256Opcode::CLASS_OFFSET);
+        assert_eq!(MulOpcode::from_usize(local_idx), MulOpcode::MUL);
+
+        data.a = inst.a.as_canonical_u32() as u8;
+        data.b = inst.b.as_canonical_u32() as u8;
+        data.c = inst.c.as_canonical_u32() as u8;
+        Ok(())
+    }
+}
+
+/// Multiplies two 256-bit unsigned integers given as little-endian bytes and
+/// returns the low 256 bits of the product (wrapping multiplication), also as
+/// little-endian bytes.
+///
+/// The bytes are grouped into eight 32-bit words with explicit little-endian
+/// conversions, so the result does not depend on the host byte order.
+#[inline(always)]
+pub(crate) fn u256_mul(
+    rs1: [u8; INT256_NUM_LIMBS],
+    rs2: [u8; INT256_NUM_LIMBS],
+) -> [u8; INT256_NUM_LIMBS] {
+    const WORD_BYTES: usize = 4;
+    const NUM_WORDS: usize = 8;
+    // The word grouping below requires exactly 32 input bytes.
+    const _: () = assert!(INT256_NUM_LIMBS == WORD_BYTES * NUM_WORDS);
+
+    let to_words = |bytes: [u8; INT256_NUM_LIMBS]| -> [u32; NUM_WORDS] {
+        std::array::from_fn(|i| {
+            let start = i * WORD_BYTES;
+            u32::from_le_bytes([
+                bytes[start],
+                bytes[start + 1],
+                bytes[start + 2],
+                bytes[start + 3],
+            ])
+        })
+    };
+    let lhs = to_words(rs1);
+    let rhs = to_words(rs2);
+
+    let mut product = [0u32; NUM_WORDS];
+    for i in 0..NUM_WORDS {
+        let mut carry = 0u64;
+        // Partial products landing at word index >= NUM_WORDS lie outside the
+        // low 256 bits and are dropped, which yields the wrapping result.
+        for j in 0..(NUM_WORDS - i) {
+            // Cannot overflow u64: (2^32 - 1)^2 + 2 * (2^32 - 1) = 2^64 - 1.
+            let acc = lhs[i] as u64 * rhs[j] as u64 + product[i + j] as u64 + carry;
+            product[i + j] = acc as u32;
+            carry = acc >> 32;
+        }
+    }
+
+    let mut out = [0u8; INT256_NUM_LIMBS];
+    for (chunk, word) in out.chunks_exact_mut(WORD_BYTES).zip(product) {
+        chunk.copy_from_slice(&word.to_le_bytes());
+    }
+    out
+}
+
+#[cfg(test)]
+mod tests {
+    use alloy_primitives::U256;
+    use rand::{prelude::StdRng, Rng, SeedableRng};
+
+    use crate::{mult::u256_mul, INT256_NUM_LIMBS};
+
+    /// Checks `u256_mul` against `U256::wrapping_mul` on 10000 seeded random
+    /// operand pairs.
+    #[test]
+    fn test_u256_mul() {
+        let mut rng = StdRng::from_seed([42; 32]);
+        for _ in 0..10000 {
+            let limbs_a: [u64; 4] = rng.gen();
+            let limbs_b: [u64; 4] = rng.gen();
+            let a = U256::from_limbs(limbs_a);
+            let b = U256::from_limbs(limbs_b);
+            // Serialize through `U256` so the bytes are little-endian on every
+            // host and always agree with `a` / `b`; no `unsafe` needed.
+            let a_u8: [u8; INT256_NUM_LIMBS] = a.to_le_bytes();
+            let b_u8: [u8; INT256_NUM_LIMBS] = b.to_le_bytes();
+            assert_eq!(U256::from_le_bytes(u256_mul(a_u8, b_u8)), a.wrapping_mul(b));
+        }
+    }
+}
diff --git a/extensions/bigint/circuit/src/shift.rs b/extensions/bigint/circuit/src/shift.rs
new file mode 100644
index 0000000000..5b033ce832
--- /dev/null
+++ b/extensions/bigint/circuit/src/shift.rs
@@ -0,0 +1,258 @@
+use std::borrow::{Borrow, BorrowMut};
+
+use openvm_bigint_transpiler::Rv32Shift256Opcode;
+use openvm_circuit::{arch::*, system::memory::online::GuestMemory};
+use openvm_circuit_primitives_derive::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS},
+    LocalOpcode,
+};
+use openvm_rv32_adapters::Rv32HeapAdapterExecutor;
+use openvm_rv32im_circuit::ShiftExecutor;
+use openvm_rv32im_transpiler::ShiftOpcode;
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use crate::{Rv32Shift256Executor, INT256_NUM_LIMBS};
+
+type AdapterExecutor = Rv32HeapAdapterExecutor<2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>;
+
+impl Rv32Shift256Executor {
+    /// Wraps a `ShiftExecutor` built from `adapter` and `offset`.
+    ///
+    /// NOTE(review): `offset` is presumably the opcode class offset
+    /// (`Rv32Shift256Opcode::CLASS_OFFSET`) — confirm against callers.
+    pub fn new(adapter: AdapterExecutor, offset: usize) -> Self {
+        Self(ShiftExecutor::new(adapter, offset))
+    }
+}
+
+/// Pre-decoded operands of one 256-bit shift instruction.
+///
+/// Each field is the matching instruction operand truncated to `u8`. At
+/// execution time it is used as an address in the register address space, and
+/// the 4-byte value read there is the memory pointer of the 256-bit operand.
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct ShiftPreCompute {
+    // Register holding the pointer the result is written to.
+    a: u8,
+    // Register holding the pointer to the value being shifted.
+    b: u8,
+    // Register holding the pointer to the operand that encodes the shift amount.
+    c: u8,
+}
+
+impl<F: PrimeField32> Executor<F> for Rv32Shift256Executor {
+    /// Size in bytes of a `ShiftPreCompute`.
+    fn pre_compute_size(&self) -> usize {
+        std::mem::size_of::<ShiftPreCompute>()
+    }
+
+    /// Decodes `inst` into `data` (viewed as a `ShiftPreCompute`) and returns
+    /// the unmetered handler specialized for the decoded shift opcode.
+    ///
+    /// Any error reported by `pre_compute_impl` is propagated.
+    fn pre_compute<Ctx>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: ExecutionCtxTrait,
+    {
+        let decoded: &mut ShiftPreCompute = data.borrow_mut();
+        let handler = match self.pre_compute_impl(pc, inst, decoded)? {
+            ShiftOpcode::SLL => execute_e1_impl::<_, _, SllOp>,
+            ShiftOpcode::SRL => execute_e1_impl::<_, _, SrlOp>,
+            ShiftOpcode::SRA => execute_e1_impl::<_, _, SraOp>,
+        };
+        Ok(handler)
+    }
+}
+
+impl<F: PrimeField32> MeteredExecutor<F> for Rv32Shift256Executor {
+    /// Size in bytes of an `E2PreCompute<ShiftPreCompute>`.
+    fn metered_pre_compute_size(&self) -> usize {
+        std::mem::size_of::<E2PreCompute<ShiftPreCompute>>()
+    }
+
+    /// Stores `chip_idx` (narrowed to `u32`) and the decoded operands of
+    /// `inst` in `data`, then returns the metered handler specialized for the
+    /// decoded shift opcode.
+    ///
+    /// Any error reported by `pre_compute_impl` is propagated.
+    fn metered_pre_compute<Ctx>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        let metered: &mut E2PreCompute<ShiftPreCompute> = data.borrow_mut();
+        metered.chip_idx = chip_idx as u32;
+        let handler = match self.pre_compute_impl(pc, inst, &mut metered.data)? {
+            ShiftOpcode::SLL => execute_e2_impl::<_, _, SllOp>,
+            ShiftOpcode::SRL => execute_e2_impl::<_, _, SrlOp>,
+            ShiftOpcode::SRA => execute_e2_impl::<_, _, SraOp>,
+        };
+        Ok(handler)
+    }
+}
+
+/// Shared body of the metered and unmetered handlers: one 256-bit shift.
+///
+/// Reads the three pointer registers (`b`, `c`, then `a`), loads both
+/// `INT256_NUM_LIMBS`-byte operands from memory, writes `OP::compute` of them
+/// to the destination pointer, then advances `pc` (wrapping) and `instret`.
+#[inline(always)]
+unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait, OP: ShiftOp>(
+    pre_compute: &ShiftPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    // Register reads stay in the order b, c, a.
+    let value_addr = u32::from_le_bytes(
+        vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, u32::from(pre_compute.b)),
+    );
+    let amount_addr = u32::from_le_bytes(
+        vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, u32::from(pre_compute.c)),
+    );
+    let dest_addr = u32::from_le_bytes(
+        vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, u32::from(pre_compute.a)),
+    );
+
+    let value = vm_state.vm_read::<u8, INT256_NUM_LIMBS>(RV32_MEMORY_AS, value_addr);
+    let amount = vm_state.vm_read::<u8, INT256_NUM_LIMBS>(RV32_MEMORY_AS, amount_addr);
+    let shifted = OP::compute(value, amount);
+    vm_state.vm_write(RV32_MEMORY_AS, dest_addr, &shifted);
+
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+}
+
+/// Unmetered handler: views the raw pre-compute bytes as a `ShiftPreCompute`
+/// and runs the shared shift body for `OP`.
+unsafe fn execute_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait, OP: ShiftOp>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let decoded: &ShiftPreCompute = pre_compute.borrow();
+    execute_e12_impl::<F, CTX, OP>(decoded, vm_state);
+}
+/// Metered handler: reports a height change of 1 for the recorded chip index
+/// to the context, then runs the shared shift body for `OP`.
+unsafe fn execute_e2_impl<F: PrimeField32, CTX: MeteredExecutionCtxTrait, OP: ShiftOp>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let metered: &E2PreCompute<ShiftPreCompute> = pre_compute.borrow();
+    let chip = metered.chip_idx as usize;
+    vm_state.ctx.on_height_change(chip, 1);
+    execute_e12_impl::<F, CTX, OP>(&metered.data, vm_state);
+}
+
+impl Rv32Shift256Executor {
+    /// Validates `inst`, stores its `a`/`b`/`c` operands (truncated to `u8`) in
+    /// `data`, and returns the shift opcode decoded relative to
+    /// `Rv32Shift256Opcode::CLASS_OFFSET`.
+    ///
+    /// Returns `StaticProgramError::InvalidInstruction(pc)` unless operand `d`
+    /// equals `RV32_REGISTER_AS` and operand `e` equals `RV32_MEMORY_AS`.
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut ShiftPreCompute,
+    ) -> Result<ShiftOpcode, StaticProgramError> {
+        let register_space = inst.d.as_canonical_u32();
+        let memory_space = inst.e.as_canonical_u32();
+        if register_space != RV32_REGISTER_AS || memory_space != RV32_MEMORY_AS {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+
+        // Operands are stored before the opcode is decoded.
+        data.a = inst.a.as_canonical_u32() as u8;
+        data.b = inst.b.as_canonical_u32() as u8;
+        data.c = inst.c.as_canonical_u32() as u8;
+
+        let local_idx = inst
+            .opcode
+            .local_opcode_idx(Rv32Shift256Opcode::CLASS_OFFSET);
+        Ok(ShiftOpcode::from_usize(local_idx))
+    }
+}
+
+/// A 256-bit shift operation, selected at compile time through a marker type.
+trait ShiftOp {
+    /// Shifts `rs1` by the amount encoded in `rs2` and returns the result.
+    ///
+    /// The implementations in this file use only the lowest 8 bits of the
+    /// first 64-bit limb of `rs2` as the shift amount.
+    fn compute(rs1: [u8; INT256_NUM_LIMBS], rs2: [u8; INT256_NUM_LIMBS]) -> [u8; INT256_NUM_LIMBS];
+}
+/// Marker type for shift left (selected for `ShiftOpcode::SLL`).
+struct SllOp;
+/// Marker type for logical shift right, zero-filled (selected for `ShiftOpcode::SRL`).
+struct SrlOp;
+/// Marker type for arithmetic shift right, sign-filled (selected for `ShiftOpcode::SRA`).
+struct SraOp;
+impl ShiftOp for SllOp {
+    /// Shifts the 256-bit little-endian value `rs1` left by the amount in the
+    /// lowest byte of `rs2` (0..=255). Vacated low bits are zero and bits
+    /// shifted past bit 255 are discarded.
+    ///
+    /// Limbs are decoded and encoded with explicit little-endian conversions,
+    /// so the result does not depend on the host byte order.
+    #[inline(always)]
+    fn compute(rs1: [u8; INT256_NUM_LIMBS], rs2: [u8; INT256_NUM_LIMBS]) -> [u8; INT256_NUM_LIMBS] {
+        const LIMB_BYTES: usize = 8;
+        const NUM_LIMBS: usize = 4;
+        // The limb grouping below requires exactly 32 input bytes.
+        const _: () = assert!(INT256_NUM_LIMBS == LIMB_BYTES * NUM_LIMBS);
+
+        let src: [u64; NUM_LIMBS] = std::array::from_fn(|i| {
+            let mut limb = [0u8; LIMB_BYTES];
+            limb.copy_from_slice(&rs1[i * LIMB_BYTES..(i + 1) * LIMB_BYTES]);
+            u64::from_le_bytes(limb)
+        });
+
+        // Only the lowest 8 bits of `rs2` select the shift amount.
+        let shift = rs2[0] as u32;
+        let limb_offset = (shift / u64::BITS) as usize;
+        let bit_offset = shift % u64::BITS;
+
+        let mut dst = [0u64; NUM_LIMBS];
+        let mut carry = 0u64;
+        for i in limb_offset..NUM_LIMBS {
+            let curr = src[i - limb_offset];
+            // The low `bit_offset` bits of the shifted limb are zero and the
+            // carry fits in them, so `|` merges the two without overlap.
+            dst[i] = (curr << bit_offset) | carry;
+            // Guarded because shifting a u64 right by 64 would overflow.
+            if bit_offset > 0 {
+                carry = curr >> (u64::BITS - bit_offset);
+            }
+        }
+
+        let mut out = [0u8; INT256_NUM_LIMBS];
+        for (chunk, limb) in out.chunks_exact_mut(LIMB_BYTES).zip(dst) {
+            chunk.copy_from_slice(&limb.to_le_bytes());
+        }
+        out
+    }
+}
+impl ShiftOp for SrlOp {
+    /// Logical right shift: delegates to `shift_right` with a zero fill value.
+    #[inline(always)]
+    fn compute(rs1: [u8; INT256_NUM_LIMBS], rs2: [u8; INT256_NUM_LIMBS]) -> [u8; INT256_NUM_LIMBS] {
+        const ZERO_FILL: u64 = 0;
+        shift_right(rs1, rs2, ZERO_FILL)
+    }
+}
+impl ShiftOp for SraOp {
+    /// Arithmetic right shift: delegates to `shift_right`, filling with all
+    /// ones when the top bit of the last byte of `rs1` is set and with zeros
+    /// otherwise.
+    #[inline(always)]
+    fn compute(rs1: [u8; INT256_NUM_LIMBS], rs2: [u8; INT256_NUM_LIMBS]) -> [u8; INT256_NUM_LIMBS] {
+        let sign_bit_set = rs1[INT256_NUM_LIMBS - 1] & 0x80 != 0;
+        let fill = if sign_bit_set { u64::MAX } else { 0 };
+        shift_right(rs1, rs2, fill)
+    }
+}
+
+/// Shifts the 256-bit little-endian value `rs1` right by the amount in the
+/// lowest byte of `rs2` (0..=255).
+///
+/// Every bit position vacated at the top is taken from `init_value`: callers
+/// pass `0` for a logical shift and `u64::MAX` for an arithmetic shift of a
+/// negative value.
+///
+/// Limbs are decoded and encoded with explicit little-endian conversions, so
+/// the result does not depend on the host byte order.
+#[inline(always)]
+fn shift_right(
+    rs1: [u8; INT256_NUM_LIMBS],
+    rs2: [u8; INT256_NUM_LIMBS],
+    init_value: u64,
+) -> [u8; INT256_NUM_LIMBS] {
+    const LIMB_BYTES: usize = 8;
+    const NUM_LIMBS: usize = 4;
+    // The limb grouping below requires exactly 32 input bytes.
+    const _: () = assert!(INT256_NUM_LIMBS == LIMB_BYTES * NUM_LIMBS);
+
+    let src: [u64; NUM_LIMBS] = std::array::from_fn(|i| {
+        let mut limb = [0u8; LIMB_BYTES];
+        limb.copy_from_slice(&rs1[i * LIMB_BYTES..(i + 1) * LIMB_BYTES]);
+        u64::from_le_bytes(limb)
+    });
+
+    // Only the lowest 8 bits of `rs2` select the shift amount.
+    let shift = rs2[0] as u32;
+    let limb_offset = (shift / u64::BITS) as usize;
+    let bit_offset = shift % u64::BITS;
+
+    // Limbs that receive no source data keep the fill value.
+    let mut dst = [init_value; NUM_LIMBS];
+    // The first (topmost) written limb takes its high bits from the fill value.
+    let mut carry = if bit_offset > 0 {
+        init_value << (u64::BITS - bit_offset)
+    } else {
+        0
+    };
+    for i in (limb_offset..NUM_LIMBS).rev() {
+        let curr = src[i];
+        // The high `bit_offset` bits of the shifted limb are zero and the
+        // carry occupies only those bits, so `|` merges the two without overlap.
+        dst[i - limb_offset] = (curr >> bit_offset) | carry;
+        // Guarded because shifting a u64 left by 64 would overflow.
+        if bit_offset > 0 {
+            carry = curr << (u64::BITS - bit_offset);
+        }
+    }
+
+    let mut out = [0u8; INT256_NUM_LIMBS];
+    for (chunk, limb) in out.chunks_exact_mut(LIMB_BYTES).zip(dst) {
+        chunk.copy_from_slice(&limb.to_le_bytes());
+    }
+    out
+}
+
+#[cfg(test)]
+mod tests {
+    use alloy_primitives::U256;
+    use rand::{prelude::StdRng, Rng, SeedableRng};
+
+    use crate::{
+        shift::{ShiftOp, SllOp, SraOp, SrlOp},
+        INT256_NUM_LIMBS,
+    };
+
+    /// Compares all three shift operations against `U256` on 10000 seeded
+    /// random (value, shift amount) pairs.
+    #[test]
+    fn test_shift_op() {
+        let mut rng = StdRng::from_seed([42; 32]);
+        for _ in 0..10000 {
+            let value_bytes: [u8; INT256_NUM_LIMBS] = rng.gen();
+            let shift: u8 = rng.gen();
+            // The shift amount occupies only the first byte of the second operand.
+            let mut amount_bytes = [0u8; INT256_NUM_LIMBS];
+            amount_bytes[0] = shift;
+            let value = U256::from_le_bytes(value_bytes);
+
+            let sll = SllOp::compute(value_bytes, amount_bytes);
+            assert_eq!(U256::from_le_bytes(sll), value << shift);
+
+            let sra = SraOp::compute(value_bytes, amount_bytes);
+            assert_eq!(
+                U256::from_le_bytes(sra),
+                value.arithmetic_shr(shift as usize)
+            );
+
+            let srl = SrlOp::compute(value_bytes, amount_bytes);
+            assert_eq!(U256::from_le_bytes(srl), value >> shift);
+        }
+    }
+}
diff --git a/extensions/bigint/circuit/src/tests.rs b/extensions/bigint/circuit/src/tests.rs
index 0e26352410..f49de57339 100644
--- a/extensions/bigint/circuit/src/tests.rs
+++ b/extensions/bigint/circuit/src/tests.rs
@@ -1,183 +1,214 @@
+use std::sync::Arc;
+
 use openvm_bigint_transpiler::{
     Rv32BaseAlu256Opcode, Rv32BranchEqual256Opcode, Rv32BranchLessThan256Opcode,
     Rv32LessThan256Opcode, Rv32Mul256Opcode, Rv32Shift256Opcode,
 };
 use openvm_circuit::{
     arch::{
-        testing::{VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS, RANGE_TUPLE_CHECKER_BUS},
-        InstructionExecutor,
+        testing::{
+            TestChipHarness, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS, RANGE_TUPLE_CHECKER_BUS,
+        },
+        MatrixRecordArena, PreflightExecutor,
     },
     utils::generate_long_number,
 };
 use openvm_circuit_primitives::{
-    bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
-    range_tuple::{RangeTupleCheckerBus, SharedRangeTupleCheckerChip},
+    bitwise_op_lookup::{BitwiseOperationLookupBus, BitwiseOperationLookupChip},
+    range_tuple::{RangeTupleCheckerBus, RangeTupleCheckerChip, SharedRangeTupleCheckerChip},
+};
+use openvm_instructions::{
+    program::{DEFAULT_PC_STEP, PC_BITS},
+    riscv::RV32_CELL_BITS,
+    LocalOpcode,
 };
-use openvm_instructions::{program::PC_BITS, riscv::RV32_CELL_BITS, LocalOpcode};
 use openvm_rv32_adapters::{
-    rv32_heap_branch_default, rv32_write_heap_default, Rv32HeapAdapterChip,
-    Rv32HeapBranchAdapterChip,
+    rv32_heap_branch_default, rv32_write_heap_default, Rv32HeapAdapterAir, Rv32HeapAdapterExecutor,
+    Rv32HeapAdapterFiller, Rv32HeapBranchAdapterAir, Rv32HeapBranchAdapterExecutor,
+    Rv32HeapBranchAdapterFiller,
 };
 use openvm_rv32im_circuit::{
     adapters::{INT256_NUM_LIMBS, RV_B_TYPE_IMM_BITS},
-    BaseAluCoreChip, BranchEqualCoreChip, BranchLessThanCoreChip, LessThanCoreChip,
-    MultiplicationCoreChip, ShiftCoreChip,
+    BaseAluCoreAir, BaseAluFiller, BranchEqualCoreAir, BranchEqualFiller, BranchLessThanCoreAir,
+    BranchLessThanFiller, LessThanCoreAir, LessThanFiller, MultiplicationCoreAir,
+    MultiplicationFiller, ShiftCoreAir, ShiftFiller,
 };
 use openvm_rv32im_transpiler::{
-    BaseAluOpcode, BranchEqualOpcode, BranchLessThanOpcode, LessThanOpcode, ShiftOpcode,
+    BaseAluOpcode, BranchEqualOpcode, BranchLessThanOpcode, LessThanOpcode, MulOpcode, ShiftOpcode,
 };
 use openvm_stark_backend::p3_field::{FieldAlgebra, PrimeField32};
 use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
-use rand::Rng;
-
-use super::{
-    Rv32BaseAlu256Chip, Rv32BranchEqual256Chip, Rv32BranchLessThan256Chip, Rv32LessThan256Chip,
-    Rv32Multiplication256Chip, Rv32Shift256Chip,
+use rand::{rngs::StdRng, Rng};
+use test_case::test_case;
+
+use crate::{
+    Rv32BaseAlu256Air, Rv32BaseAlu256Chip, Rv32BaseAlu256Executor, Rv32BranchEqual256Air,
+    Rv32BranchEqual256Chip, Rv32BranchEqual256Executor, Rv32BranchLessThan256Air,
+    Rv32BranchLessThan256Chip, Rv32BranchLessThan256Executor, Rv32LessThan256Air,
+    Rv32LessThan256Chip, Rv32LessThan256Executor, Rv32Multiplication256Air,
+    Rv32Multiplication256Chip, Rv32Multiplication256Executor, Rv32Shift256Air, Rv32Shift256Chip,
+    Rv32Shift256Executor,
 };
 
 type F = BabyBear;
+const MAX_INS_CAPACITY: usize = 128;
+const ABS_MAX_BRANCH: i32 = 1 << (RV_B_TYPE_IMM_BITS - 1);
 
 #[allow(clippy::type_complexity)]
-fn run_int_256_rand_execute<E: InstructionExecutor<F>>(
-    opcode: usize,
-    num_ops: usize,
-    executor: &mut E,
+fn set_and_execute_rand<STEP, AIR, CHIP>(
     tester: &mut VmChipTestBuilder<F>,
+    harness: &mut TestChipHarness<F, STEP, AIR, CHIP>,
+    rng: &mut StdRng,
+    opcode: usize,
     branch_fn: Option<fn(usize, &[u32; INT256_NUM_LIMBS], &[u32; INT256_NUM_LIMBS]) -> bool>,
-) {
-    const ABS_MAX_BRANCH: i32 = 1 << (RV_B_TYPE_IMM_BITS - 1);
-
-    let mut rng = create_seeded_rng();
+) where
+    STEP: PreflightExecutor<F, MatrixRecordArena<F>>,
+{
     let branch = branch_fn.is_some();
 
-    for _ in 0..num_ops {
-        let b = generate_long_number::<INT256_NUM_LIMBS, RV32_CELL_BITS>(&mut rng);
-        let c = generate_long_number::<INT256_NUM_LIMBS, RV32_CELL_BITS>(&mut rng);
-        if branch {
-            let imm = rng.gen_range((-ABS_MAX_BRANCH)..ABS_MAX_BRANCH);
-            let instruction = rv32_heap_branch_default(
-                tester,
-                vec![b.map(F::from_canonical_u32)],
-                vec![c.map(F::from_canonical_u32)],
-                imm as isize,
-                opcode,
-            );
-
-            tester.execute_with_pc(
-                executor,
-                &instruction,
-                rng.gen_range((ABS_MAX_BRANCH as u32)..(1 << (PC_BITS - 1))),
-            );
-
-            let cmp_result = branch_fn.unwrap()(opcode, &b, &c);
-            let from_pc = tester.execution.last_from_pc().as_canonical_u32() as i32;
-            let to_pc = tester.execution.last_to_pc().as_canonical_u32() as i32;
-            assert_eq!(to_pc, from_pc + if cmp_result { imm } else { 4 });
-        } else {
-            let instruction = rv32_write_heap_default(
-                tester,
-                vec![b.map(F::from_canonical_u32)],
-                vec![c.map(F::from_canonical_u32)],
-                opcode,
-            );
-            tester.execute(executor, &instruction);
-        }
+    let b = generate_long_number::<INT256_NUM_LIMBS, RV32_CELL_BITS>(rng);
+    let c = generate_long_number::<INT256_NUM_LIMBS, RV32_CELL_BITS>(rng);
+    if branch {
+        let imm = rng.gen_range((-ABS_MAX_BRANCH)..ABS_MAX_BRANCH);
+        let instruction = rv32_heap_branch_default(
+            tester,
+            vec![b.map(F::from_canonical_u32)],
+            vec![c.map(F::from_canonical_u32)],
+            imm as isize,
+            opcode,
+        );
+
+        tester.execute_with_pc(
+            harness,
+            &instruction,
+            rng.gen_range((ABS_MAX_BRANCH as u32)..(1 << (PC_BITS - 1))),
+        );
+
+        let cmp_result = branch_fn.unwrap()(opcode, &b, &c);
+        let from_pc = tester.execution.last_from_pc().as_canonical_u32() as i32;
+        let to_pc = tester.execution.last_to_pc().as_canonical_u32() as i32;
+        assert_eq!(to_pc, from_pc + if cmp_result { imm } else { 4 });
+    } else {
+        let instruction = rv32_write_heap_default(
+            tester,
+            vec![b.map(F::from_canonical_u32)],
+            vec![c.map(F::from_canonical_u32)],
+            opcode,
+        );
+        tester.execute(harness, &instruction);
     }
 }
 
+#[test_case(BaseAluOpcode::ADD, 24)]
+#[test_case(BaseAluOpcode::SUB, 24)]
+#[test_case(BaseAluOpcode::XOR, 24)]
+#[test_case(BaseAluOpcode::OR, 24)]
+#[test_case(BaseAluOpcode::AND, 24)]
 fn run_alu_256_rand_test(opcode: BaseAluOpcode, num_ops: usize) {
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-
+    let mut rng = create_seeded_rng();
     let mut tester = VmChipTestBuilder::default();
+    let offset = Rv32BaseAlu256Opcode::CLASS_OFFSET;
 
-    let mut chip = Rv32BaseAlu256Chip::<F>::new(
-        Rv32HeapAdapterChip::<F, 2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>::new(
-            tester.execution_bus(),
-            tester.program_bus(),
+    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
+    let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+        bitwise_bus,
+    ));
+
+    let air = Rv32BaseAlu256Air::new(
+        Rv32HeapAdapterAir::new(
+            tester.execution_bridge(),
             tester.memory_bridge(),
+            bitwise_bus,
             tester.address_bits(),
+        ),
+        BaseAluCoreAir::new(bitwise_bus, offset),
+    );
+    let executor =
+        Rv32BaseAlu256Executor::new(Rv32HeapAdapterExecutor::new(tester.address_bits()), offset);
+    let chip = Rv32BaseAlu256Chip::new(
+        BaseAluFiller::new(
+            Rv32HeapAdapterFiller::new(tester.address_bits(), bitwise_chip.clone()),
             bitwise_chip.clone(),
+            offset,
         ),
-        BaseAluCoreChip::new(bitwise_chip.clone(), Rv32BaseAlu256Opcode::CLASS_OFFSET),
-        tester.offline_memory_mutex_arc(),
+        tester.memory_helper(),
     );
+    let mut harness = TestChipHarness::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
 
-    run_int_256_rand_execute(
-        opcode.local_usize() + Rv32BaseAlu256Opcode::CLASS_OFFSET,
-        num_ops,
-        &mut chip,
-        &mut tester,
-        None,
-    );
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
+    for _ in 0..num_ops {
+        set_and_execute_rand(
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            opcode.local_usize() + offset,
+            None,
+        );
+    }
+    let tester = tester
+        .build()
+        .load(harness)
+        .load_periphery((bitwise_chip.air, bitwise_chip))
+        .finalize();
     tester.simple_test().expect("Verification failed");
 }
 
-#[test]
-fn alu_256_add_rand_test() {
-    run_alu_256_rand_test(BaseAluOpcode::ADD, 24);
-}
-
-#[test]
-fn alu_256_sub_rand_test() {
-    run_alu_256_rand_test(BaseAluOpcode::SUB, 24);
-}
-
-#[test]
-fn alu_256_xor_rand_test() {
-    run_alu_256_rand_test(BaseAluOpcode::XOR, 24);
-}
-
-#[test]
-fn alu_256_or_rand_test() {
-    run_alu_256_rand_test(BaseAluOpcode::OR, 24);
-}
-
-#[test]
-fn alu_256_and_rand_test() {
-    run_alu_256_rand_test(BaseAluOpcode::AND, 24);
-}
-
+#[test_case(LessThanOpcode::SLT, 24)]
+#[test_case(LessThanOpcode::SLTU, 24)]
 fn run_lt_256_rand_test(opcode: LessThanOpcode, num_ops: usize) {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default();
+    let offset = Rv32LessThan256Opcode::CLASS_OFFSET;
+
     let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
+    let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+        bitwise_bus,
+    ));
 
-    let mut tester = VmChipTestBuilder::default();
-    let mut chip = Rv32LessThan256Chip::<F>::new(
-        Rv32HeapAdapterChip::<F, 2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>::new(
-            tester.execution_bus(),
-            tester.program_bus(),
+    let air = Rv32LessThan256Air::new(
+        Rv32HeapAdapterAir::new(
+            tester.execution_bridge(),
             tester.memory_bridge(),
+            bitwise_bus,
             tester.address_bits(),
-            bitwise_chip.clone(),
         ),
-        LessThanCoreChip::new(bitwise_chip.clone(), Rv32LessThan256Opcode::CLASS_OFFSET),
-        tester.offline_memory_mutex_arc(),
+        LessThanCoreAir::new(bitwise_bus, offset),
     );
 
-    run_int_256_rand_execute(
-        opcode.local_usize() + Rv32LessThan256Opcode::CLASS_OFFSET,
-        num_ops,
-        &mut chip,
-        &mut tester,
-        None,
+    let executor =
+        Rv32LessThan256Executor::new(Rv32HeapAdapterExecutor::new(tester.address_bits()), offset);
+    let chip = Rv32LessThan256Chip::new(
+        LessThanFiller::new(
+            Rv32HeapAdapterFiller::new(tester.address_bits(), bitwise_chip.clone()),
+            bitwise_chip.clone(),
+            offset,
+        ),
+        tester.memory_helper(),
     );
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
-    tester.simple_test().expect("Verification failed");
-}
+    let mut harness = TestChipHarness::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
 
-#[test]
-fn lt_256_slt_rand_test() {
-    run_lt_256_rand_test(LessThanOpcode::SLT, 24);
+    for _ in 0..num_ops {
+        set_and_execute_rand(
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            opcode.local_usize() + offset,
+            None,
+        );
+    }
+    let tester = tester
+        .build()
+        .load(harness)
+        .load_periphery((bitwise_chip.air, bitwise_chip))
+        .finalize();
+    tester.simple_test().expect("Verification failed");
 }
 
-#[test]
-fn lt_256_sltu_rand_test() {
-    run_lt_256_rand_test(LessThanOpcode::SLTU, 24);
-}
+#[test_case(MulOpcode::MUL, 24)]
+fn run_mul_256_rand_test(opcode: MulOpcode, num_ops: usize) {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default();
+    let offset = Rv32Mul256Opcode::CLASS_OFFSET;
 
-fn run_mul_256_rand_test(num_ops: usize) {
     let range_tuple_bus = RangeTupleCheckerBus::new(
         RANGE_TUPLE_CHECKER_BUS,
         [
@@ -185,106 +216,143 @@ fn run_mul_256_rand_test(num_ops: usize) {
             (INT256_NUM_LIMBS * (1 << RV32_CELL_BITS)) as u32,
         ],
     );
-    let range_tuple_checker = SharedRangeTupleCheckerChip::new(range_tuple_bus);
+    let range_tuple_chip =
+        SharedRangeTupleCheckerChip::new(RangeTupleCheckerChip::<2>::new(range_tuple_bus));
     let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
+    let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+        bitwise_bus,
+    ));
 
-    let mut tester = VmChipTestBuilder::default();
-    let mut chip = Rv32Multiplication256Chip::<F>::new(
-        Rv32HeapAdapterChip::<F, 2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>::new(
-            tester.execution_bus(),
-            tester.program_bus(),
+    let air = Rv32Multiplication256Air::new(
+        Rv32HeapAdapterAir::new(
+            tester.execution_bridge(),
             tester.memory_bridge(),
+            bitwise_bus,
             tester.address_bits(),
-            bitwise_chip.clone(),
         ),
-        MultiplicationCoreChip::new(range_tuple_checker.clone(), Rv32Mul256Opcode::CLASS_OFFSET),
-        tester.offline_memory_mutex_arc(),
+        MultiplicationCoreAir::new(range_tuple_bus, offset),
     );
-
-    run_int_256_rand_execute(
-        Rv32Mul256Opcode::CLASS_OFFSET,
-        num_ops,
-        &mut chip,
-        &mut tester,
-        None,
+    let executor = Rv32Multiplication256Executor::new(
+        Rv32HeapAdapterExecutor::new(tester.address_bits()),
+        offset,
+    );
+    let chip = Rv32Multiplication256Chip::<F>::new(
+        MultiplicationFiller::new(
+            Rv32HeapAdapterFiller::new(tester.address_bits(), bitwise_chip.clone()),
+            range_tuple_chip.clone(),
+            offset,
+        ),
+        tester.memory_helper(),
     );
+    let mut harness = TestChipHarness::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
+
+    for _ in 0..num_ops {
+        set_and_execute_rand(
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            opcode.local_usize() + offset,
+            None,
+        );
+    }
     let tester = tester
         .build()
-        .load(chip)
-        .load(range_tuple_checker)
-        .load(bitwise_chip)
+        .load(harness)
+        .load_periphery((range_tuple_chip.air, range_tuple_chip))
+        .load_periphery((bitwise_chip.air, bitwise_chip))
         .finalize();
     tester.simple_test().expect("Verification failed");
 }
 
-#[test]
-fn mul_256_rand_test() {
-    run_mul_256_rand_test(24);
-}
-
+#[test_case(ShiftOpcode::SLL, 24)]
+#[test_case(ShiftOpcode::SRL, 24)]
+#[test_case(ShiftOpcode::SRA, 24)]
 fn run_shift_256_rand_test(opcode: ShiftOpcode, num_ops: usize) {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default();
+    let offset = Rv32Shift256Opcode::CLASS_OFFSET;
+
+    let range_checker_chip = tester.range_checker();
     let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
+    let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+        bitwise_bus,
+    ));
 
-    let mut tester = VmChipTestBuilder::default();
-    let mut chip = Rv32Shift256Chip::<F>::new(
-        Rv32HeapAdapterChip::<F, 2, INT256_NUM_LIMBS, INT256_NUM_LIMBS>::new(
-            tester.execution_bus(),
-            tester.program_bus(),
+    let air = Rv32Shift256Air::new(
+        Rv32HeapAdapterAir::new(
+            tester.execution_bridge(),
             tester.memory_bridge(),
+            bitwise_bus,
             tester.address_bits(),
-            bitwise_chip.clone(),
         ),
-        ShiftCoreChip::new(
+        ShiftCoreAir::new(bitwise_bus, range_checker_chip.bus(), offset),
+    );
+    let executor =
+        Rv32Shift256Executor::new(Rv32HeapAdapterExecutor::new(tester.address_bits()), offset);
+    let chip = Rv32Shift256Chip::new(
+        ShiftFiller::new(
+            Rv32HeapAdapterFiller::new(tester.address_bits(), bitwise_chip.clone()),
             bitwise_chip.clone(),
-            tester.memory_controller().borrow().range_checker.clone(),
-            Rv32Shift256Opcode::CLASS_OFFSET,
+            range_checker_chip.clone(),
+            offset,
         ),
-        tester.offline_memory_mutex_arc(),
+        tester.memory_helper(),
     );
 
-    run_int_256_rand_execute(
-        opcode.local_usize() + Rv32Shift256Opcode::CLASS_OFFSET,
-        num_ops,
-        &mut chip,
-        &mut tester,
-        None,
-    );
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
-    tester.simple_test().expect("Verification failed");
-}
-
-#[test]
-fn shift_256_sll_rand_test() {
-    run_shift_256_rand_test(ShiftOpcode::SLL, 24);
-}
+    let mut harness = TestChipHarness::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
 
-#[test]
-fn shift_256_srl_rand_test() {
-    run_shift_256_rand_test(ShiftOpcode::SRL, 24);
-}
+    for _ in 0..num_ops {
+        set_and_execute_rand(
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            opcode.local_usize() + offset,
+            None,
+        );
+    }
 
-#[test]
-fn shift_256_sra_rand_test() {
-    run_shift_256_rand_test(ShiftOpcode::SRA, 24);
+    let tester = tester
+        .build()
+        .load(harness)
+        .load_periphery((bitwise_chip.air, bitwise_chip))
+        .finalize();
+    tester.simple_test().expect("Verification failed");
 }
 
+#[test_case(BranchEqualOpcode::BEQ, 24)]
+#[test_case(BranchEqualOpcode::BNE, 24)]
 fn run_beq_256_rand_test(opcode: BranchEqualOpcode, num_ops: usize) {
+    let mut rng = create_seeded_rng();
     let mut tester = VmChipTestBuilder::default();
+    let offset = Rv32BranchEqual256Opcode::CLASS_OFFSET;
+
     let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-    let mut chip = Rv32BranchEqual256Chip::<F>::new(
-        Rv32HeapBranchAdapterChip::<F, 2, INT256_NUM_LIMBS>::new(
-            tester.execution_bus(),
-            tester.program_bus(),
+    let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+        bitwise_bus,
+    ));
+    let air = Rv32BranchEqual256Air::new(
+        Rv32HeapBranchAdapterAir::new(
+            tester.execution_bridge(),
             tester.memory_bridge(),
+            bitwise_bus,
             tester.address_bits(),
-            bitwise_chip.clone(),
         ),
-        BranchEqualCoreChip::new(Rv32BranchEqual256Opcode::CLASS_OFFSET, 4),
-        tester.offline_memory_mutex_arc(),
+        BranchEqualCoreAir::new(offset, DEFAULT_PC_STEP),
+    );
+    let executor = Rv32BranchEqual256Executor::new(
+        Rv32HeapBranchAdapterExecutor::new(tester.address_bits()),
+        offset,
+        DEFAULT_PC_STEP,
     );
+    let chip = Rv32BranchEqual256Chip::new(
+        BranchEqualFiller::new(
+            Rv32HeapBranchAdapterFiller::new(tester.address_bits(), bitwise_chip.clone()),
+            offset,
+            DEFAULT_PC_STEP,
+        ),
+        tester.memory_helper(),
+    );
+    let mut harness = TestChipHarness::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
 
     let branch_fn = |opcode: usize, x: &[u32; INT256_NUM_LIMBS], y: &[u32; INT256_NUM_LIMBS]| {
         x.iter()
@@ -294,93 +362,94 @@ fn run_beq_256_rand_test(opcode: BranchEqualOpcode, num_ops: usize) {
                 == BranchEqualOpcode::BNE.local_usize() + Rv32BranchEqual256Opcode::CLASS_OFFSET)
     };
 
-    run_int_256_rand_execute(
-        opcode.local_usize() + Rv32BranchEqual256Opcode::CLASS_OFFSET,
-        num_ops,
-        &mut chip,
-        &mut tester,
-        Some(branch_fn),
-    );
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
+    for _ in 0..num_ops {
+        set_and_execute_rand(
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            opcode.local_usize() + offset,
+            Some(branch_fn),
+        );
+    }
+    let tester = tester
+        .build()
+        .load(harness)
+        .load_periphery((bitwise_chip.air, bitwise_chip))
+        .finalize();
     tester.simple_test().expect("Verification failed");
 }
 
-#[test]
-fn beq_256_beq_rand_test() {
-    run_beq_256_rand_test(BranchEqualOpcode::BEQ, 24);
-}
-
-#[test]
-fn beq_256_bne_rand_test() {
-    run_beq_256_rand_test(BranchEqualOpcode::BNE, 24);
-}
-
+#[test_case(BranchLessThanOpcode::BLT, 24)]
+#[test_case(BranchLessThanOpcode::BLTU, 24)]
+#[test_case(BranchLessThanOpcode::BGE, 24)]
+#[test_case(BranchLessThanOpcode::BGEU, 24)]
 fn run_blt_256_rand_test(opcode: BranchLessThanOpcode, num_ops: usize) {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default();
+    let offset = Rv32BranchLessThan256Opcode::CLASS_OFFSET;
+
     let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
+    let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+        bitwise_bus,
+    ));
 
-    let mut tester = VmChipTestBuilder::default();
-    let mut chip = Rv32BranchLessThan256Chip::<F>::new(
-        Rv32HeapBranchAdapterChip::<F, 2, INT256_NUM_LIMBS>::new(
-            tester.execution_bus(),
-            tester.program_bus(),
+    let air = Rv32BranchLessThan256Air::new(
+        Rv32HeapBranchAdapterAir::new(
+            tester.execution_bridge(),
             tester.memory_bridge(),
+            bitwise_bus,
             tester.address_bits(),
-            bitwise_chip.clone(),
         ),
-        BranchLessThanCoreChip::new(
+        BranchLessThanCoreAir::new(bitwise_bus, offset),
+    );
+    let executor = Rv32BranchLessThan256Executor::new(
+        Rv32HeapBranchAdapterExecutor::new(tester.address_bits()),
+        offset,
+    );
+    let chip = Rv32BranchLessThan256Chip::new(
+        BranchLessThanFiller::new(
+            Rv32HeapBranchAdapterFiller::new(tester.address_bits(), bitwise_chip.clone()),
             bitwise_chip.clone(),
-            Rv32BranchLessThan256Opcode::CLASS_OFFSET,
+            offset,
         ),
-        tester.offline_memory_mutex_arc(),
+        tester.memory_helper(),
     );
+    let mut harness = TestChipHarness::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
 
-    let branch_fn = |opcode: usize, x: &[u32; INT256_NUM_LIMBS], y: &[u32; INT256_NUM_LIMBS]| {
-        let opcode =
-            BranchLessThanOpcode::from_usize(opcode - Rv32BranchLessThan256Opcode::CLASS_OFFSET);
-        let (is_ge, is_signed) = match opcode {
-            BranchLessThanOpcode::BLT => (false, true),
-            BranchLessThanOpcode::BLTU => (false, false),
-            BranchLessThanOpcode::BGE => (true, true),
-            BranchLessThanOpcode::BGEU => (true, false),
-        };
-        let x_sign = x[INT256_NUM_LIMBS - 1] >> (RV32_CELL_BITS - 1) != 0 && is_signed;
-        let y_sign = y[INT256_NUM_LIMBS - 1] >> (RV32_CELL_BITS - 1) != 0 && is_signed;
-        for (x, y) in x.iter().rev().zip(y.iter().rev()) {
-            if x != y {
-                return (x < y) ^ x_sign ^ y_sign ^ is_ge;
+    let branch_fn =
+        |opcode: usize, x: &[u32; INT256_NUM_LIMBS], y: &[u32; INT256_NUM_LIMBS]| -> bool {
+            let opcode = BranchLessThanOpcode::from_usize(
+                opcode - Rv32BranchLessThan256Opcode::CLASS_OFFSET,
+            );
+            let (is_ge, is_signed) = match opcode {
+                BranchLessThanOpcode::BLT => (false, true),
+                BranchLessThanOpcode::BLTU => (false, false),
+                BranchLessThanOpcode::BGE => (true, true),
+                BranchLessThanOpcode::BGEU => (true, false),
+            };
+            let x_sign = x[INT256_NUM_LIMBS - 1] >> (RV32_CELL_BITS - 1) != 0 && is_signed;
+            let y_sign = y[INT256_NUM_LIMBS - 1] >> (RV32_CELL_BITS - 1) != 0 && is_signed;
+            for (x, y) in x.iter().rev().zip(y.iter().rev()) {
+                if x != y {
+                    return (x < y) ^ x_sign ^ y_sign ^ is_ge;
+                }
             }
-        }
-        is_ge
-    };
+            is_ge
+        };
 
-    run_int_256_rand_execute(
-        opcode.local_usize() + Rv32BranchLessThan256Opcode::CLASS_OFFSET,
-        num_ops,
-        &mut chip,
-        &mut tester,
-        Some(branch_fn),
-    );
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
+    for _ in 0..num_ops {
+        set_and_execute_rand(
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            opcode.local_usize() + offset,
+            Some(branch_fn),
+        );
+    }
+    let tester = tester
+        .build()
+        .load(harness)
+        .load_periphery((bitwise_chip.air, bitwise_chip))
+        .finalize();
     tester.simple_test().expect("Verification failed");
 }
-
-#[test]
-fn blt_256_blt_rand_test() {
-    run_blt_256_rand_test(BranchLessThanOpcode::BLT, 24);
-}
-
-#[test]
-fn blt_256_bltu_rand_test() {
-    run_blt_256_rand_test(BranchLessThanOpcode::BLTU, 24);
-}
-
-#[test]
-fn blt_256_bge_rand_test() {
-    run_blt_256_rand_test(BranchLessThanOpcode::BGE, 24);
-}
-
-#[test]
-fn blt_256_bgeu_rand_test() {
-    run_blt_256_rand_test(BranchLessThanOpcode::BGEU, 24);
-}
diff --git a/extensions/ecc/circuit/Cargo.toml b/extensions/ecc/circuit/Cargo.toml
index dca4fb91e9..a194b5ac5a 100644
--- a/extensions/ecc/circuit/Cargo.toml
+++ b/extensions/ecc/circuit/Cargo.toml
@@ -8,14 +8,12 @@ homepage.workspace = true
 repository.workspace = true
 
 [dependencies]
-openvm-circuit-primitives-derive = { workspace = true }
 openvm-circuit-primitives = { workspace = true }
 openvm-circuit-derive = { workspace = true }
 openvm-circuit = { workspace = true }
 openvm-instructions = { workspace = true }
 openvm-mod-circuit-builder = { workspace = true }
 openvm-stark-backend = { workspace = true }
-openvm-rv32im-circuit = { workspace = true }
 openvm-algebra-circuit = { workspace = true }
 openvm-rv32-adapters = { workspace = true }
 openvm-ecc-transpiler = { workspace = true }
@@ -23,17 +21,23 @@ openvm-ecc-transpiler = { workspace = true }
 num-bigint = { workspace = true }
 num-traits = { workspace = true }
 strum = { workspace = true }
-derive_more = { workspace = true }
+derive_more = { workspace = true, features = ["deref", "deref_mut"] }
 derive-new = { workspace = true }
 once_cell = { workspace = true, features = ["std"] }
+rand = { workspace = true }
 serde = { workspace = true }
 serde_with = { workspace = true }
 lazy_static = { workspace = true }
 hex-literal = { workspace = true }
+halo2curves-axiom = { workspace = true }
 
 [dev-dependencies]
+openvm-pairing-guest = { workspace = true, features = ["halo2curves"] }
 openvm-stark-sdk = { workspace = true }
 openvm-mod-circuit-builder = { workspace = true, features = ["test-utils"] }
 openvm-circuit = { workspace = true, features = ["test-utils"] }
 openvm-rv32-adapters = { workspace = true, features = ["test-utils"] }
 lazy_static = { workspace = true }
+
+[package.metadata.cargo-shear]
+ignored = ["rand"]
diff --git a/extensions/ecc/circuit/src/config.rs b/extensions/ecc/circuit/src/config.rs
index a959938be9..cb555bdba9 100644
--- a/extensions/ecc/circuit/src/config.rs
+++ b/extensions/ecc/circuit/src/config.rs
@@ -1,24 +1,28 @@
-use openvm_algebra_circuit::*;
-use openvm_circuit::arch::{InitFileGenerator, SystemConfig};
+use std::result::Result;
+
+use openvm_algebra_circuit::{Rv32ModularConfig, Rv32ModularConfigExecutor, Rv32ModularCpuBuilder};
+use openvm_circuit::{
+    arch::{
+        AirInventory, ChipInventoryError, InitFileGenerator, MatrixRecordArena, SystemConfig,
+        VmBuilder, VmChipComplex, VmProverExtension,
+    },
+    system::SystemChipInventory,
+};
 use openvm_circuit_derive::VmConfig;
-use openvm_rv32im_circuit::*;
-use openvm_stark_backend::p3_field::PrimeField32;
+use openvm_stark_backend::{
+    config::{StarkGenericConfig, Val},
+    engine::StarkEngine,
+    p3_field::PrimeField32,
+    prover::cpu::{CpuBackend, CpuDevice},
+};
 use serde::{Deserialize, Serialize};
 
 use super::*;
 
 #[derive(Clone, Debug, VmConfig, Serialize, Deserialize)]
 pub struct Rv32WeierstrassConfig {
-    #[system]
-    pub system: SystemConfig,
-    #[extension]
-    pub base: Rv32I,
-    #[extension]
-    pub mul: Rv32M,
-    #[extension]
-    pub io: Rv32Io,
-    #[extension]
-    pub modular: ModularExtension,
+    #[config(generics = true)]
+    pub modular: Rv32ModularConfig,
     #[extension]
     pub weierstrass: WeierstrassExtension,
 }
@@ -30,11 +34,7 @@ impl Rv32WeierstrassConfig {
             .flat_map(|c| [c.modulus.clone(), c.scalar.clone()])
             .collect();
         Self {
-            system: SystemConfig::default().with_continuations(),
-            base: Default::default(),
-            mul: Default::default(),
-            io: Default::default(),
-            modular: ModularExtension::new(primes),
+            modular: Rv32ModularConfig::new(primes),
             weierstrass: WeierstrassExtension::new(curves),
         }
     }
@@ -44,8 +44,41 @@ impl InitFileGenerator for Rv32WeierstrassConfig {
     fn generate_init_file_contents(&self) -> Option<String> {
         Some(format!(
             "// This file is automatically generated by cargo openvm. Do not rename or edit.\n{}\n{}\n",
-            self.modular.generate_moduli_init(),
+            self.modular.modular.generate_moduli_init(),
             self.weierstrass.generate_sw_init()
         ))
     }
 }
+
+/// CPU `VmBuilder` for `Rv32WeierstrassConfig`: modular chip complex plus Weierstrass chips.
+#[derive(Clone)]
+pub struct Rv32WeierstrassCpuBuilder;
+
+impl<E, SC> VmBuilder<E> for Rv32WeierstrassCpuBuilder
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    Val<SC>: PrimeField32,
+{
+    type VmConfig = Rv32WeierstrassConfig;
+    type SystemChipInventory = SystemChipInventory<SC>;
+    type RecordArena = MatrixRecordArena<Val<SC>>;
+
+    fn create_chip_complex(
+        &self,
+        config: &Self::VmConfig,
+        circuit: AirInventory<SC>,
+    ) -> Result<
+        VmChipComplex<SC, Self::RecordArena, E::PB, Self::SystemChipInventory>,
+        ChipInventoryError,
+    > {
+        let mut complex =
+            VmBuilder::<E>::create_chip_complex(&Rv32ModularCpuBuilder, &config.modular, circuit)?;
+        VmProverExtension::<E, _, _>::extend_prover(
+            &EccCpuProverExt,
+            &config.weierstrass,
+            &mut complex.inventory,
+        )?;
+        Ok(complex)
+    }
+}
diff --git a/extensions/ecc/circuit/src/lib.rs b/extensions/ecc/circuit/src/lib.rs
index c1ec864636..9986dca696 100644
--- a/extensions/ecc/circuit/src/lib.rs
+++ b/extensions/ecc/circuit/src/lib.rs
@@ -6,3 +6,5 @@ pub use weierstrass_extension::*;
 
 mod config;
 pub use config::*;
+
+pub struct EccCpuProverExt; // prover-extension marker used by `Rv32WeierstrassCpuBuilder`
diff --git a/extensions/ecc/circuit/src/weierstrass_chip/add_ne.rs b/extensions/ecc/circuit/src/weierstrass_chip/add_ne.rs
deleted file mode 100644
index 24bcc52ef3..0000000000
--- a/extensions/ecc/circuit/src/weierstrass_chip/add_ne.rs
+++ /dev/null
@@ -1,28 +0,0 @@
-use std::{cell::RefCell, rc::Rc};
-
-use openvm_circuit_primitives::var_range::VariableRangeCheckerBus;
-use openvm_mod_circuit_builder::{ExprBuilder, ExprBuilderConfig, FieldExpr};
-
-// Assumes that (x1, y1), (x2, y2) both lie on the curve and are not the identity point.
-// Further assumes that x1, x2 are not equal in the coordinate field.
-pub fn ec_add_ne_expr(
-    config: ExprBuilderConfig, // The coordinate field.
-    range_bus: VariableRangeCheckerBus,
-) -> FieldExpr {
-    config.check_valid();
-    let builder = ExprBuilder::new(config, range_bus.range_max_bits);
-    let builder = Rc::new(RefCell::new(builder));
-
-    let x1 = ExprBuilder::new_input(builder.clone());
-    let y1 = ExprBuilder::new_input(builder.clone());
-    let x2 = ExprBuilder::new_input(builder.clone());
-    let y2 = ExprBuilder::new_input(builder.clone());
-    let mut lambda = (y2 - y1.clone()) / (x2.clone() - x1.clone());
-    let mut x3 = lambda.square() - x1.clone() - x2;
-    x3.save_output();
-    let mut y3 = lambda * (x1 - x3.clone()) - y1;
-    y3.save_output();
-
-    let builder = builder.borrow().clone();
-    FieldExpr::new(builder, range_bus, true)
-}
diff --git a/extensions/ecc/circuit/src/weierstrass_chip/add_ne/execution.rs b/extensions/ecc/circuit/src/weierstrass_chip/add_ne/execution.rs
new file mode 100644
index 0000000000..5c63a05c41
--- /dev/null
+++ b/extensions/ecc/circuit/src/weierstrass_chip/add_ne/execution.rs
@@ -0,0 +1,388 @@
+use std::{
+    array::from_fn,
+    borrow::{Borrow, BorrowMut},
+};
+
+use num_bigint::BigUint;
+use openvm_algebra_circuit::fields::{get_field_type, FieldType};
+use openvm_circuit::{
+    arch::*,
+    system::memory::{online::GuestMemory, POINTER_MAX_BITS},
+};
+use openvm_circuit_primitives::AlignedBytesBorrow;
+use openvm_ecc_transpiler::Rv32WeierstrassOpcode;
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS},
+};
+use openvm_mod_circuit_builder::{run_field_expression_precomputed, FieldExpr};
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::EcAddNeExecutor;
+use crate::weierstrass_chip::curves::ec_add_ne;
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct EcAddNePreCompute<'a> {
+    expr: &'a FieldExpr, // the executor's expression, borrowed for the record's lifetime
+    rs_addrs: [u8; 2], // instruction operands `b` and `c`, each narrowed to `u8`
+    a: u8, // instruction operand `a`, narrowed to `u8`
+    flag_idx: u8, // flag index passed to `run_field_expression_precomputed`
+}
+
+impl<'a, const BLOCKS: usize, const BLOCK_SIZE: usize> EcAddNeExecutor<BLOCKS, BLOCK_SIZE> {
+    /// Decodes `inst` into `data` (operand registers, flag index and the borrowed
+    /// expression) and returns `Ok(true)` iff the opcode is `SETUP_EC_ADD_NE`.
+    ///
+    /// Fails with `StaticProgramError::InvalidInstruction(pc)` unless `d` is the register
+    /// address space and `e` is the memory address space.
+    fn pre_compute_impl<F: PrimeField32>(
+        &'a self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut EcAddNePreCompute<'a>,
+    ) -> Result<bool, StaticProgramError> {
+        let Instruction {
+            opcode,
+            a,
+            b,
+            c,
+            d,
+            e,
+            ..
+        } = inst;
+
+        // Validate instruction format
+        let a = a.as_canonical_u32();
+        let b = b.as_canonical_u32();
+        let c = c.as_canonical_u32();
+        let d = d.as_canonical_u32();
+        let e = e.as_canonical_u32();
+        if d != RV32_REGISTER_AS || e != RV32_MEMORY_AS {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+
+        // Resolved once; drives both the flag lookup and the setup check below.
+        let local_opcode = opcode.local_opcode_idx(self.offset);
+
+        // Pre-compute flag_idx: defaults to `num_flags()` unless the opcode has its own flag.
+        let mut flag_idx = self.expr.num_flags() as u8;
+        if self.expr.needs_setup() {
+            // Find which opcode this is in our local_opcode_idx list
+            let position = self
+                .local_opcode_idx
+                .iter()
+                .position(|&idx| idx == local_opcode);
+            // Only positions covered by `opcode_flag_idx` carry a flag; others keep the default.
+            if let Some(&flag) = position.and_then(|p| self.opcode_flag_idx.get(p)) {
+                flag_idx = flag as u8;
+            }
+        }
+
+        // `b` and `c` name the registers holding the input pointers; `a` names the output one.
+        *data = EcAddNePreCompute {
+            expr: &self.expr,
+            rs_addrs: [b as u8, c as u8],
+            a: a as u8,
+            flag_idx,
+        };
+
+        Ok(local_opcode == Rv32WeierstrassOpcode::SETUP_EC_ADD_NE as usize)
+    }
+}
+
+impl<F: PrimeField32, const BLOCKS: usize, const BLOCK_SIZE: usize> Executor<F>
+    for EcAddNeExecutor<BLOCKS, BLOCK_SIZE>
+{
+    /// Size in bytes of the `EcAddNePreCompute` record written by `pre_compute`.
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize {
+        std::mem::size_of::<EcAddNePreCompute>()
+    }
+
+    /// Decodes `inst` into `data`, viewed as an `EcAddNePreCompute`, and selects the handler
+    /// specialized for the expression's prime and for setup vs. regular execution.
+    ///
+    /// Primes for which `get_field_type` returns `None` use the `u8::MAX` handler.
+    ///
+    /// # Panics
+    /// Panics if `get_field_type` returns a field type other than the four matched below.
+    fn pre_compute<Ctx>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: ExecutionCtxTrait,
+    {
+        let record: &mut EcAddNePreCompute = data.borrow_mut();
+        let is_setup = self.pre_compute_impl(pc, inst, record)?;
+        let field_type = get_field_type(&record.expr.builder.prime);
+        match (is_setup, field_type) {
+            (true, Some(FieldType::K256Coordinate)) => Ok(execute_e1_impl::<
+                _,
+                _,
+                BLOCKS,
+                BLOCK_SIZE,
+                { FieldType::K256Coordinate as u8 },
+                true,
+            >),
+            (true, Some(FieldType::P256Coordinate)) => Ok(execute_e1_impl::<
+                _,
+                _,
+                BLOCKS,
+                BLOCK_SIZE,
+                { FieldType::P256Coordinate as u8 },
+                true,
+            >),
+            (true, Some(FieldType::BN254Coordinate)) => Ok(execute_e1_impl::<
+                _,
+                _,
+                BLOCKS,
+                BLOCK_SIZE,
+                { FieldType::BN254Coordinate as u8 },
+                true,
+            >),
+            (true, Some(FieldType::BLS12_381Coordinate)) => Ok(execute_e1_impl::<
+                _,
+                _,
+                BLOCKS,
+                BLOCK_SIZE,
+                { FieldType::BLS12_381Coordinate as u8 },
+                true,
+            >),
+            (false, Some(FieldType::K256Coordinate)) => Ok(execute_e1_impl::<
+                _,
+                _,
+                BLOCKS,
+                BLOCK_SIZE,
+                { FieldType::K256Coordinate as u8 },
+                false,
+            >),
+            (false, Some(FieldType::P256Coordinate)) => Ok(execute_e1_impl::<
+                _,
+                _,
+                BLOCKS,
+                BLOCK_SIZE,
+                { FieldType::P256Coordinate as u8 },
+                false,
+            >),
+            (false, Some(FieldType::BN254Coordinate)) => Ok(execute_e1_impl::<
+                _,
+                _,
+                BLOCKS,
+                BLOCK_SIZE,
+                { FieldType::BN254Coordinate as u8 },
+                false,
+            >),
+            (false, Some(FieldType::BLS12_381Coordinate)) => Ok(execute_e1_impl::<
+                _,
+                _,
+                BLOCKS,
+                BLOCK_SIZE,
+                { FieldType::BLS12_381Coordinate as u8 },
+                false,
+            >),
+            (true, None) => Ok(execute_e1_impl::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, true>),
+            (false, None) => Ok(execute_e1_impl::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, false>),
+            (_, Some(_)) => panic!("Unsupported field type"),
+        }
+    }
+}
+
+impl<F: PrimeField32, const BLOCKS: usize, const BLOCK_SIZE: usize> MeteredExecutor<F>
+    for EcAddNeExecutor<BLOCKS, BLOCK_SIZE>
+{
+    /// Size in bytes of the `E2PreCompute<EcAddNePreCompute>` record used when metering.
+    #[inline(always)]
+    fn metered_pre_compute_size(&self) -> usize {
+        std::mem::size_of::<E2PreCompute<EcAddNePreCompute>>()
+    }
+
+    /// Metered variant of `pre_compute`: `data` is viewed as an
+    /// `E2PreCompute<EcAddNePreCompute>`, whose `chip_idx` is set before the inner record
+    /// is decoded. Handler selection follows the same prime / setup rules as E1.
+    ///
+    /// # Panics
+    /// Panics if `get_field_type` returns a field type other than the four coordinate
+    /// fields matched below.
+    fn metered_pre_compute<Ctx>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        let record: &mut E2PreCompute<EcAddNePreCompute> = data.borrow_mut();
+        record.chip_idx = chip_idx as u32;
+        let is_setup = self.pre_compute_impl(pc, inst, &mut record.data)?;
+        let field_type = get_field_type(&record.data.expr.builder.prime);
+        match (is_setup, field_type) {
+            (true, Some(FieldType::K256Coordinate)) => Ok(execute_e2_impl::<
+                _,
+                _,
+                BLOCKS,
+                BLOCK_SIZE,
+                { FieldType::K256Coordinate as u8 },
+                true,
+            >),
+            (true, Some(FieldType::P256Coordinate)) => Ok(execute_e2_impl::<
+                _,
+                _,
+                BLOCKS,
+                BLOCK_SIZE,
+                { FieldType::P256Coordinate as u8 },
+                true,
+            >),
+            (true, Some(FieldType::BN254Coordinate)) => Ok(execute_e2_impl::<
+                _,
+                _,
+                BLOCKS,
+                BLOCK_SIZE,
+                { FieldType::BN254Coordinate as u8 },
+                true,
+            >),
+            (true, Some(FieldType::BLS12_381Coordinate)) => Ok(execute_e2_impl::<
+                _,
+                _,
+                BLOCKS,
+                BLOCK_SIZE,
+                { FieldType::BLS12_381Coordinate as u8 },
+                true,
+            >),
+            (false, Some(FieldType::K256Coordinate)) => Ok(execute_e2_impl::<
+                _,
+                _,
+                BLOCKS,
+                BLOCK_SIZE,
+                { FieldType::K256Coordinate as u8 },
+                false,
+            >),
+            (false, Some(FieldType::P256Coordinate)) => Ok(execute_e2_impl::<
+                _,
+                _,
+                BLOCKS,
+                BLOCK_SIZE,
+                { FieldType::P256Coordinate as u8 },
+                false,
+            >),
+            (false, Some(FieldType::BN254Coordinate)) => Ok(execute_e2_impl::<
+                _,
+                _,
+                BLOCKS,
+                BLOCK_SIZE,
+                { FieldType::BN254Coordinate as u8 },
+                false,
+            >),
+            (false, Some(FieldType::BLS12_381Coordinate)) => Ok(execute_e2_impl::<
+                _,
+                _,
+                BLOCKS,
+                BLOCK_SIZE,
+                { FieldType::BLS12_381Coordinate as u8 },
+                false,
+            >),
+            (true, None) => Ok(execute_e2_impl::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, true>),
+            (false, None) => Ok(execute_e2_impl::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, false>),
+            (_, Some(_)) => panic!("Unsupported field type"),
+        }
+    }
+}
+
+unsafe fn execute_e12_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const BLOCKS: usize,
+    const BLOCK_SIZE: usize,
+    const FIELD_TYPE: u8,
+    const IS_SETUP: bool,
+>(
+    pre_compute: &EcAddNePreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    // Shared body of the E1/E2 handlers. First read the two operand pointers from registers.
+    let rs_vals = pre_compute
+        .rs_addrs
+        .map(|addr| u32::from_le_bytes(vm_state.vm_read(RV32_REGISTER_AS, addr as u32)));
+
+    // Read `BLOCKS` blocks of `BLOCK_SIZE` bytes from memory at each pointer (one per point).
+    let read_data: [[[u8; BLOCK_SIZE]; BLOCKS]; 2] = rs_vals.map(|address| {
+        debug_assert!(address as usize + BLOCK_SIZE * BLOCKS - 1 < (1 << POINTER_MAX_BITS));
+        from_fn(|i| vm_state.vm_read(RV32_MEMORY_AS, address + (i * BLOCK_SIZE) as u32))
+    });
+    // Setup only: the first `BLOCKS / 2` blocks of operand 0 must encode the expression's prime.
+    if IS_SETUP {
+        let input_prime = BigUint::from_bytes_le(read_data[0][..BLOCKS / 2].as_flattened());
+        if input_prime != pre_compute.expr.prime {
+            vm_state.exit_code = Err(ExecutionError::Fail {
+                pc: vm_state.pc,
+                msg: "EcAddNe: mismatched prime",
+            });
+            return; // leaves `pc` and `instret` untouched
+        }
+    }
+    // Setup and `u8::MAX` (no specialized field) use the generic evaluator; else `ec_add_ne`.
+    let output_data = if FIELD_TYPE == u8::MAX || IS_SETUP {
+        let read_data: DynArray<u8> = read_data.into();
+        run_field_expression_precomputed::<true>(
+            pre_compute.expr,
+            pre_compute.flag_idx as usize,
+            &read_data.0,
+        )
+        .into()
+    } else {
+        ec_add_ne::<FIELD_TYPE, BLOCKS, BLOCK_SIZE>(read_data)
+    };
+
+    let rd_val = u32::from_le_bytes(vm_state.vm_read(RV32_REGISTER_AS, pre_compute.a as u32));
+    debug_assert!(rd_val as usize + BLOCK_SIZE * BLOCKS - 1 < (1 << POINTER_MAX_BITS));
+
+    // Write the result to memory block by block, starting at the pointer read from register `a`.
+    for (i, block) in output_data.into_iter().enumerate() {
+        vm_state.vm_write(RV32_MEMORY_AS, rd_val + (i * BLOCK_SIZE) as u32, &block);
+    }
+
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+}
+
+unsafe fn execute_e1_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const BLOCKS: usize,
+    const BLOCK_SIZE: usize,
+    const FIELD_TYPE: u8,
+    const IS_SETUP: bool,
+>(
+    pre_compute: &[u8], // bytes viewed as an `EcAddNePreCompute` record
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &EcAddNePreCompute = pre_compute.borrow();
+    execute_e12_impl::<_, _, BLOCKS, BLOCK_SIZE, FIELD_TYPE, IS_SETUP>(pre_compute, vm_state);
+}
+
+unsafe fn execute_e2_impl<
+    F: PrimeField32,
+    CTX: MeteredExecutionCtxTrait,
+    const BLOCKS: usize,
+    const BLOCK_SIZE: usize,
+    const FIELD_TYPE: u8,
+    const IS_SETUP: bool,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    // E2 handler: report a height change of 1 for this chip, then run the shared body.
+    let e2_pre_compute: &E2PreCompute<EcAddNePreCompute> = pre_compute.borrow();
+    let chip_idx = e2_pre_compute.chip_idx as usize;
+    vm_state.ctx.on_height_change(chip_idx, 1);
+    execute_e12_impl::<_, _, BLOCKS, BLOCK_SIZE, FIELD_TYPE, IS_SETUP>(
+        &e2_pre_compute.data,
+        vm_state,
+    );
+}
diff --git a/extensions/ecc/circuit/src/weierstrass_chip/add_ne/mod.rs b/extensions/ecc/circuit/src/weierstrass_chip/add_ne/mod.rs
new file mode 100644
index 0000000000..5bb247de07
--- /dev/null
+++ b/extensions/ecc/circuit/src/weierstrass_chip/add_ne/mod.rs
@@ -0,0 +1,131 @@
+use std::{cell::RefCell, rc::Rc};
+
+use derive_more::derive::{Deref, DerefMut};
+use openvm_circuit::{
+    arch::*,
+    system::memory::{offline_checker::MemoryBridge, SharedMemoryHelper},
+};
+use openvm_circuit_derive::PreflightExecutor;
+use openvm_circuit_primitives::{
+    bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
+    var_range::{SharedVariableRangeCheckerChip, VariableRangeCheckerBus},
+};
+use openvm_ecc_transpiler::Rv32WeierstrassOpcode;
+use openvm_instructions::riscv::RV32_CELL_BITS;
+use openvm_mod_circuit_builder::{
+    ExprBuilder, ExprBuilderConfig, FieldExpr, FieldExpressionCoreAir, FieldExpressionExecutor,
+    FieldExpressionFiller,
+};
+use openvm_rv32_adapters::{
+    Rv32VecHeapAdapterAir, Rv32VecHeapAdapterExecutor, Rv32VecHeapAdapterFiller,
+};
+
+use super::{WeierstrassAir, WeierstrassChip};
+
+mod execution;
+
+/// Builds the `FieldExpr` for `(x1, y1) + (x2, y2)`, assuming both points lie on the curve,
+/// neither is the identity point, and `x1 != x2` in the coordinate field.
+pub fn ec_add_ne_expr(
+    config: ExprBuilderConfig, // The coordinate field.
+    range_bus: VariableRangeCheckerBus,
+) -> FieldExpr {
+    config.check_valid();
+    let builder = ExprBuilder::new(config, range_bus.range_max_bits);
+    let builder = Rc::new(RefCell::new(builder));
+    // Inputs are registered in the order x1, y1, x2, y2; outputs are saved as x3, then y3.
+    let x1 = ExprBuilder::new_input(builder.clone());
+    let y1 = ExprBuilder::new_input(builder.clone());
+    let x2 = ExprBuilder::new_input(builder.clone());
+    let y2 = ExprBuilder::new_input(builder.clone());
+    let mut lambda = (y2 - y1.clone()) / (x2.clone() - x1.clone());
+    let mut x3 = lambda.square() - x1.clone() - x2;
+    x3.save_output();
+    let mut y3 = lambda * (x1 - x3.clone()) - y1;
+    y3.save_output();
+
+    let builder = (*builder).borrow().clone();
+    FieldExpr::new(builder, range_bus, true)
+}
+
+/// `BLOCK_SIZE`: number of cells read at a time; must be a power of 2.
+/// `BLOCKS`: number of blocks needed to represent one input or output.
+/// For example, for bls12_381, `BLOCK_SIZE = 16`: each element takes 3 blocks and, with two
+/// elements per input `AffinePoint`, `BLOCKS = 6`. For secp256k1, `BLOCK_SIZE = 32`, `BLOCKS = 2`.
+#[derive(Clone, PreflightExecutor, Deref, DerefMut)]
+pub struct EcAddNeExecutor<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    FieldExpressionExecutor<Rv32VecHeapAdapterExecutor<2, BLOCKS, BLOCKS, BLOCK_SIZE, BLOCK_SIZE>>,
+);
+
+/// Returns the add-ne expression together with its local opcodes; the setup opcode is
+/// listed last.
+fn gen_base_expr(
+    config: ExprBuilderConfig,
+    range_checker_bus: VariableRangeCheckerBus,
+) -> (FieldExpr, Vec<usize>) {
+    let opcodes = [
+        Rv32WeierstrassOpcode::EC_ADD_NE,
+        Rv32WeierstrassOpcode::SETUP_EC_ADD_NE,
+    ];
+    let local_opcode_idx = opcodes.map(|op| op as usize).to_vec();
+    (ec_add_ne_expr(config, range_checker_bus), local_opcode_idx)
+}
+
+/// Builds the AIR for `EC_ADD_NE` / `SETUP_EC_ADD_NE` over the coordinate field in `config`.
+pub fn get_ec_addne_air<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    exec_bridge: ExecutionBridge,
+    mem_bridge: MemoryBridge,
+    config: ExprBuilderConfig,
+    range_checker_bus: VariableRangeCheckerBus,
+    bitwise_lookup_bus: BitwiseOperationLookupBus,
+    pointer_max_bits: usize,
+    offset: usize,
+) -> WeierstrassAir<2, BLOCKS, BLOCK_SIZE> {
+    let (expr, local_opcode_idx) = gen_base_expr(config, range_checker_bus);
+    let adapter = Rv32VecHeapAdapterAir::new(
+        exec_bridge,
+        mem_bridge,
+        bitwise_lookup_bus,
+        pointer_max_bits,
+    );
+    let core = FieldExpressionCoreAir::new(expr, offset, local_opcode_idx, vec![]);
+    WeierstrassAir::new(adapter, core)
+}
+
+/// Builds the executor for `EC_ADD_NE` / `SETUP_EC_ADD_NE` over the coordinate field
+/// described by `config`.
+///
+/// The opcode-flag list is empty and the executor is labelled `"EcAddNe"`.
+pub fn get_ec_addne_step<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    config: ExprBuilderConfig,
+    range_checker_bus: VariableRangeCheckerBus,
+    pointer_max_bits: usize,
+    offset: usize,
+) -> EcAddNeExecutor<BLOCKS, BLOCK_SIZE> {
+    let (expr, local_opcode_idx) = gen_base_expr(config, range_checker_bus);
+    let adapter = Rv32VecHeapAdapterExecutor::new(pointer_max_bits);
+    let inner =
+        FieldExpressionExecutor::new(adapter, expr, offset, local_opcode_idx, vec![], "EcAddNe");
+    EcAddNeExecutor(inner)
+}
+
+/// Builds the trace-filling chip for `EC_ADD_NE`, using `range_checker`'s bus for the expression.
+pub fn get_ec_addne_chip<F, const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    config: ExprBuilderConfig,
+    mem_helper: SharedMemoryHelper<F>,
+    range_checker: SharedVariableRangeCheckerChip,
+    bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+    pointer_max_bits: usize,
+) -> WeierstrassChip<F, 2, BLOCKS, BLOCK_SIZE> {
+    let (expr, local_opcode_idx) = gen_base_expr(config, range_checker.bus());
+    let adapter = Rv32VecHeapAdapterFiller::new(pointer_max_bits, bitwise_lookup_chip);
+    let filler = FieldExpressionFiller::new(
+        adapter,
+        expr,
+        local_opcode_idx,
+        vec![],
+        range_checker,
+        false,
+    );
+    WeierstrassChip::new(filler, mem_helper)
+}
diff --git a/extensions/ecc/circuit/src/weierstrass_chip/curves.rs b/extensions/ecc/circuit/src/weierstrass_chip/curves.rs
new file mode 100644
index 0000000000..085d40ff5f
--- /dev/null
+++ b/extensions/ecc/circuit/src/weierstrass_chip/curves.rs
@@ -0,0 +1,214 @@
+use halo2curves_axiom::ff::PrimeField;
+use num_bigint::BigUint;
+use num_traits::Num;
+use openvm_algebra_circuit::fields::{
+    blocks_to_field_element, blocks_to_field_element_bls12_381_coordinate, field_element_to_blocks,
+    field_element_to_blocks_bls12_381_coordinate, FieldType,
+};
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum CurveType {
+    K256 = 0, // discriminants are cast to `u8` and used as const-generic curve selectors
+    P256 = 1,
+    BN254 = 2,
+    BLS12_381 = 3,
+}
+
+const P256_NEG_A: u64 = 3; // negated P256 coefficient: `a` itself is `-P256_NEG_A` in the field
+
+fn get_modulus_as_bigint<F: PrimeField>() -> BigUint {
+    BigUint::from_str_radix(F::MODULUS.trim_start_matches("0x"), 16).unwrap() // hex string -> int
+}
+
+/// Identifies which natively supported curve, if any, matches the given coordinate-field
+/// modulus and `a` coefficient. Candidates are tried in the order K256, P256, BN254,
+/// BLS12_381; `None` is returned when nothing matches.
+pub(super) fn get_curve_type(modulus: &BigUint, a_coeff: &BigUint) -> Option<CurveType> {
+    let a_is_zero = *a_coeff == BigUint::ZERO;
+
+    if a_is_zero && *modulus == get_modulus_as_bigint::<halo2curves_axiom::secq256k1::Fq>() {
+        return Some(CurveType::K256);
+    }
+
+    // P256 is the only candidate with non-zero `a`, namely `-P256_NEG_A` in its field.
+    if *modulus == get_modulus_as_bigint::<halo2curves_axiom::secp256r1::Fp>() {
+        let p256_a = (-halo2curves_axiom::secp256r1::Fp::from(P256_NEG_A)).to_bytes();
+        if *a_coeff == BigUint::from_bytes_le(&p256_a) {
+            return Some(CurveType::P256);
+        }
+    }
+
+    if a_is_zero && *modulus == get_modulus_as_bigint::<halo2curves_axiom::bn256::Fq>() {
+        return Some(CurveType::BN254);
+    }
+
+    if a_is_zero && *modulus == get_modulus_as_bigint::<halo2curves_axiom::bls12_381::Fq>() {
+        return Some(CurveType::BLS12_381);
+    }
+
+    None
+}
+
+/// Dispatches addition of two points on the const-generic `FIELD_TYPE`.
+///
+/// `FIELD_TYPE` is compared against the coordinate-field variants of `FieldType`;
+/// `input_data` holds the two operand points. Any other `FIELD_TYPE` panics.
+#[inline(always)]
+pub fn ec_add_ne<const FIELD_TYPE: u8, const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    input_data: [[[u8; BLOCK_SIZE]; BLOCKS]; 2],
+) -> [[u8; BLOCK_SIZE]; BLOCKS] {
+    if FIELD_TYPE == FieldType::K256Coordinate as u8 {
+        ec_add_ne_256bit::<halo2curves_axiom::secq256k1::Fq, BLOCKS, BLOCK_SIZE>(input_data)
+    } else if FIELD_TYPE == FieldType::P256Coordinate as u8 {
+        ec_add_ne_256bit::<halo2curves_axiom::secp256r1::Fp, BLOCKS, BLOCK_SIZE>(input_data)
+    } else if FIELD_TYPE == FieldType::BN254Coordinate as u8 {
+        ec_add_ne_256bit::<halo2curves_axiom::bn256::Fq, BLOCKS, BLOCK_SIZE>(input_data)
+    } else if FIELD_TYPE == FieldType::BLS12_381Coordinate as u8 {
+        ec_add_ne_bls12_381::<BLOCKS, BLOCK_SIZE>(input_data)
+    } else {
+        panic!("Unsupported field type: {}", FIELD_TYPE)
+    }
+}
+
+/// Dispatches elliptic-curve point doubling on the const-generic `CURVE_TYPE`.
+///
+/// `CURVE_TYPE` is compared against the `CurveType` discriminants. K256, P256 and BN254 use
+/// the 256-bit implementation (P256 with `NEG_A = P256_NEG_A`, the others with `NEG_A = 0`);
+/// BLS12_381 has its own block conversion. Any other value panics.
+#[inline(always)]
+pub fn ec_double<const CURVE_TYPE: u8, const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    input_data: [[u8; BLOCK_SIZE]; BLOCKS],
+) -> [[u8; BLOCK_SIZE]; BLOCKS] {
+    if CURVE_TYPE == CurveType::K256 as u8 {
+        ec_double_256bit::<halo2curves_axiom::secq256k1::Fq, 0, BLOCKS, BLOCK_SIZE>(input_data)
+    } else if CURVE_TYPE == CurveType::P256 as u8 {
+        ec_double_256bit::<halo2curves_axiom::secp256r1::Fp, P256_NEG_A, BLOCKS, BLOCK_SIZE>(
+            input_data,
+        )
+    } else if CURVE_TYPE == CurveType::BN254 as u8 {
+        ec_double_256bit::<halo2curves_axiom::bn256::Fq, 0, BLOCKS, BLOCK_SIZE>(input_data)
+    } else if CURVE_TYPE == CurveType::BLS12_381 as u8 {
+        ec_double_bls12_381::<BLOCKS, BLOCK_SIZE>(input_data)
+    } else {
+        panic!("Unsupported curve type: {}", CURVE_TYPE)
+    }
+}
+
+/// Point addition for fields with a 32-byte representation; x is the first half of the blocks.
+#[inline(always)]
+fn ec_add_ne_256bit<
+    F: PrimeField<Repr = [u8; 32]>,
+    const BLOCKS: usize,
+    const BLOCK_SIZE: usize,
+>(
+    input_data: [[[u8; BLOCK_SIZE]; BLOCKS]; 2],
+) -> [[u8; BLOCK_SIZE]; BLOCKS] {
+    let (x1, y1) = input_data[0].split_at(BLOCKS / 2);
+    let (x2, y2) = input_data[1].split_at(BLOCKS / 2);
+    let [x1, y1, x2, y2] =
+        [x1, y1, x2, y2].map(|half| blocks_to_field_element::<F>(half.as_flattened()));
+    let (x3, y3) = ec_add_ne_impl::<F>(x1, y1, x2, y2);
+    let mut output = [[0u8; BLOCK_SIZE]; BLOCKS];
+    let (out_x, out_y) = output.split_at_mut(BLOCKS / 2);
+    field_element_to_blocks::<F, BLOCK_SIZE>(&x3, out_x);
+    field_element_to_blocks::<F, BLOCK_SIZE>(&y3, out_y);
+    output
+}
+
+/// Point doubling for fields with a 32-byte representation; x is the first half of the blocks.
+#[inline(always)]
+fn ec_double_256bit<
+    F: PrimeField<Repr = [u8; 32]>,
+    const NEG_A: u64,
+    const BLOCKS: usize,
+    const BLOCK_SIZE: usize,
+>(
+    input_data: [[u8; BLOCK_SIZE]; BLOCKS],
+) -> [[u8; BLOCK_SIZE]; BLOCKS] {
+    let (x_blocks, y_blocks) = input_data.split_at(BLOCKS / 2);
+    let x1 = blocks_to_field_element::<F>(x_blocks.as_flattened());
+    let y1 = blocks_to_field_element::<F>(y_blocks.as_flattened());
+    let (x3, y3) = ec_double_impl::<F, NEG_A>(x1, y1);
+    let mut output = [[0u8; BLOCK_SIZE]; BLOCKS];
+    field_element_to_blocks::<F, BLOCK_SIZE>(&x3, &mut output[..BLOCKS / 2]);
+    field_element_to_blocks::<F, BLOCK_SIZE>(&y3, &mut output[BLOCKS / 2..]);
+    output
+}
+
+/// Point addition over the BLS12-381 coordinate field.
+///
+/// Uses the BLS12-381-specific block conversions on the way in and on the way out.
+#[inline(always)]
+fn ec_add_ne_bls12_381<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    input_data: [[[u8; BLOCK_SIZE]; BLOCKS]; 2],
+) -> [[u8; BLOCK_SIZE]; BLOCKS] {
+    // Split each operand into its x and y block halves.
+    let (x1, y1) = input_data[0].split_at(BLOCKS / 2);
+    let (x2, y2) = input_data[1].split_at(BLOCKS / 2);
+    let [x1, y1, x2, y2] = [x1, y1, x2, y2]
+        .map(|half| blocks_to_field_element_bls12_381_coordinate(half.as_flattened()));
+
+    let (x3, y3) = ec_add_ne_impl::<halo2curves_axiom::bls12_381::Fq>(x1, y1, x2, y2);
+
+    // Write x into the first half of the output blocks and y into the second.
+    let mut output = [[0u8; BLOCK_SIZE]; BLOCKS];
+    let (out_x, out_y) = output.split_at_mut(BLOCKS / 2);
+    field_element_to_blocks_bls12_381_coordinate(&x3, out_x);
+    field_element_to_blocks_bls12_381_coordinate(&y3, out_y);
+    output
+}
+
+/// Point doubling over the BLS12-381 coordinate field (`NEG_A = 0`).
+#[inline(always)]
+fn ec_double_bls12_381<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    input_data: [[u8; BLOCK_SIZE]; BLOCKS],
+) -> [[u8; BLOCK_SIZE]; BLOCKS] {
+    let (x_blocks, y_blocks) = input_data.split_at(BLOCKS / 2);
+    let x1 = blocks_to_field_element_bls12_381_coordinate(x_blocks.as_flattened());
+    let y1 = blocks_to_field_element_bls12_381_coordinate(y_blocks.as_flattened());
+    let (x3, y3) = ec_double_impl::<halo2curves_axiom::bls12_381::Fq, 0>(x1, y1);
+
+    let mut output = [[0u8; BLOCK_SIZE]; BLOCKS];
+    let (out_x, out_y) = output.split_at_mut(BLOCKS / 2);
+    field_element_to_blocks_bls12_381_coordinate(&x3, out_x);
+    field_element_to_blocks_bls12_381_coordinate(&y3, out_y);
+    output
+}
+
+/// Adds the points `(x1, y1)` and `(x2, y2)` with the chord formula and returns `(x3, y3)`.
+/// Panics if `x2 - x1` has no inverse.
+#[inline(always)]
+pub fn ec_add_ne_impl<F: PrimeField>(x1: F, y1: F, x2: F, y2: F) -> (F, F) {
+    // Slope of the chord: (y2 - y1) / (x2 - x1).
+    let dx_inv = (x2 - x1).invert().unwrap();
+    let slope = (y2 - y1) * dx_inv;
+
+    // x3 = slope^2 - x1 - x2 and y3 = slope * (x1 - x3) - y1.
+    let x3 = slope.square() - x1 - x2;
+    let y3 = slope * (x1 - x3) - y1;
+    (x3, y3)
+}
+
+/// Doubles the point `(x1, y1)` with the tangent formula and returns `(x3, y3)`.
+///
+/// The curve coefficient is `a = -NEG_A`. Panics if `2 * y1` has no inverse.
+#[inline(always)]
+pub fn ec_double_impl<F: PrimeField, const NEG_A: u64>(x1: F, y1: F) -> (F, F) {
+    let x1_sq = x1.square();
+    let three_x1_sq = x1_sq + x1_sq.double();
+    // Numerator 3 * x1^2 + a; the subtraction is skipped when `NEG_A` is zero.
+    let numerator = if NEG_A == 0 {
+        three_x1_sq
+    } else {
+        three_x1_sq - F::from(NEG_A)
+    };
+    // lambda = numerator / (2 * y1)
+    let lambda = numerator * y1.double().invert().unwrap();
+
+    // x3 = lambda^2 - 2 * x1
+    let x3 = lambda.square() - x1.double();
+    // y3 = lambda * (x1 - x3) - y1
+    let y3 = lambda * (x1 - x3) - y1;
+
+    (x3, y3)
+}
diff --git a/extensions/ecc/circuit/src/weierstrass_chip/double.rs b/extensions/ecc/circuit/src/weierstrass_chip/double.rs
deleted file mode 100644
index 0ae55f2df7..0000000000
--- a/extensions/ecc/circuit/src/weierstrass_chip/double.rs
+++ /dev/null
@@ -1,36 +0,0 @@
-use std::{cell::RefCell, rc::Rc};
-
-use num_bigint::BigUint;
-use num_traits::One;
-use openvm_circuit_primitives::var_range::VariableRangeCheckerBus;
-use openvm_mod_circuit_builder::{ExprBuilder, ExprBuilderConfig, FieldExpr, FieldVariable};
-
-pub fn ec_double_ne_expr(
-    config: ExprBuilderConfig, // The coordinate field.
-    range_bus: VariableRangeCheckerBus,
-    a_biguint: BigUint,
-) -> FieldExpr {
-    config.check_valid();
-    let builder = ExprBuilder::new(config, range_bus.range_max_bits);
-    let builder = Rc::new(RefCell::new(builder));
-
-    let mut x1 = ExprBuilder::new_input(builder.clone());
-    let mut y1 = ExprBuilder::new_input(builder.clone());
-    let a = ExprBuilder::new_const(builder.clone(), a_biguint.clone());
-    let is_double_flag = builder.borrow_mut().new_flag();
-    // We need to prevent divide by zero when not double flag
-    // (equivalently, when it is the setup opcode)
-    let lambda_denom = FieldVariable::select(
-        is_double_flag,
-        &y1.int_mul(2),
-        &ExprBuilder::new_const(builder.clone(), BigUint::one()),
-    );
-    let mut lambda = (x1.square().int_mul(3) + a) / lambda_denom;
-    let mut x3 = lambda.square() - x1.int_mul(2);
-    x3.save_output();
-    let mut y3 = lambda * (x1 - x3.clone()) - y1;
-    y3.save_output();
-
-    let builder = builder.borrow().clone();
-    FieldExpr::new_with_setup_values(builder, range_bus, true, vec![a_biguint])
-}
diff --git a/extensions/ecc/circuit/src/weierstrass_chip/double/execution.rs b/extensions/ecc/circuit/src/weierstrass_chip/double/execution.rs
new file mode 100644
index 0000000000..8e755aa6f7
--- /dev/null
+++ b/extensions/ecc/circuit/src/weierstrass_chip/double/execution.rs
@@ -0,0 +1,355 @@
+use std::{
+    array::from_fn,
+    borrow::{Borrow, BorrowMut},
+};
+
+use num_bigint::BigUint;
+use openvm_circuit::{
+    arch::*,
+    system::memory::{online::GuestMemory, POINTER_MAX_BITS},
+};
+use openvm_circuit_primitives::AlignedBytesBorrow;
+use openvm_ecc_transpiler::Rv32WeierstrassOpcode;
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS},
+};
+use openvm_mod_circuit_builder::{run_field_expression_precomputed, FieldExpr};
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::EcDoubleExecutor;
+use crate::weierstrass_chip::curves::{ec_double, get_curve_type, CurveType};
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct EcDoublePreCompute<'a> {
+    expr: &'a FieldExpr, // expression used by the generic path and by the setup checks
+    rs_addrs: [u8; 1], // register holding the input pointer (instruction operand `b`)
+    a: u8, // register holding the output pointer (instruction operand `a`)
+    flag_idx: u8, // flag index passed to `run_field_expression_precomputed`
+}
+
+impl<'a, const BLOCKS: usize, const BLOCK_SIZE: usize> EcDoubleExecutor<BLOCKS, BLOCK_SIZE> {
+    /// Validates `inst` and records in `data` what the execute functions need.
+    ///
+    /// Returns `Ok(true)` for `SETUP_EC_DOUBLE` and `Ok(false)` for any other opcode; fails with
+    /// `InvalidInstruction(pc)` unless `d` is the register and `e` the memory address space.
+    fn pre_compute_impl<F: PrimeField32>(
+        &'a self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut EcDoublePreCompute<'a>,
+    ) -> Result<bool, StaticProgramError> {
+        let Instruction {
+            opcode, a, b, d, e, ..
+        } = inst;
+
+        // Validate instruction format
+        let a = a.as_canonical_u32();
+        let b = b.as_canonical_u32();
+        let d = d.as_canonical_u32();
+        let e = e.as_canonical_u32();
+        if d != RV32_REGISTER_AS || e != RV32_MEMORY_AS {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+
+        // Computed once; used by both the flag lookup and the setup check below.
+        let local_opcode = opcode.local_opcode_idx(self.offset);
+
+        // The flag index defaults to `num_flags()`. It is overridden only when the expression
+        // needs setup and this opcode's position has an entry in `opcode_flag_idx`.
+        let mut flag_idx = self.expr.num_flags() as u8;
+        if self.expr.needs_setup() {
+            let opcode_position = self
+                .local_opcode_idx
+                .iter()
+                .position(|&idx| idx == local_opcode);
+            if let Some(&idx) = opcode_position.and_then(|pos| self.opcode_flag_idx.get(pos)) {
+                flag_idx = idx as u8;
+            }
+        }
+
+        // NOTE(review): `a` and `b` are narrowed to `u8`; presumably register addresses always
+        // fit in one byte, confirm against the instruction encoding.
+        *data = EcDoublePreCompute {
+            expr: &self.expr,
+            rs_addrs: [b as u8],
+            a: a as u8,
+            flag_idx,
+        };
+
+        let is_setup = local_opcode == Rv32WeierstrassOpcode::SETUP_EC_DOUBLE as usize;
+        Ok(is_setup)
+    }
+}
+
+impl<F: PrimeField32, const BLOCKS: usize, const BLOCK_SIZE: usize> Executor<F>
+    for EcDoubleExecutor<BLOCKS, BLOCK_SIZE>
+{ // unmetered execution: picks an `execute_e1_impl` instantiation for each instruction
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize {
+        std::mem::size_of::<EcDoublePreCompute>() // size of the record `pre_compute` fills in
+    }
+
+    fn pre_compute<Ctx>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: ExecutionCtxTrait,
+    {
+        let pre_compute: &mut EcDoublePreCompute = data.borrow_mut(); // typed view over `data`
+
+        let is_setup = self.pre_compute_impl(pc, inst, pre_compute)?;
+
+        if let Some(curve_type) = {
+            let modulus = &pre_compute.expr.builder.prime;
+            let a_coeff = &pre_compute.expr.setup_values[0]; // setup value 0 is coefficient `a`
+            get_curve_type(modulus, a_coeff)
+        } {
+            match (is_setup, curve_type) {
+                (true, CurveType::K256) => {
+                    Ok(execute_e1_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::K256 as u8 }, true>)
+                }
+                (true, CurveType::P256) => {
+                    Ok(execute_e1_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::P256 as u8 }, true>)
+                }
+                (true, CurveType::BN254) => {
+                    Ok(
+                        execute_e1_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::BN254 as u8 }, true>,
+                    )
+                }
+                (true, CurveType::BLS12_381) => Ok(execute_e1_impl::<
+                    _,
+                    _,
+                    BLOCKS,
+                    BLOCK_SIZE,
+                    { CurveType::BLS12_381 as u8 },
+                    true,
+                >),
+                (false, CurveType::K256) => {
+                    Ok(
+                        execute_e1_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::K256 as u8 }, false>,
+                    )
+                }
+                (false, CurveType::P256) => {
+                    Ok(
+                        execute_e1_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::P256 as u8 }, false>,
+                    )
+                }
+                (false, CurveType::BN254) => Ok(execute_e1_impl::<
+                    _,
+                    _,
+                    BLOCKS,
+                    BLOCK_SIZE,
+                    { CurveType::BN254 as u8 },
+                    false,
+                >),
+                (false, CurveType::BLS12_381) => Ok(execute_e1_impl::<
+                    _,
+                    _,
+                    BLOCKS,
+                    BLOCK_SIZE,
+                    { CurveType::BLS12_381 as u8 },
+                    false,
+                >),
+            }
+        } else if is_setup { // unrecognised curve: `u8::MAX` selects the generic expression path
+            Ok(execute_e1_impl::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, true>)
+        } else {
+            Ok(execute_e1_impl::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, false>)
+        }
+    }
+}
+
+impl<F: PrimeField32, const BLOCKS: usize, const BLOCK_SIZE: usize> MeteredExecutor<F>
+    for EcDoubleExecutor<BLOCKS, BLOCK_SIZE>
+{ // metered execution: picks an `execute_e2_impl` instantiation for each instruction
+    #[inline(always)]
+    fn metered_pre_compute_size(&self) -> usize {
+        std::mem::size_of::<E2PreCompute<EcDoublePreCompute>>() // record plus the `chip_idx` wrapper
+    }
+
+    fn metered_pre_compute<Ctx>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        let pre_compute: &mut E2PreCompute<EcDoublePreCompute> = data.borrow_mut();
+        pre_compute.chip_idx = chip_idx as u32; // read back by `execute_e2_impl`
+
+        let is_setup = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?;
+
+        if let Some(curve_type) = {
+            let modulus = &pre_compute.data.expr.builder.prime;
+            let a_coeff = &pre_compute.data.expr.setup_values[0]; // setup value 0 is coefficient `a`
+            get_curve_type(modulus, a_coeff)
+        } {
+            match (is_setup, curve_type) {
+                (true, CurveType::K256) => {
+                    Ok(execute_e2_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::K256 as u8 }, true>)
+                }
+                (true, CurveType::P256) => {
+                    Ok(execute_e2_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::P256 as u8 }, true>)
+                }
+                (true, CurveType::BN254) => {
+                    Ok(
+                        execute_e2_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::BN254 as u8 }, true>,
+                    )
+                }
+                (true, CurveType::BLS12_381) => Ok(execute_e2_impl::<
+                    _,
+                    _,
+                    BLOCKS,
+                    BLOCK_SIZE,
+                    { CurveType::BLS12_381 as u8 },
+                    true,
+                >),
+                (false, CurveType::K256) => {
+                    Ok(
+                        execute_e2_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::K256 as u8 }, false>,
+                    )
+                }
+                (false, CurveType::P256) => {
+                    Ok(
+                        execute_e2_impl::<_, _, BLOCKS, BLOCK_SIZE, { CurveType::P256 as u8 }, false>,
+                    )
+                }
+                (false, CurveType::BN254) => Ok(execute_e2_impl::<
+                    _,
+                    _,
+                    BLOCKS,
+                    BLOCK_SIZE,
+                    { CurveType::BN254 as u8 },
+                    false,
+                >),
+                (false, CurveType::BLS12_381) => Ok(execute_e2_impl::<
+                    _,
+                    _,
+                    BLOCKS,
+                    BLOCK_SIZE,
+                    { CurveType::BLS12_381 as u8 },
+                    false,
+                >),
+            }
+        } else if is_setup { // unrecognised curve: `u8::MAX` selects the generic expression path
+            Ok(execute_e2_impl::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, true>)
+        } else {
+            Ok(execute_e2_impl::<_, _, BLOCKS, BLOCK_SIZE, { u8::MAX }, false>)
+        }
+    }
+}
+
+/// Shared body of the E1/E2 execute functions for `EC_DOUBLE` and `SETUP_EC_DOUBLE`.
+///
+/// Reads the input through the pointer held in the source register, checks the setup operands
+/// when `IS_SETUP`, computes the output and writes it through the pointer held in register `a`.
+unsafe fn execute_e12_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const BLOCKS: usize,
+    const BLOCK_SIZE: usize,
+    const CURVE_TYPE: u8,
+    const IS_SETUP: bool,
+>(
+    pre_compute: &EcDoublePreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    // Resolve the source pointer from its register, then load the input block by block.
+    let [rs_addr] = pre_compute.rs_addrs;
+    let src_ptr = u32::from_le_bytes(vm_state.vm_read(RV32_REGISTER_AS, rs_addr as u32));
+    debug_assert!(src_ptr as usize + BLOCK_SIZE * BLOCKS - 1 < (1 << POINTER_MAX_BITS));
+    let read_data: [[u8; BLOCK_SIZE]; BLOCKS] =
+        from_fn(|i| vm_state.vm_read(RV32_MEMORY_AS, src_ptr + (i * BLOCK_SIZE) as u32));
+
+    if IS_SETUP {
+        // Setup input carries the modulus in the first half and coefficient `a` in the second;
+        // both must agree with the values held by the expression.
+        let (prime_blocks, a_blocks) = read_data.split_at(BLOCKS / 2);
+        let expr = pre_compute.expr;
+        let prime_ok = BigUint::from_bytes_le(prime_blocks.as_flattened()) == expr.builder.prime;
+        let mismatch = if !prime_ok {
+            Some("EcDouble: mismatched prime")
+        } else if BigUint::from_bytes_le(a_blocks.as_flattened()) != expr.setup_values[0] {
+            Some("EcDouble: mismatched coeff_a")
+        } else {
+            None
+        };
+        if let Some(msg) = mismatch {
+            vm_state.exit_code = Err(ExecutionError::Fail {
+                pc: vm_state.pc,
+                msg,
+            });
+            return;
+        }
+    }
+
+    // Unrecognised curves (`u8::MAX`) and setup instructions evaluate the generic expression;
+    // recognised curves use the native implementation.
+    let output_data = if CURVE_TYPE == u8::MAX || IS_SETUP {
+        let read_data: DynArray<u8> = read_data.into();
+        run_field_expression_precomputed::<true>(
+            pre_compute.expr,
+            pre_compute.flag_idx as usize,
+            &read_data.0,
+        )
+        .into()
+    } else {
+        ec_double::<CURVE_TYPE, BLOCKS, BLOCK_SIZE>(read_data)
+    };
+
+    let dst_ptr = u32::from_le_bytes(vm_state.vm_read(RV32_REGISTER_AS, pre_compute.a as u32));
+    debug_assert!(dst_ptr as usize + BLOCK_SIZE * BLOCKS - 1 < (1 << POINTER_MAX_BITS));
+
+    // Store the result one block at a time.
+    for (i, block) in output_data.into_iter().enumerate() {
+        vm_state.vm_write(RV32_MEMORY_AS, dst_ptr + (i * BLOCK_SIZE) as u32, &block);
+    }
+
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+}
+
+unsafe fn execute_e1_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const BLOCKS: usize,
+    const BLOCK_SIZE: usize,
+    const CURVE_TYPE: u8,
+    const IS_SETUP: bool,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) { // function returned by `Executor::pre_compute`; forwards to `execute_e12_impl`
+    let pre_compute: &EcDoublePreCompute = pre_compute.borrow(); // typed view of the bytes
+    execute_e12_impl::<_, _, BLOCKS, BLOCK_SIZE, CURVE_TYPE, IS_SETUP>(pre_compute, vm_state);
+}
+
+unsafe fn execute_e2_impl<
+    F: PrimeField32,
+    CTX: MeteredExecutionCtxTrait,
+    const BLOCKS: usize,
+    const BLOCK_SIZE: usize,
+    const CURVE_TYPE: u8,
+    const IS_SETUP: bool,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) { // function returned by `MeteredExecutor::metered_pre_compute`
+    let e2_pre_compute: &E2PreCompute<EcDoublePreCompute> = pre_compute.borrow();
+    vm_state
+        .ctx
+        .on_height_change(e2_pre_compute.chip_idx as usize, 1); // height change of 1 for this chip
+    execute_e12_impl::<_, _, BLOCKS, BLOCK_SIZE, CURVE_TYPE, IS_SETUP>(
+        &e2_pre_compute.data, // the same record type the E1 path uses
+        vm_state,
+    );
+}
diff --git a/extensions/ecc/circuit/src/weierstrass_chip/double/mod.rs b/extensions/ecc/circuit/src/weierstrass_chip/double/mod.rs
new file mode 100644
index 0000000000..79cf3f4aa3
--- /dev/null
+++ b/extensions/ecc/circuit/src/weierstrass_chip/double/mod.rs
@@ -0,0 +1,144 @@
+use std::{cell::RefCell, rc::Rc};
+
+use derive_more::derive::{Deref, DerefMut};
+use num_bigint::BigUint;
+use num_traits::One;
+use openvm_circuit::{
+    arch::*,
+    system::memory::{offline_checker::MemoryBridge, SharedMemoryHelper},
+};
+use openvm_circuit_derive::PreflightExecutor;
+use openvm_circuit_primitives::{
+    bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
+    var_range::{SharedVariableRangeCheckerChip, VariableRangeCheckerBus},
+};
+use openvm_ecc_transpiler::Rv32WeierstrassOpcode;
+use openvm_instructions::riscv::RV32_CELL_BITS;
+use openvm_mod_circuit_builder::{
+    ExprBuilder, ExprBuilderConfig, FieldExpr, FieldExpressionCoreAir, FieldExpressionExecutor,
+    FieldExpressionFiller, FieldVariable,
+};
+use openvm_rv32_adapters::{
+    Rv32VecHeapAdapterAir, Rv32VecHeapAdapterExecutor, Rv32VecHeapAdapterFiller,
+};
+
+use super::{WeierstrassAir, WeierstrassChip};
+
+mod execution;
+
+pub fn ec_double_ne_expr(
+    config: ExprBuilderConfig, // The coordinate field.
+    range_bus: VariableRangeCheckerBus,
+    a_biguint: BigUint, // Curve coefficient `a`; also passed on as the single setup value.
+) -> FieldExpr {
+    config.check_valid();
+    let builder = ExprBuilder::new(config, range_bus.range_max_bits);
+    let builder = Rc::new(RefCell::new(builder));
+
+    let mut x1 = ExprBuilder::new_input(builder.clone());
+    let mut y1 = ExprBuilder::new_input(builder.clone());
+    let a = ExprBuilder::new_const(builder.clone(), a_biguint.clone());
+    let is_double_flag = (*builder).borrow_mut().new_flag();
+    // Guard against division by zero when the double flag is not set (equivalently, for the
+    // setup opcode): `select` picks between 2 * y1 and the constant 1 based on the flag.
+    let lambda_denom = FieldVariable::select(
+        is_double_flag,
+        &y1.int_mul(2),
+        &ExprBuilder::new_const(builder.clone(), BigUint::one()),
+    );
+    let mut lambda = (x1.square().int_mul(3) + a) / lambda_denom; // (3 * x1^2 + a) / denom
+    let mut x3 = lambda.square() - x1.int_mul(2); // x3 = lambda^2 - 2 * x1
+    x3.save_output();
+    let mut y3 = lambda * (x1 - x3.clone()) - y1; // y3 = lambda * (x1 - x3) - y1
+    y3.save_output();
+
+    let builder = (*builder).borrow().clone();
+    FieldExpr::new_with_setup_values(builder, range_bus, true, vec![a_biguint])
+}
+
+/// `BLOCK_SIZE`: the number of cells read at a time; must be a power of 2.
+/// `BLOCKS`: the number of blocks needed to represent one input or output.
+/// E.g. for bls12_381, `BLOCK_SIZE = 16`: each element takes 3 blocks and, with two elements
+/// per input `AffinePoint`, `BLOCKS = 6`. For secp256k1, `BLOCK_SIZE = 32` and `BLOCKS = 2`.
+#[derive(Clone, PreflightExecutor, Deref, DerefMut)]
+pub struct EcDoubleExecutor<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    FieldExpressionExecutor<Rv32VecHeapAdapterExecutor<1, BLOCKS, BLOCKS, BLOCK_SIZE, BLOCK_SIZE>>,
+);
+
+/// Builds the doubling expression together with the local opcodes it serves
+/// (`EC_DOUBLE` first, then `SETUP_EC_DOUBLE`).
+fn gen_base_expr(
+    config: ExprBuilderConfig,
+    range_checker_bus: VariableRangeCheckerBus,
+    a_biguint: BigUint,
+) -> (FieldExpr, Vec<usize>) {
+    let opcodes = [
+        Rv32WeierstrassOpcode::EC_DOUBLE,
+        Rv32WeierstrassOpcode::SETUP_EC_DOUBLE,
+    ];
+    let double_expr = ec_double_ne_expr(config, range_checker_bus, a_biguint);
+    (double_expr, opcodes.map(|op| op as usize).to_vec())
+}
+
+/// Builds the AIR for the `EcDouble` chip.
+///
+/// Combines a `Rv32VecHeapAdapterAir` with a `FieldExpressionCoreAir` over the expression and
+/// opcode list produced by `gen_base_expr` for coefficient `a_biguint`.
+#[allow(clippy::too_many_arguments)]
+pub fn get_ec_double_air<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    exec_bridge: ExecutionBridge,
+    mem_bridge: MemoryBridge,
+    config: ExprBuilderConfig,
+    range_checker_bus: VariableRangeCheckerBus,
+    bitwise_lookup_bus: BitwiseOperationLookupBus,
+    pointer_max_bits: usize,
+    offset: usize,
+    a_biguint: BigUint,
+) -> WeierstrassAir<1, BLOCKS, BLOCK_SIZE> {
+    let (expr, local_opcode_idx) = gen_base_expr(config, range_checker_bus, a_biguint);
+    let adapter_air =
+        Rv32VecHeapAdapterAir::new(exec_bridge, mem_bridge, bitwise_lookup_bus, pointer_max_bits);
+    // `expr` and `local_opcode_idx` are not used again, so they are moved rather than cloned.
+    let core_air = FieldExpressionCoreAir::new(expr, offset, local_opcode_idx, vec![]);
+    WeierstrassAir::new(adapter_air, core_air)
+}
+
+/// Creates the `EcDouble` executor.
+///
+/// The field expression and the local opcode list both come from `gen_base_expr`; the adapter
+/// executor is configured with `pointer_max_bits` only.
+pub fn get_ec_double_step<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    config: ExprBuilderConfig,
+    range_checker_bus: VariableRangeCheckerBus,
+    pointer_max_bits: usize,
+    offset: usize,
+    a_biguint: BigUint,
+) -> EcDoubleExecutor<BLOCKS, BLOCK_SIZE> {
+    let (double_expr, opcodes) = gen_base_expr(config, range_checker_bus, a_biguint);
+    let adapter = Rv32VecHeapAdapterExecutor::new(pointer_max_bits);
+    let executor =
+        FieldExpressionExecutor::new(adapter, double_expr, offset, opcodes, vec![], "EcDouble");
+    EcDoubleExecutor(executor)
+}
+
+/// Assembles the `EcDouble` chip: a field-expression filler wrapped in a `WeierstrassChip`.
+pub fn get_ec_double_chip<F, const BLOCKS: usize, const BLOCK_SIZE: usize>(
+    config: ExprBuilderConfig,
+    mem_helper: SharedMemoryHelper<F>,
+    range_checker: SharedVariableRangeCheckerChip,
+    bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+    pointer_max_bits: usize,
+    a_biguint: BigUint,
+) -> WeierstrassChip<F, 1, BLOCKS, BLOCK_SIZE> {
+    let (double_expr, opcodes) = gen_base_expr(config, range_checker.bus(), a_biguint);
+    let adapter_filler = Rv32VecHeapAdapterFiller::new(pointer_max_bits, bitwise_lookup_chip);
+    let filler = FieldExpressionFiller::new(
+        adapter_filler,
+        double_expr,
+        opcodes,
+        vec![],
+        range_checker,
+        true,
+    );
+    WeierstrassChip::new(filler, mem_helper)
+}
diff --git a/extensions/ecc/circuit/src/weierstrass_chip/mod.rs b/extensions/ecc/circuit/src/weierstrass_chip/mod.rs
index 0bcee1facf..cc8e97841e 100644
--- a/extensions/ecc/circuit/src/weierstrass_chip/mod.rs
+++ b/extensions/ecc/circuit/src/weierstrass_chip/mod.rs
@@ -1,99 +1,27 @@
 mod add_ne;
+mod curves;
 mod double;
 
-use std::sync::Arc;
-
 pub use add_ne::*;
 pub use double::*;
 
 #[cfg(test)]
 mod tests;
 
-use std::sync::Mutex;
+use openvm_circuit::arch::{VmAirWrapper, VmChipWrapper};
+use openvm_mod_circuit_builder::{FieldExpressionCoreAir, FieldExpressionFiller};
+use openvm_rv32_adapters::{Rv32VecHeapAdapterAir, Rv32VecHeapAdapterFiller};
 
-use num_bigint::BigUint;
-use openvm_circuit::{arch::VmChipWrapper, system::memory::OfflineMemory};
-use openvm_circuit_derive::InstructionExecutor;
-use openvm_circuit_primitives::var_range::SharedVariableRangeCheckerChip;
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
-use openvm_ecc_transpiler::Rv32WeierstrassOpcode;
-use openvm_mod_circuit_builder::{ExprBuilderConfig, FieldExpressionCoreChip};
-use openvm_rv32_adapters::Rv32VecHeapAdapterChip;
-use openvm_stark_backend::p3_field::PrimeField32;
+pub type WeierstrassAir<const NUM_READS: usize, const BLOCKS: usize, const BLOCK_SIZE: usize> =
+    VmAirWrapper<
+        Rv32VecHeapAdapterAir<NUM_READS, BLOCKS, BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
+        FieldExpressionCoreAir,
+    >;
 
-/// BLOCK_SIZE: how many cells do we read at a time, must be a power of 2.
-/// BLOCKS: how many blocks do we need to represent one input or output
-/// For example, for bls12_381, BLOCK_SIZE = 16, each element has 3 blocks and with two elements per
-/// input AffinePoint, BLOCKS = 6. For secp256k1, BLOCK_SIZE = 32, BLOCKS = 2.
-#[derive(Chip, ChipUsageGetter, InstructionExecutor)]
-pub struct EcAddNeChip<F: PrimeField32, const BLOCKS: usize, const BLOCK_SIZE: usize>(
-    pub  VmChipWrapper<
+pub type WeierstrassChip<F, const NUM_READS: usize, const BLOCKS: usize, const BLOCK_SIZE: usize> =
+    VmChipWrapper<
         F,
-        Rv32VecHeapAdapterChip<F, 2, BLOCKS, BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
-        FieldExpressionCoreChip,
-    >,
-);
-
-impl<F: PrimeField32, const BLOCKS: usize, const BLOCK_SIZE: usize>
-    EcAddNeChip<F, BLOCKS, BLOCK_SIZE>
-{
-    pub fn new(
-        adapter: Rv32VecHeapAdapterChip<F, 2, BLOCKS, BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
-        config: ExprBuilderConfig,
-        offset: usize,
-        range_checker: SharedVariableRangeCheckerChip,
-        offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-    ) -> Self {
-        let expr = ec_add_ne_expr(config, range_checker.bus());
-        let core = FieldExpressionCoreChip::new(
-            expr,
-            offset,
-            vec![
-                Rv32WeierstrassOpcode::EC_ADD_NE as usize,
-                Rv32WeierstrassOpcode::SETUP_EC_ADD_NE as usize,
-            ],
-            vec![],
-            range_checker,
-            "EcAddNe",
-            false,
-        );
-        Self(VmChipWrapper::new(adapter, core, offline_memory))
-    }
-}
-
-#[derive(Chip, ChipUsageGetter, InstructionExecutor)]
-pub struct EcDoubleChip<F: PrimeField32, const BLOCKS: usize, const BLOCK_SIZE: usize>(
-    pub  VmChipWrapper<
-        F,
-        Rv32VecHeapAdapterChip<F, 1, BLOCKS, BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
-        FieldExpressionCoreChip,
-    >,
-);
-
-impl<F: PrimeField32, const BLOCKS: usize, const BLOCK_SIZE: usize>
-    EcDoubleChip<F, BLOCKS, BLOCK_SIZE>
-{
-    pub fn new(
-        adapter: Rv32VecHeapAdapterChip<F, 1, BLOCKS, BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
-        range_checker: SharedVariableRangeCheckerChip,
-        config: ExprBuilderConfig,
-        offset: usize,
-        a: BigUint,
-        offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-    ) -> Self {
-        let expr = ec_double_ne_expr(config, range_checker.bus(), a);
-        let core = FieldExpressionCoreChip::new(
-            expr,
-            offset,
-            vec![
-                Rv32WeierstrassOpcode::EC_DOUBLE as usize,
-                Rv32WeierstrassOpcode::SETUP_EC_DOUBLE as usize,
-            ],
-            vec![],
-            range_checker,
-            "EcDouble",
-            true,
-        );
-        Self(VmChipWrapper::new(adapter, core, offline_memory))
-    }
-}
+        FieldExpressionFiller<
+            Rv32VecHeapAdapterFiller<NUM_READS, BLOCKS, BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
+        >,
+    >;
diff --git a/extensions/ecc/circuit/src/weierstrass_chip/tests.rs b/extensions/ecc/circuit/src/weierstrass_chip/tests.rs
index 213918ec2e..c29b1ea006 100644
--- a/extensions/ecc/circuit/src/weierstrass_chip/tests.rs
+++ b/extensions/ecc/circuit/src/weierstrass_chip/tests.rs
@@ -1,24 +1,40 @@
-use std::str::FromStr;
+use std::{str::FromStr, sync::Arc};
 
+use halo2curves_axiom::secp256r1;
 use num_bigint::BigUint;
 use num_traits::{FromPrimitive, Num, Zero};
-use openvm_circuit::arch::testing::{VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS};
+use openvm_circuit::arch::{
+    testing::{memory::gen_pointer, TestChipHarness, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS},
+    MatrixRecordArena,
+};
 use openvm_circuit_primitives::{
     bigint::utils::{secp256k1_coord_prime, secp256r1_coord_prime},
-    bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
+    bitwise_op_lookup::{
+        BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
+        SharedBitwiseOperationLookupChip,
+    },
 };
 use openvm_ecc_transpiler::Rv32WeierstrassOpcode;
-use openvm_instructions::{riscv::RV32_CELL_BITS, LocalOpcode};
-use openvm_mod_circuit_builder::{test_utils::biguint_to_limbs, ExprBuilderConfig, FieldExpr};
-use openvm_rv32_adapters::{rv32_write_heap_default, Rv32VecHeapAdapterChip};
+use openvm_instructions::{
+    instruction::Instruction,
+    riscv::{RV32_CELL_BITS, RV32_MEMORY_AS, RV32_REGISTER_AS, RV32_REGISTER_NUM_LIMBS},
+    LocalOpcode, VmOpcode,
+};
+use openvm_mod_circuit_builder::{
+    test_utils::generate_random_biguint, utils::biguint_to_limbs_vec, ExprBuilderConfig,
+};
+use openvm_pairing_guest::bls12_381::BLS12_381_MODULUS;
 use openvm_stark_backend::p3_field::FieldAlgebra;
-use openvm_stark_sdk::p3_baby_bear::BabyBear;
+use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
+use rand::{rngs::StdRng, Rng};
 
-use super::{EcAddNeChip, EcDoubleChip};
+use crate::{
+    get_ec_addne_air, get_ec_addne_chip, get_ec_addne_step, get_ec_double_air, get_ec_double_chip,
+    get_ec_double_step, EcDoubleExecutor, WeierstrassAir, WeierstrassChip,
+};
 
-const NUM_LIMBS: usize = 32;
 const LIMB_BITS: usize = 8;
-const BLOCK_SIZE: usize = 32;
+const MAX_INS_CAPACITY: usize = 128;
 type F = BabyBear;
 
 lazy_static::lazy_static! {
@@ -70,233 +86,642 @@ lazy_static::lazy_static! {
     };
 }
 
-fn prime_limbs(expr: &FieldExpr) -> Vec<BabyBear> {
-    expr.prime_limbs
-        .iter()
-        .map(|n| BabyBear::from_canonical_usize(*n))
-        .collect::<Vec<_>>()
+mod ec_addne_tests {
+    use num_traits::One;
+
+    use super::*;
+    use crate::EcAddNeExecutor;
+    /// Harness type for the EC_ADD_NE tests; AIR and chip use 2 as their first const argument.
+    type EcAddneHarness<const BLOCKS: usize, const BLOCK_SIZE: usize> = TestChipHarness<
+        F,
+        EcAddNeExecutor<BLOCKS, BLOCK_SIZE>,
+        WeierstrassAir<2, BLOCKS, BLOCK_SIZE>,
+        WeierstrassChip<F, 2, BLOCKS, BLOCK_SIZE>,
+        MatrixRecordArena<F>,
+    >;
+    /// Builds the EC_ADD_NE harness and returns it with the bitwise lookup (AIR, chip) pair.
+    fn create_test_addne_chips<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+        tester: &VmChipTestBuilder<F>,
+        config: ExprBuilderConfig,
+        offset: usize,
+    ) -> (
+        EcAddneHarness<BLOCKS, BLOCK_SIZE>,
+        (
+            BitwiseOperationLookupAir<RV32_CELL_BITS>,
+            SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+        ),
+    ) {
+        let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
+        let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+            bitwise_bus,
+        ));
+        // AIR, executor and chip each receive their own clone of `config`.
+        let air = get_ec_addne_air::<BLOCKS, BLOCK_SIZE>(
+            tester.execution_bridge(),
+            tester.memory_bridge(),
+            config.clone(),
+            tester.range_checker().bus(),
+            bitwise_bus,
+            tester.address_bits(),
+            offset,
+        );
+        let executor = get_ec_addne_step::<BLOCKS, BLOCK_SIZE>(
+            config.clone(),
+            tester.range_checker().bus(),
+            tester.address_bits(),
+            offset,
+        );
+        let chip = get_ec_addne_chip::<F, BLOCKS, BLOCK_SIZE>(
+            config.clone(),
+            tester.memory_helper(),
+            tester.range_checker(),
+            bitwise_chip.clone(),
+            tester.address_bits(),
+        );
+        // The harness is created with capacity MAX_INS_CAPACITY (128).
+        let harness = EcAddneHarness::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
+        // The bitwise pair is returned so the caller can pass it to `load_periphery`.
+        (harness, (bitwise_chip.air, bitwise_chip))
+    }
+    /// Runs one EC_ADD_NE through the harness; setup uses x1 = modulus and 1 elsewhere.
+    #[allow(clippy::too_many_arguments)]
+    fn set_and_execute_ec_addne<
+        const BLOCKS: usize,
+        const BLOCK_SIZE: usize,
+        const NUM_LIMBS: usize,
+    >(
+        tester: &mut VmChipTestBuilder<F>,
+        harness: &mut EcAddneHarness<BLOCKS, BLOCK_SIZE>,
+        rng: &mut StdRng,
+        modulus: &BigUint,
+        is_setup: bool,
+        offset: usize,
+        p1: Option<(BigUint, BigUint)>,
+        p2: Option<(BigUint, BigUint)>,
+    ) {
+        let (x1, y1, x2, y2, op_local) = if is_setup {
+            (
+                modulus.clone(),
+                BigUint::one(),
+                BigUint::one(),
+                BigUint::one(),
+                Rv32WeierstrassOpcode::SETUP_EC_ADD_NE as usize,
+            )
+        } else if let Some((x1, y1)) = p1 {
+            let (x2, y2) = p2.unwrap();
+            let x1 = x1 % modulus;
+            let y1 = y1 % modulus;
+            let x2 = x2 % modulus;
+            let y2 = y2 % modulus;
+            if rng.gen_bool(0.5) {
+                (x1, y1, x2, y2, Rv32WeierstrassOpcode::EC_ADD_NE as usize)
+            } else {
+                (x2, y2, x1, y1, Rv32WeierstrassOpcode::EC_ADD_NE as usize)
+            }
+        } else {
+            panic!("Generating random inputs generically is harder because the input points need to be on the curve.");
+        };
+        // Pointers are written to the register address space, point limbs to memory.
+        let ptr_as = RV32_REGISTER_AS as usize;
+        let data_as = RV32_MEMORY_AS as usize;
+        // Locations of the rs1/rs2/rd registers, chosen via gen_pointer.
+        let rs1_ptr = gen_pointer(rng, RV32_REGISTER_NUM_LIMBS);
+        let rs2_ptr = gen_pointer(rng, RV32_REGISTER_NUM_LIMBS);
+        let rd_ptr = gen_pointer(rng, RV32_REGISTER_NUM_LIMBS);
+        // Base addresses of the two input points and of the result.
+        let p1_base_addr = gen_pointer(rng, BLOCK_SIZE) as u32;
+        let p2_base_addr = gen_pointer(rng, BLOCK_SIZE) as u32;
+        let result_base_addr = gen_pointer(rng, BLOCK_SIZE) as u32;
+        // Store each base address as little-endian bytes in its register.
+        tester.write::<RV32_REGISTER_NUM_LIMBS>(
+            ptr_as,
+            rs1_ptr,
+            p1_base_addr.to_le_bytes().map(F::from_canonical_u8),
+        );
+        tester.write::<RV32_REGISTER_NUM_LIMBS>(
+            ptr_as,
+            rs2_ptr,
+            p2_base_addr.to_le_bytes().map(F::from_canonical_u8),
+        );
+        tester.write::<RV32_REGISTER_NUM_LIMBS>(
+            ptr_as,
+            rd_ptr,
+            result_base_addr.to_le_bytes().map(F::from_canonical_u8),
+        );
+        // Convert every coordinate to NUM_LIMBS field-element limbs.
+        let x1_limbs: Vec<F> = biguint_to_limbs_vec(&x1, NUM_LIMBS)
+            .into_iter()
+            .map(F::from_canonical_u8)
+            .collect();
+        let x2_limbs: Vec<F> = biguint_to_limbs_vec(&x2, NUM_LIMBS)
+            .into_iter()
+            .map(F::from_canonical_u8)
+            .collect();
+        let y1_limbs: Vec<F> = biguint_to_limbs_vec(&y1, NUM_LIMBS)
+            .into_iter()
+            .map(F::from_canonical_u8)
+            .collect();
+        let y2_limbs: Vec<F> = biguint_to_limbs_vec(&y2, NUM_LIMBS)
+            .into_iter()
+            .map(F::from_canonical_u8)
+            .collect();
+        // Each point is laid out as x then y, NUM_LIMBS apart, written in BLOCK_SIZE chunks.
+        for i in (0..NUM_LIMBS).step_by(BLOCK_SIZE) {
+            tester.write::<BLOCK_SIZE>(
+                data_as,
+                p1_base_addr as usize + i,
+                x1_limbs[i..i + BLOCK_SIZE].try_into().unwrap(),
+            );
+
+            tester.write::<BLOCK_SIZE>(
+                data_as,
+                (p1_base_addr + NUM_LIMBS as u32) as usize + i,
+                y1_limbs[i..i + BLOCK_SIZE].try_into().unwrap(),
+            );
+
+            tester.write::<BLOCK_SIZE>(
+                data_as,
+                p2_base_addr as usize + i,
+                x2_limbs[i..i + BLOCK_SIZE].try_into().unwrap(),
+            );
+
+            tester.write::<BLOCK_SIZE>(
+                data_as,
+                (p2_base_addr + NUM_LIMBS as u32) as usize + i,
+                y2_limbs[i..i + BLOCK_SIZE].try_into().unwrap(),
+            );
+        }
+        // Operand order passed here: rd, rs1, rs2, pointer address space, data address space.
+        let instruction = Instruction::from_isize(
+            VmOpcode::from_usize(offset + op_local),
+            rd_ptr as isize,
+            rs1_ptr as isize,
+            rs2_ptr as isize,
+            ptr_as as isize,
+            data_as as isize,
+        );
+
+        tester.execute(harness, &instruction);
+    }
+    /// Runs one setup and two EC_ADD_NE instructions through the harness, then verifies.
+    fn run_ec_addne_test<const BLOCKS: usize, const BLOCK_SIZE: usize, const NUM_LIMBS: usize>(
+        offset: usize,
+        modulus: BigUint,
+    ) {
+        let mut rng = create_seeded_rng();
+        let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
+        let config = ExprBuilderConfig {
+            modulus: modulus.clone(),
+            num_limbs: NUM_LIMBS,
+            limb_bits: LIMB_BITS,
+        };
+
+        let (mut harness, bitwise) =
+            create_test_addne_chips::<BLOCKS, BLOCK_SIZE>(&tester, config, offset);
+        // First instruction is the setup opcode (is_setup = true, no points supplied).
+        set_and_execute_ec_addne::<BLOCKS, BLOCK_SIZE, NUM_LIMBS>(
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            &modulus,
+            true,
+            offset,
+            None,
+            None,
+        );
+        // SampleEcPoints[0] and [1]; the helper randomly picks which one is the first operand.
+        set_and_execute_ec_addne::<BLOCKS, BLOCK_SIZE, NUM_LIMBS>(
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            &modulus,
+            false,
+            offset,
+            Some(SampleEcPoints[0].clone()),
+            Some(SampleEcPoints[1].clone()),
+        );
+        // SampleEcPoints[2] and [3], again in random operand order.
+        set_and_execute_ec_addne::<BLOCKS, BLOCK_SIZE, NUM_LIMBS>(
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            &modulus,
+            false,
+            offset,
+            Some(SampleEcPoints[2].clone()),
+            Some(SampleEcPoints[3].clone()),
+        );
+        // Finalize the tester with the harness and the bitwise periphery loaded.
+        let tester = tester
+            .build()
+            .load(harness)
+            .load_periphery(bitwise)
+            .finalize();
+
+        tester.simple_test().expect("Verification failed");
+    }
+    /// EC_ADD_NE over the secp256k1 coordinate prime with BLOCKS = 2, BLOCK_SIZE = 32, 32 limbs.
+    #[test]
+    fn test_ec_addne_2x32() {
+        run_ec_addne_test::<2, 32, 32>(
+            Rv32WeierstrassOpcode::CLASS_OFFSET,
+            secp256k1_coord_prime(),
+        );
+    }
+
+    ///////////////////////////////////////////////////////////////////////////////////////
+    /// SANITY TESTS
+    ///
+    /// Ensure that execute functions produce the correct results.
+    ///////////////////////////////////////////////////////////////////////////////////////
+    #[test]
+    fn ec_addne_sanity_test() {
+        let tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
+        let config = ExprBuilderConfig {
+            modulus: secp256k1_coord_prime(),
+            num_limbs: 32,
+            limb_bits: LIMB_BITS,
+        };
+        // Only the executor is built: the test evaluates its field expression directly.
+        let executor = get_ec_addne_step::<2, 32>(
+            config,
+            tester.range_checker().bus(),
+            tester.address_bits(),
+            Rv32WeierstrassOpcode::CLASS_OFFSET,
+        );
+        // Case 1: SampleEcPoints[0] + SampleEcPoints[1] is expected to equal SampleEcPoints[2].
+        let (p1_x, p1_y) = SampleEcPoints[0].clone();
+        let (p2_x, p2_y) = SampleEcPoints[1].clone();
+        assert_eq!(executor.expr.builder.num_variables, 3); // lambda, x3, y3
+        let r = executor
+            .expr
+            .execute(vec![p1_x, p1_y, p2_x, p2_y], vec![true]);
+        // Index 0 is lambda, so the result point is (r[1], r[2]).
+        assert_eq!(r.len(), 3); // lambda, x3, y3
+        assert_eq!(r[1], SampleEcPoints[2].0);
+        assert_eq!(r[2], SampleEcPoints[2].1);
+        // Case 2: SampleEcPoints[2] + SampleEcPoints[3] is expected to equal SampleEcPoints[4].
+        let (p1_x, p1_y) = SampleEcPoints[2].clone();
+        let (p2_x, p2_y) = SampleEcPoints[3].clone();
+        assert_eq!(executor.expr.builder.num_variables, 3); // lambda, x3, y3
+        let r = executor
+            .expr
+            .execute(vec![p1_x, p1_y, p2_x, p2_y], vec![true]);
+
+        assert_eq!(r.len(), 3); // lambda, x3, y3
+        assert_eq!(r[1], SampleEcPoints[4].0);
+        assert_eq!(r[2], SampleEcPoints[4].1);
+    }
 }
 
-#[test]
-fn test_add_ne() {
-    let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-    let config = ExprBuilderConfig {
-        modulus: secp256k1_coord_prime(),
-        num_limbs: NUM_LIMBS,
-        limb_bits: LIMB_BITS,
-    };
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-    let adapter = Rv32VecHeapAdapterChip::<F, 2, 2, 2, BLOCK_SIZE, BLOCK_SIZE>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-        tester.address_bits(),
-        bitwise_chip.clone(),
-    );
-    let mut chip = EcAddNeChip::new(
-        adapter,
-        config,
-        Rv32WeierstrassOpcode::CLASS_OFFSET,
-        tester.range_checker(),
-        tester.offline_memory_mutex_arc(),
-    );
-    assert_eq!(chip.0.core.expr().builder.num_variables, 3); // lambda, x3, y3
-
-    let (p1_x, p1_y) = SampleEcPoints[0].clone();
-    let (p2_x, p2_y) = SampleEcPoints[1].clone();
-
-    let p1_x_limbs =
-        biguint_to_limbs::<NUM_LIMBS>(p1_x.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32);
-    let p1_y_limbs =
-        biguint_to_limbs::<NUM_LIMBS>(p1_y.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32);
-    let p2_x_limbs =
-        biguint_to_limbs::<NUM_LIMBS>(p2_x.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32);
-    let p2_y_limbs =
-        biguint_to_limbs::<NUM_LIMBS>(p2_y.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32);
-
-    let r = chip
-        .0
-        .core
-        .expr()
-        .execute(vec![p1_x, p1_y, p2_x, p2_y], vec![true]);
-    assert_eq!(r.len(), 3); // lambda, x3, y3
-    assert_eq!(r[1], SampleEcPoints[2].0);
-    assert_eq!(r[2], SampleEcPoints[2].1);
-
-    let prime_limbs: [BabyBear; NUM_LIMBS] = prime_limbs(chip.0.core.expr()).try_into().unwrap();
-    let mut one_limbs = [BabyBear::ONE; NUM_LIMBS];
-    one_limbs[0] = BabyBear::ONE;
-    let setup_instruction = rv32_write_heap_default(
-        &mut tester,
-        vec![prime_limbs, one_limbs], // inputs[0] = prime, others doesn't matter
-        vec![one_limbs, one_limbs],
-        chip.0.core.air.offset + Rv32WeierstrassOpcode::SETUP_EC_ADD_NE as usize,
-    );
-    tester.execute(&mut chip, &setup_instruction);
-
-    let instruction = rv32_write_heap_default(
-        &mut tester,
-        vec![p1_x_limbs, p1_y_limbs],
-        vec![p2_x_limbs, p2_y_limbs],
-        chip.0.core.air.offset + Rv32WeierstrassOpcode::EC_ADD_NE as usize,
-    );
-
-    tester.execute(&mut chip, &instruction);
-
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
-
-    tester.simple_test().expect("Verification failed");
-}
+mod ec_double_tests {
+    use super::*;
+    /// Harness type for the EC_DOUBLE tests; AIR and chip use 1 as their first const argument.
+    type EcDoubleHarness<const BLOCKS: usize, const BLOCK_SIZE: usize> = TestChipHarness<
+        F,
+        EcDoubleExecutor<BLOCKS, BLOCK_SIZE>,
+        WeierstrassAir<1, BLOCKS, BLOCK_SIZE>,
+        WeierstrassChip<F, 1, BLOCKS, BLOCK_SIZE>,
+        MatrixRecordArena<F>,
+    >;
+    /// Builds the EC_DOUBLE harness for coefficient `a_biguint`, plus the bitwise lookup pair.
+    fn create_test_double_chips<const BLOCKS: usize, const BLOCK_SIZE: usize>(
+        tester: &VmChipTestBuilder<F>,
+        config: ExprBuilderConfig,
+        offset: usize,
+        a_biguint: BigUint,
+    ) -> (
+        EcDoubleHarness<BLOCKS, BLOCK_SIZE>,
+        (
+            BitwiseOperationLookupAir<RV32_CELL_BITS>,
+            SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+        ),
+    ) {
+        let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
+        let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+            bitwise_bus,
+        ));
+        let air = get_ec_double_air(
+            tester.execution_bridge(),
+            tester.memory_bridge(),
+            config.clone(),
+            tester.range_checker().bus(),
+            bitwise_bus,
+            tester.address_bits(),
+            offset,
+            a_biguint.clone(),
+        );
+        let executor = get_ec_double_step(
+            config.clone(),
+            tester.range_checker().bus(),
+            tester.address_bits(),
+            offset,
+            a_biguint.clone(),
+        );
+        let chip = get_ec_double_chip(
+            config.clone(),
+            tester.memory_helper(),
+            tester.range_checker(),
+            bitwise_chip.clone(),
+            tester.address_bits(),
+            a_biguint,
+        );
+        let harness = EcDoubleHarness::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
+        // The bitwise pair is returned so the caller can pass it to `load_periphery`.
+        (harness, (bitwise_chip.air, bitwise_chip))
+    }
+    /// Runs one EC_DOUBLE through the harness; setup uses x = modulus and y = `a_biguint`.
+    #[allow(clippy::too_many_arguments)]
+    fn set_and_execute_ec_double<
+        const BLOCKS: usize,
+        const BLOCK_SIZE: usize,
+        const NUM_LIMBS: usize,
+    >(
+        tester: &mut VmChipTestBuilder<F>,
+        harness: &mut EcDoubleHarness<BLOCKS, BLOCK_SIZE>,
+        rng: &mut StdRng,
+        modulus: &BigUint,
+        a_biguint: &BigUint,
+        is_setup: bool,
+        offset: usize,
+        x: Option<BigUint>,
+        y: Option<BigUint>,
+    ) {
+        let (x1, y1, op_local) = if is_setup {
+            (
+                modulus.clone(),
+                a_biguint.clone(),
+                Rv32WeierstrassOpcode::SETUP_EC_DOUBLE as usize,
+            )
+        } else if let Some(x) = x {
+            let y = y.unwrap();
+            let x = x % modulus;
+            let y = y % modulus;
+            (x, y, Rv32WeierstrassOpcode::EC_DOUBLE as usize)
+        } else {
+            let x = generate_random_biguint(modulus);
+            let y = generate_random_biguint(modulus);
+            // Fallback inputs are random; `rng` is not passed to generate_random_biguint.
+            (x, y, Rv32WeierstrassOpcode::EC_DOUBLE as usize)
+        };
+        // Pointers are written to the register address space, point limbs to memory.
+        let ptr_as = RV32_REGISTER_AS as usize;
+        let data_as = RV32_MEMORY_AS as usize;
+
+        let rs1_ptr = gen_pointer(rng, RV32_REGISTER_NUM_LIMBS);
+        let rd_ptr = gen_pointer(rng, RV32_REGISTER_NUM_LIMBS);
+
+        let p1_base_addr = gen_pointer(rng, BLOCK_SIZE) as u32;
+        let result_base_addr = gen_pointer(rng, BLOCK_SIZE) as u32;
+        // Store each base address as little-endian bytes in its register.
+        tester.write::<RV32_REGISTER_NUM_LIMBS>(
+            ptr_as,
+            rs1_ptr,
+            p1_base_addr.to_le_bytes().map(F::from_canonical_u8),
+        );
+        tester.write::<RV32_REGISTER_NUM_LIMBS>(
+            ptr_as,
+            rd_ptr,
+            result_base_addr.to_le_bytes().map(F::from_canonical_u8),
+        );
+        // Convert both coordinates to NUM_LIMBS field-element limbs.
+        let x1_limbs: Vec<F> = biguint_to_limbs_vec(&x1, NUM_LIMBS)
+            .into_iter()
+            .map(F::from_canonical_u8)
+            .collect();
+        let y1_limbs: Vec<F> = biguint_to_limbs_vec(&y1, NUM_LIMBS)
+            .into_iter()
+            .map(F::from_canonical_u8)
+            .collect();
+        // The point is laid out as x then y, NUM_LIMBS apart, written in BLOCK_SIZE chunks.
+        for i in (0..NUM_LIMBS).step_by(BLOCK_SIZE) {
+            tester.write::<BLOCK_SIZE>(
+                data_as,
+                p1_base_addr as usize + i,
+                x1_limbs[i..i + BLOCK_SIZE].try_into().unwrap(),
+            );
+
+            tester.write::<BLOCK_SIZE>(
+                data_as,
+                (p1_base_addr + NUM_LIMBS as u32) as usize + i,
+                y1_limbs[i..i + BLOCK_SIZE].try_into().unwrap(),
+            );
+        }
+        // Operand order: rd, rs1, 0 (no second input), pointer addr space, data addr space.
+        let instruction = Instruction::from_isize(
+            VmOpcode::from_usize(offset + op_local),
+            rd_ptr as isize,
+            rs1_ptr as isize,
+            0,
+            ptr_as as isize,
+            data_as as isize,
+        );
+
+        tester.execute(harness, &instruction);
+    }
+    /// Runs `num_ops` loop instructions (the first is setup), then three fixed-input doubles.
+    fn run_ec_double_test<const BLOCKS: usize, const BLOCK_SIZE: usize, const NUM_LIMBS: usize>(
+        offset: usize,
+        modulus: BigUint,
+        num_ops: usize,
+        a: BigUint,
+    ) {
+        let mut rng = create_seeded_rng();
+        let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
+        let config = ExprBuilderConfig {
+            modulus: modulus.clone(),
+            num_limbs: NUM_LIMBS,
+            limb_bits: LIMB_BITS,
+        };
+
+        let (mut harness, bitwise) =
+            create_test_double_chips::<BLOCKS, BLOCK_SIZE>(&tester, config, offset, a.clone());
+        // Iteration 0 issues the setup opcode; later iterations double random inputs.
+        for i in 0..num_ops {
+            set_and_execute_ec_double::<BLOCKS, BLOCK_SIZE, NUM_LIMBS>(
+                &mut tester,
+                &mut harness,
+                &mut rng,
+                &modulus,
+                &a,
+                i == 0,
+                offset,
+                None,
+                None,
+            );
+        }
+        // Double SampleEcPoints[0].
+        set_and_execute_ec_double::<BLOCKS, BLOCK_SIZE, NUM_LIMBS>(
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            &modulus,
+            &a,
+            false,
+            offset,
+            Some(SampleEcPoints[0].0.clone()),
+            Some(SampleEcPoints[0].1.clone()),
+        );
+        // Double SampleEcPoints[1].
+        set_and_execute_ec_double::<BLOCKS, BLOCK_SIZE, NUM_LIMBS>(
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            &modulus,
+            &a,
+            false,
+            offset,
+            Some(SampleEcPoints[1].0.clone()),
+            Some(SampleEcPoints[1].1.clone()),
+        );
+
+        // Testing data from: http://point-at-infinity.org/ecc/nisttv
+        let p1_x = BigUint::from_str_radix(
+            "6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296",
+            16,
+        )
+        .unwrap();
+        let p1_y = BigUint::from_str_radix(
+            "4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5",
+            16,
+        )
+        .unwrap();
 
-#[test]
-fn test_double() {
-    let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-    let config = ExprBuilderConfig {
-        modulus: secp256k1_coord_prime(),
-        num_limbs: NUM_LIMBS,
-        limb_bits: LIMB_BITS,
-    };
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-    let adapter = Rv32VecHeapAdapterChip::<F, 1, 2, 2, BLOCK_SIZE, BLOCK_SIZE>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-        tester.address_bits(),
-        bitwise_chip.clone(),
-    );
-
-    let (p1_x, p1_y) = SampleEcPoints[1].clone();
-    let p1_x_limbs =
-        biguint_to_limbs::<NUM_LIMBS>(p1_x.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32);
-    let p1_y_limbs =
-        biguint_to_limbs::<NUM_LIMBS>(p1_y.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32);
-
-    let mut chip = EcDoubleChip::new(
-        adapter,
-        tester.memory_controller().borrow().range_checker.clone(),
-        config,
-        Rv32WeierstrassOpcode::CLASS_OFFSET,
-        BigUint::zero(),
-        tester.offline_memory_mutex_arc(),
-    );
-    assert_eq!(chip.0.core.air.expr.builder.num_variables, 3); // lambda, x3, y3
-
-    let r = chip.0.core.air.expr.execute(vec![p1_x, p1_y], vec![true]);
-    assert_eq!(r.len(), 3); // lambda, x3, y3
-    assert_eq!(r[1], SampleEcPoints[3].0);
-    assert_eq!(r[2], SampleEcPoints[3].1);
-
-    let prime_limbs: [BabyBear; NUM_LIMBS] = prime_limbs(&chip.0.core.air.expr).try_into().unwrap();
-    let a_limbs = [BabyBear::ZERO; NUM_LIMBS];
-    let setup_instruction = rv32_write_heap_default(
-        &mut tester,
-        vec![prime_limbs, a_limbs], /* inputs[0] = prime, inputs[1] = a coeff of weierstrass
-                                     * equation */
-        vec![],
-        chip.0.core.air.offset + Rv32WeierstrassOpcode::SETUP_EC_DOUBLE as usize,
-    );
-    tester.execute(&mut chip, &setup_instruction);
-
-    let instruction = rv32_write_heap_default(
-        &mut tester,
-        vec![p1_x_limbs, p1_y_limbs],
-        vec![],
-        chip.0.core.air.offset + Rv32WeierstrassOpcode::EC_DOUBLE as usize,
-    );
-
-    tester.execute(&mut chip, &instruction);
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
-
-    tester.simple_test().expect("Verification failed");
-}
+        set_and_execute_ec_double::<BLOCKS, BLOCK_SIZE, NUM_LIMBS>(
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            &modulus,
+            &a,
+            false,
+            offset,
+            Some(p1_x),
+            Some(p1_y),
+        );
+
+        let tester = tester
+            .build()
+            .load(harness)
+            .load_periphery(bitwise)
+            .finalize();
+
+        tester.simple_test().expect("Verification failed");
+    }
+    /// EC_DOUBLE with the secp256k1 coordinate prime and a = 0 (2 blocks of 32, num_ops = 50).
+    #[test]
+    fn test_ec_double_2x32() {
+        run_ec_double_test::<2, 32, 32>(
+            Rv32WeierstrassOpcode::CLASS_OFFSET,
+            secp256k1_coord_prime(),
+            50,
+            BigUint::zero(),
+        );
+    }
+    /// EC_DOUBLE with the secp256r1 coordinate prime and a = -3 computed in `secp256r1::Fp`.
+    #[test]
+    fn test_ec_double_2x32_nonzero_a_1() {
+        let coeff_a = (-secp256r1::Fp::from(3)).to_bytes();
+        let a = BigUint::from_bytes_le(&coeff_a);
+
+        run_ec_double_test::<2, 32, 32>(
+            Rv32WeierstrassOpcode::CLASS_OFFSET,
+            secp256r1_coord_prime(),
+            50,
+            a,
+        );
+    }
+    /// EC_DOUBLE with BLS12_381_MODULUS and a = 0, using 6 blocks of 16 and 48 limbs.
+    #[test]
+    fn test_ec_double_6x16() {
+        run_ec_double_test::<6, 16, 48>(
+            Rv32WeierstrassOpcode::CLASS_OFFSET,
+            BLS12_381_MODULUS.clone(),
+            50,
+            BigUint::zero(),
+        );
+    }
+
+    ///////////////////////////////////////////////////////////////////////////////////////
+    /// SANITY TESTS
+    ///
+    /// Ensure that execute functions produce the correct results.
+    ///////////////////////////////////////////////////////////////////////////////////////
+    #[test]
+    fn ec_double_sanity_test_sample_ec_points() {
+        let tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
+        let config = ExprBuilderConfig {
+            modulus: secp256k1_coord_prime(),
+            num_limbs: 32,
+            limb_bits: LIMB_BITS,
+        };
+        // Only the executor is built: the test evaluates its field expression directly.
+        let executor = get_ec_double_step::<2, 32>(
+            config,
+            tester.range_checker().bus(),
+            tester.address_bits(),
+            Rv32WeierstrassOpcode::CLASS_OFFSET,
+            BigUint::zero(),
+        );
+        // Doubling SampleEcPoints[1] is expected to give SampleEcPoints[3].
+        let (p1_x, p1_y) = SampleEcPoints[1].clone();
+
+        assert_eq!(executor.expr.builder.num_variables, 3); // lambda, x3, y3
+
+        let r = executor.expr.execute(vec![p1_x, p1_y], vec![true]);
+        assert_eq!(r.len(), 3); // lambda, x3, y3
+        assert_eq!(r[1], SampleEcPoints[3].0);
+        assert_eq!(r[2], SampleEcPoints[3].1);
+    }
+    /// Evaluates the double expression over the secp256r1 prime against a fixed expected point.
+    #[test]
+    fn ec_double_sanity_test() {
+        let tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
+        let config = ExprBuilderConfig {
+            modulus: secp256r1_coord_prime(),
+            num_limbs: 32,
+            limb_bits: LIMB_BITS,
+        };
+        let a = BigUint::from_str_radix(
+            "ffffffff00000001000000000000000000000000fffffffffffffffffffffffc",
+            16,
+        )
+        .unwrap();
 
-#[test]
-fn test_p256_double() {
-    let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-    let config = ExprBuilderConfig {
-        modulus: secp256r1_coord_prime(),
-        num_limbs: NUM_LIMBS,
-        limb_bits: LIMB_BITS,
-    };
-    let a = BigUint::from_str_radix(
-        "ffffffff00000001000000000000000000000000fffffffffffffffffffffffc",
-        16,
-    )
-    .unwrap();
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-    let adapter = Rv32VecHeapAdapterChip::<F, 1, 2, 2, BLOCK_SIZE, BLOCK_SIZE>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-        tester.address_bits(),
-        bitwise_chip.clone(),
-    );
-
-    // Testing data from: http://point-at-infinity.org/ecc/nisttv
-    let p1_x = BigUint::from_str_radix(
-        "6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296",
-        16,
-    )
-    .unwrap();
-    let p1_y = BigUint::from_str_radix(
-        "4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5",
-        16,
-    )
-    .unwrap();
-    let p1_x_limbs =
-        biguint_to_limbs::<NUM_LIMBS>(p1_x.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32);
-    let p1_y_limbs =
-        biguint_to_limbs::<NUM_LIMBS>(p1_y.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32);
-
-    let mut chip = EcDoubleChip::new(
-        adapter,
-        tester.memory_controller().borrow().range_checker.clone(),
-        config,
-        Rv32WeierstrassOpcode::CLASS_OFFSET,
-        a.clone(),
-        tester.offline_memory_mutex_arc(),
-    );
-    assert_eq!(chip.0.core.air.expr.builder.num_variables, 3); // lambda, x3, y3
-
-    let r = chip.0.core.air.expr.execute(vec![p1_x, p1_y], vec![true]);
-    assert_eq!(r.len(), 3); // lambda, x3, y3
-    let expected_double_x = BigUint::from_str_radix(
-        "7CF27B188D034F7E8A52380304B51AC3C08969E277F21B35A60B48FC47669978",
-        16,
-    )
-    .unwrap();
-    let expected_double_y = BigUint::from_str_radix(
-        "07775510DB8ED040293D9AC69F7430DBBA7DADE63CE982299E04B79D227873D1",
-        16,
-    )
-    .unwrap();
-    assert_eq!(r[1], expected_double_x);
-    assert_eq!(r[2], expected_double_y);
-
-    let prime_limbs: [BabyBear; NUM_LIMBS] = prime_limbs(&chip.0.core.air.expr).try_into().unwrap();
-    let a_limbs =
-        biguint_to_limbs::<NUM_LIMBS>(a.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32);
-    let setup_instruction = rv32_write_heap_default(
-        &mut tester,
-        vec![prime_limbs, a_limbs], /* inputs[0] = prime, inputs[1] = a coeff of weierstrass
-                                     * equation */
-        vec![],
-        chip.0.core.air.offset + Rv32WeierstrassOpcode::SETUP_EC_DOUBLE as usize,
-    );
-    tester.execute(&mut chip, &setup_instruction);
-
-    let instruction = rv32_write_heap_default(
-        &mut tester,
-        vec![p1_x_limbs, p1_y_limbs],
-        vec![],
-        chip.0.core.air.offset + Rv32WeierstrassOpcode::EC_DOUBLE as usize,
-    );
-
-    tester.execute(&mut chip, &instruction);
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
-
-    tester.simple_test().expect("Verification failed");
+        let executor = get_ec_double_step::<2, 32>(
+            config.clone(),
+            tester.range_checker().bus(),
+            tester.address_bits(),
+            Rv32WeierstrassOpcode::CLASS_OFFSET,
+            a.clone(),
+        );
+
+        // Testing data from: http://point-at-infinity.org/ecc/nisttv
+        let p1_x = BigUint::from_str_radix(
+            "6B17D1F2E12C4247F8BCE6E563A440F277037D812DEB33A0F4A13945D898C296",
+            16,
+        )
+        .unwrap();
+        let p1_y = BigUint::from_str_radix(
+            "4FE342E2FE1A7F9B8EE7EB4A7C0F9E162BCE33576B315ECECBB6406837BF51F5",
+            16,
+        )
+        .unwrap();
+
+        assert_eq!(executor.expr.builder.num_variables, 3); // lambda, x3, y3
+
+        let r = executor.expr.execute(vec![p1_x, p1_y], vec![true]);
+        assert_eq!(r.len(), 3); // lambda, x3, y3
+        let expected_double_x = BigUint::from_str_radix(
+            "7CF27B188D034F7E8A52380304B51AC3C08969E277F21B35A60B48FC47669978",
+            16,
+        )
+        .unwrap();
+        let expected_double_y = BigUint::from_str_radix(
+            "07775510DB8ED040293D9AC69F7430DBBA7DADE63CE982299E04B79D227873D1",
+            16,
+        )
+        .unwrap();
+        assert_eq!(r[1], expected_double_x);
+        assert_eq!(r[2], expected_double_y);
+    }
 }
diff --git a/extensions/ecc/circuit/src/weierstrass_extension.rs b/extensions/ecc/circuit/src/weierstrass_extension.rs
index a0fd9cabb4..5048584183 100644
--- a/extensions/ecc/circuit/src/weierstrass_extension.rs
+++ b/extensions/ecc/circuit/src/weierstrass_extension.rs
@@ -1,28 +1,43 @@
-use derive_more::derive::From;
+use std::sync::Arc;
+
 use hex_literal::hex;
 use lazy_static::lazy_static;
 use num_bigint::BigUint;
 use num_traits::{FromPrimitive, Zero};
 use once_cell::sync::Lazy;
 use openvm_circuit::{
-    arch::{SystemPort, VmExtension, VmInventory, VmInventoryBuilder, VmInventoryError},
-    system::phantom::PhantomChip,
+    arch::{
+        AirInventory, AirInventoryError, ChipInventory, ChipInventoryError, ExecutionBridge,
+        ExecutorInventoryBuilder, ExecutorInventoryError, RowMajorMatrixArena, VmCircuitExtension,
+        VmExecutionExtension, VmProverExtension,
+    },
+    system::{memory::SharedMemoryHelper, SystemPort},
 };
-use openvm_circuit_derive::{AnyEnum, InstructionExecutor};
-use openvm_circuit_primitives::bitwise_op_lookup::{
-    BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
+use openvm_circuit_derive::{AnyEnum, Executor, MeteredExecutor, PreflightExecutor};
+use openvm_circuit_primitives::{
+    bitwise_op_lookup::{
+        BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
+        SharedBitwiseOperationLookupChip,
+    },
+    var_range::VariableRangeCheckerBus,
 };
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
 use openvm_ecc_transpiler::Rv32WeierstrassOpcode;
 use openvm_instructions::{LocalOpcode, VmOpcode};
 use openvm_mod_circuit_builder::ExprBuilderConfig;
-use openvm_rv32_adapters::Rv32VecHeapAdapterChip;
-use openvm_stark_backend::p3_field::PrimeField32;
+use openvm_stark_backend::{
+    config::{StarkGenericConfig, Val},
+    engine::StarkEngine,
+    p3_field::PrimeField32,
+    prover::cpu::{CpuBackend, CpuDevice},
+};
 use serde::{Deserialize, Serialize};
 use serde_with::{serde_as, DisplayFromStr};
 use strum::EnumCount;
 
-use super::{EcAddNeChip, EcDoubleChip};
+use crate::{
+    get_ec_addne_air, get_ec_addne_chip, get_ec_addne_step, get_ec_double_air, get_ec_double_chip,
+    get_ec_double_step, EcAddNeExecutor, EcDoubleExecutor, EccCpuProverExt, WeierstrassAir,
+};
 
 #[serde_as]
 #[derive(Clone, Debug, derive_new::new, Serialize, Deserialize)]
@@ -77,147 +92,97 @@ impl WeierstrassExtension {
     }
 }
 
-#[derive(Chip, ChipUsageGetter, InstructionExecutor, AnyEnum)]
-pub enum WeierstrassExtensionExecutor<F: PrimeField32> {
+#[derive(Clone, AnyEnum, Executor, MeteredExecutor, PreflightExecutor)]
+pub enum WeierstrassExtensionExecutor {
     // 32 limbs prime
-    EcAddNeRv32_32(EcAddNeChip<F, 2, 32>),
-    EcDoubleRv32_32(EcDoubleChip<F, 2, 32>),
+    EcAddNeRv32_32(EcAddNeExecutor<2, 32>),
+    EcDoubleRv32_32(EcDoubleExecutor<2, 32>),
     // 48 limbs prime
-    EcAddNeRv32_48(EcAddNeChip<F, 6, 16>),
-    EcDoubleRv32_48(EcDoubleChip<F, 6, 16>),
+    EcAddNeRv32_48(EcAddNeExecutor<6, 16>),
+    EcDoubleRv32_48(EcDoubleExecutor<6, 16>),
 }
 
-#[derive(ChipUsageGetter, Chip, AnyEnum, From)]
-pub enum WeierstrassExtensionPeriphery<F: PrimeField32> {
-    BitwiseOperationLookup(SharedBitwiseOperationLookupChip<8>),
-    Phantom(PhantomChip<F>),
-}
-
-impl<F: PrimeField32> VmExtension<F> for WeierstrassExtension {
-    type Executor = WeierstrassExtensionExecutor<F>;
-    type Periphery = WeierstrassExtensionPeriphery<F>;
+impl<F: PrimeField32> VmExecutionExtension<F> for WeierstrassExtension {
+    type Executor = WeierstrassExtensionExecutor;
 
-    fn build(
+    fn extend_execution(
         &self,
-        builder: &mut VmInventoryBuilder<F>,
-    ) -> Result<VmInventory<Self::Executor, Self::Periphery>, VmInventoryError> {
-        let mut inventory = VmInventory::new();
-        let SystemPort {
-            execution_bus,
-            program_bus,
-            memory_bridge,
-        } = builder.system_port();
-        let bitwise_lu_chip = if let Some(&chip) = builder
-            .find_chip::<SharedBitwiseOperationLookupChip<8>>()
-            .first()
-        {
-            chip.clone()
-        } else {
-            let bitwise_lu_bus = BitwiseOperationLookupBus::new(builder.new_bus_idx());
-            let chip = SharedBitwiseOperationLookupChip::new(bitwise_lu_bus);
-            inventory.add_periphery_chip(chip.clone());
-            chip
-        };
-        let offline_memory = builder.system_base().offline_memory();
-        let range_checker = builder.system_base().range_checker_chip.clone();
-        let pointer_bits = builder.system_config().memory_config.pointer_max_bits;
-        let ec_add_ne_opcodes = (Rv32WeierstrassOpcode::EC_ADD_NE as usize)
-            ..=(Rv32WeierstrassOpcode::SETUP_EC_ADD_NE as usize);
-        let ec_double_opcodes = (Rv32WeierstrassOpcode::EC_DOUBLE as usize)
-            ..=(Rv32WeierstrassOpcode::SETUP_EC_DOUBLE as usize);
-
+        inventory: &mut ExecutorInventoryBuilder<F, WeierstrassExtensionExecutor>,
+    ) -> Result<(), ExecutorInventoryError> {
+        let pointer_max_bits = inventory.pointer_max_bits();
+        // TODO: somehow get the range checker bus from `ExecutorInventory`
+        let dummy_range_checker_bus = VariableRangeCheckerBus::new(u16::MAX, 16);
         for (i, curve) in self.supported_curves.iter().enumerate() {
             let start_offset =
                 Rv32WeierstrassOpcode::CLASS_OFFSET + i * Rv32WeierstrassOpcode::COUNT;
             let bytes = curve.modulus.bits().div_ceil(8);
-            let config32 = ExprBuilderConfig {
-                modulus: curve.modulus.clone(),
-                num_limbs: 32,
-                limb_bits: 8,
-            };
-            let config48 = ExprBuilderConfig {
-                modulus: curve.modulus.clone(),
-                num_limbs: 48,
-                limb_bits: 8,
-            };
+
             if bytes <= 32 {
-                let add_ne_chip = EcAddNeChip::new(
-                    Rv32VecHeapAdapterChip::<F, 2, 2, 2, 32, 32>::new(
-                        execution_bus,
-                        program_bus,
-                        memory_bridge,
-                        pointer_bits,
-                        bitwise_lu_chip.clone(),
-                    ),
-                    config32.clone(),
+                let config = ExprBuilderConfig {
+                    modulus: curve.modulus.clone(),
+                    num_limbs: 32,
+                    limb_bits: 8,
+                };
+                let addne = get_ec_addne_step(
+                    config.clone(),
+                    dummy_range_checker_bus,
+                    pointer_max_bits,
                     start_offset,
-                    range_checker.clone(),
-                    offline_memory.clone(),
                 );
+
                 inventory.add_executor(
-                    WeierstrassExtensionExecutor::EcAddNeRv32_32(add_ne_chip),
-                    ec_add_ne_opcodes
-                        .clone()
+                    WeierstrassExtensionExecutor::EcAddNeRv32_32(addne),
+                    ((Rv32WeierstrassOpcode::EC_ADD_NE as usize)
+                        ..=(Rv32WeierstrassOpcode::SETUP_EC_ADD_NE as usize))
                         .map(|x| VmOpcode::from_usize(x + start_offset)),
                 )?;
-                let double_chip = EcDoubleChip::new(
-                    Rv32VecHeapAdapterChip::<F, 1, 2, 2, 32, 32>::new(
-                        execution_bus,
-                        program_bus,
-                        memory_bridge,
-                        pointer_bits,
-                        bitwise_lu_chip.clone(),
-                    ),
-                    range_checker.clone(),
-                    config32.clone(),
+
+                let double = get_ec_double_step(
+                    config,
+                    dummy_range_checker_bus,
+                    pointer_max_bits,
                     start_offset,
                     curve.a.clone(),
-                    offline_memory.clone(),
                 );
+
                 inventory.add_executor(
-                    WeierstrassExtensionExecutor::EcDoubleRv32_32(double_chip),
-                    ec_double_opcodes
-                        .clone()
+                    WeierstrassExtensionExecutor::EcDoubleRv32_32(double),
+                    ((Rv32WeierstrassOpcode::EC_DOUBLE as usize)
+                        ..=(Rv32WeierstrassOpcode::SETUP_EC_DOUBLE as usize))
                         .map(|x| VmOpcode::from_usize(x + start_offset)),
                 )?;
             } else if bytes <= 48 {
-                let add_ne_chip = EcAddNeChip::new(
-                    Rv32VecHeapAdapterChip::<F, 2, 6, 6, 16, 16>::new(
-                        execution_bus,
-                        program_bus,
-                        memory_bridge,
-                        pointer_bits,
-                        bitwise_lu_chip.clone(),
-                    ),
-                    config48.clone(),
+                let config = ExprBuilderConfig {
+                    modulus: curve.modulus.clone(),
+                    num_limbs: 48,
+                    limb_bits: 8,
+                };
+                let addne = get_ec_addne_step(
+                    config.clone(),
+                    dummy_range_checker_bus,
+                    pointer_max_bits,
                     start_offset,
-                    range_checker.clone(),
-                    offline_memory.clone(),
                 );
+
                 inventory.add_executor(
-                    WeierstrassExtensionExecutor::EcAddNeRv32_48(add_ne_chip),
-                    ec_add_ne_opcodes
-                        .clone()
+                    WeierstrassExtensionExecutor::EcAddNeRv32_48(addne),
+                    ((Rv32WeierstrassOpcode::EC_ADD_NE as usize)
+                        ..=(Rv32WeierstrassOpcode::SETUP_EC_ADD_NE as usize))
                         .map(|x| VmOpcode::from_usize(x + start_offset)),
                 )?;
-                let double_chip = EcDoubleChip::new(
-                    Rv32VecHeapAdapterChip::<F, 1, 6, 6, 16, 16>::new(
-                        execution_bus,
-                        program_bus,
-                        memory_bridge,
-                        pointer_bits,
-                        bitwise_lu_chip.clone(),
-                    ),
-                    range_checker.clone(),
-                    config48.clone(),
+
+                let double = get_ec_double_step(
+                    config,
+                    dummy_range_checker_bus,
+                    pointer_max_bits,
                     start_offset,
                     curve.a.clone(),
-                    offline_memory.clone(),
                 );
+
                 inventory.add_executor(
-                    WeierstrassExtensionExecutor::EcDoubleRv32_48(double_chip),
-                    ec_double_opcodes
-                        .clone()
+                    WeierstrassExtensionExecutor::EcDoubleRv32_48(double),
+                    ((Rv32WeierstrassOpcode::EC_DOUBLE as usize)
+                        ..=(Rv32WeierstrassOpcode::SETUP_EC_DOUBLE as usize))
                         .map(|x| VmOpcode::from_usize(x + start_offset)),
                 )?;
             } else {
@@ -225,7 +190,200 @@ impl<F: PrimeField32> VmExtension<F> for WeierstrassExtension {
             }
         }
 
-        Ok(inventory)
+        Ok(())
+    }
+}
+
+impl<SC: StarkGenericConfig> VmCircuitExtension<SC> for WeierstrassExtension {
+    fn extend_circuit(&self, inventory: &mut AirInventory<SC>) -> Result<(), AirInventoryError> {
+        let SystemPort {
+            execution_bus,
+            program_bus,
+            memory_bridge,
+        } = inventory.system().port();
+
+        let exec_bridge = ExecutionBridge::new(execution_bus, program_bus);
+        let range_checker_bus = inventory.range_checker().bus;
+        let pointer_max_bits = inventory.pointer_max_bits();
+        // Bus of a `BitwiseOperationLookupAir<8>`: reuse a registered one or add a new AIR.
+        let bitwise_lu = {
+            // Bind first: `inventory` must not stay borrowed while the `else` branch mutates it
+            let existing_air = inventory.find_air::<BitwiseOperationLookupAir<8>>().next();
+            if let Some(air) = existing_air {
+                air.bus
+            } else {
+                let bus = BitwiseOperationLookupBus::new(inventory.new_bus_idx());
+                let air = BitwiseOperationLookupAir::<8>::new(bus);
+                inventory.add_air(air);
+                air.bus
+            }
+        };
+        for (i, curve) in self.supported_curves.iter().enumerate() {
+            let start_offset =
+                Rv32WeierstrassOpcode::CLASS_OFFSET + i * Rv32WeierstrassOpcode::COUNT;
+            let bytes = curve.modulus.bits().div_ceil(8); // modulus size in bytes (rounded up)
+            // Pick the limb layout by modulus size: 32 or 48 limbs of 8 bits each.
+            if bytes <= 32 {
+                let config = ExprBuilderConfig {
+                    modulus: curve.modulus.clone(),
+                    num_limbs: 32,
+                    limb_bits: 8,
+                };
+
+                let addne = get_ec_addne_air::<2, 32>(
+                    exec_bridge,
+                    memory_bridge,
+                    config.clone(),
+                    range_checker_bus,
+                    bitwise_lu,
+                    pointer_max_bits,
+                    start_offset,
+                );
+                inventory.add_air(addne);
+
+                let double = get_ec_double_air::<2, 32>(
+                    exec_bridge,
+                    memory_bridge,
+                    config,
+                    range_checker_bus,
+                    bitwise_lu,
+                    pointer_max_bits,
+                    start_offset,
+                    curve.a.clone(),
+                );
+                inventory.add_air(double);
+            } else if bytes <= 48 {
+                let config = ExprBuilderConfig {
+                    modulus: curve.modulus.clone(),
+                    num_limbs: 48,
+                    limb_bits: 8,
+                };
+
+                let addne = get_ec_addne_air::<6, 16>(
+                    exec_bridge,
+                    memory_bridge,
+                    config.clone(),
+                    range_checker_bus,
+                    bitwise_lu,
+                    pointer_max_bits,
+                    start_offset,
+                );
+                inventory.add_air(addne);
+
+                let double = get_ec_double_air::<6, 16>(
+                    exec_bridge,
+                    memory_bridge,
+                    config,
+                    range_checker_bus,
+                    bitwise_lu,
+                    pointer_max_bits,
+                    start_offset,
+                    curve.a.clone(),
+                );
+                inventory.add_air(double);
+            } else {
+                panic!("Modulus too large"); // only moduli of at most 48 bytes are handled
+            }
+        }
+
+        Ok(())
+    }
+}
+
+// This implementation is specific to CpuBackend because the lookup chips (VariableRangeChecker,
+// BitwiseOperationLookupChip) are specific to CpuBackend.
+impl<E, SC, RA> VmProverExtension<E, RA, WeierstrassExtension> for EccCpuProverExt
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    RA: RowMajorMatrixArena<Val<SC>>,
+    Val<SC>: PrimeField32,
+{
+    fn extend_prover(
+        &self,
+        extension: &WeierstrassExtension,
+        inventory: &mut ChipInventory<SC, RA, CpuBackend<SC>>,
+    ) -> Result<(), ChipInventoryError> {
+        let range_checker = inventory.range_checker()?.clone();
+        let timestamp_max_bits = inventory.timestamp_max_bits();
+        let pointer_max_bits = inventory.airs().pointer_max_bits();
+        let mem_helper = SharedMemoryHelper::new(range_checker.clone(), timestamp_max_bits);
+        let bitwise_lu = {
+            let existing_chip = inventory
+                .find_chip::<SharedBitwiseOperationLookupChip<8>>()
+                .next();
+            if let Some(chip) = existing_chip {
+                chip.clone() // reuse the lookup chip already present in the inventory
+            } else {
+                let air: &BitwiseOperationLookupAir<8> = inventory.next_air()?;
+                let chip = Arc::new(BitwiseOperationLookupChip::new(air.bus));
+                inventory.add_periphery_chip(chip.clone());
+                chip // newly created on `air.bus` and registered as a periphery chip
+            }
+        };
+        for curve in extension.supported_curves.iter() {
+            let bytes = curve.modulus.bits().div_ceil(8); // modulus size in bytes (rounded up)
+            // Pick the limb layout by modulus size: 32 or 48 limbs of 8 bits each.
+            if bytes <= 32 {
+                let config = ExprBuilderConfig {
+                    modulus: curve.modulus.clone(),
+                    num_limbs: 32,
+                    limb_bits: 8,
+                };
+
+                inventory.next_air::<WeierstrassAir<2, 2, 32>>()?;
+                let addne = get_ec_addne_chip::<Val<SC>, 2, 32>(
+                    config.clone(),
+                    mem_helper.clone(),
+                    range_checker.clone(),
+                    bitwise_lu.clone(),
+                    pointer_max_bits,
+                );
+                inventory.add_executor_chip(addne);
+
+                inventory.next_air::<WeierstrassAir<1, 2, 32>>()?;
+                let double = get_ec_double_chip::<Val<SC>, 2, 32>(
+                    config,
+                    mem_helper.clone(),
+                    range_checker.clone(),
+                    bitwise_lu.clone(),
+                    pointer_max_bits,
+                    curve.a.clone(),
+                );
+                inventory.add_executor_chip(double);
+            } else if bytes <= 48 {
+                let config = ExprBuilderConfig {
+                    modulus: curve.modulus.clone(),
+                    num_limbs: 48,
+                    limb_bits: 8,
+                };
+
+                inventory.next_air::<WeierstrassAir<2, 6, 16>>()?;
+                let addne = get_ec_addne_chip::<Val<SC>, 6, 16>(
+                    config.clone(),
+                    mem_helper.clone(),
+                    range_checker.clone(),
+                    bitwise_lu.clone(),
+                    pointer_max_bits,
+                );
+                inventory.add_executor_chip(addne);
+
+                inventory.next_air::<WeierstrassAir<1, 6, 16>>()?;
+                let double = get_ec_double_chip::<Val<SC>, 6, 16>(
+                    config,
+                    mem_helper.clone(),
+                    range_checker.clone(),
+                    bitwise_lu.clone(),
+                    pointer_max_bits,
+                    curve.a.clone(),
+                );
+                inventory.add_executor_chip(double);
+            } else {
+                panic!("Modulus too large"); // only moduli of at most 48 bytes are handled
+            }
+        }
+
+        Ok(())
     }
 }
 
diff --git a/extensions/ecc/tests/src/lib.rs b/extensions/ecc/tests/src/lib.rs
index 1bd01eb936..7740c9e155 100644
--- a/extensions/ecc/tests/src/lib.rs
+++ b/extensions/ecc/tests/src/lib.rs
@@ -11,15 +11,18 @@ mod tests {
     use openvm_algebra_transpiler::ModularTranspilerExtension;
     use openvm_circuit::{
         arch::instructions::exe::VmExe,
-        utils::{air_test, air_test_with_min_segments},
+        utils::{air_test, air_test_with_min_segments, test_system_config},
+    };
+    use openvm_ecc_circuit::{
+        CurveConfig, Rv32WeierstrassConfig, Rv32WeierstrassCpuBuilder, P256_CONFIG,
+        SECP256K1_CONFIG,
     };
-    use openvm_ecc_circuit::{CurveConfig, Rv32WeierstrassConfig, P256_CONFIG, SECP256K1_CONFIG};
     use openvm_ecc_transpiler::EccTranspilerExtension;
     use openvm_rv32im_transpiler::{
         Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
     };
     use openvm_sdk::{
-        config::{AppConfig, SdkVmConfig},
+        config::{AppConfig, SdkVmConfig, SdkVmCpuBuilder, TranspilerConfig},
         StdIn,
     };
     use openvm_stark_backend::p3_field::FieldAlgebra;
@@ -35,9 +38,16 @@ mod tests {
 
     type F = BabyBear;
 
+    #[cfg(test)]
+    fn test_rv32weierstrass_config(curves: Vec<CurveConfig>) -> Rv32WeierstrassConfig {
+        let mut config = Rv32WeierstrassConfig::new(curves); // config for the given curves
+        *config.as_mut() = test_system_config(); // install the test system config
+        config
+    }
+
     #[test]
     fn test_ec() -> Result<()> {
-        let config = Rv32WeierstrassConfig::new(vec![SECP256K1_CONFIG.clone()]);
+        let config = test_rv32weierstrass_config(vec![SECP256K1_CONFIG.clone()]);
         let elf = build_example_program_at_path_with_features(
             get_programs_dir!(),
             "ec",
@@ -53,13 +63,13 @@ mod tests {
                 .with_extension(EccTranspilerExtension)
                 .with_extension(ModularTranspilerExtension),
         )?;
-        air_test(config, openvm_exe);
+        air_test(Rv32WeierstrassCpuBuilder, config, openvm_exe);
         Ok(())
     }
 
     #[test]
-    fn test_ec_nonzero_a() -> Result<()> {
-        let config = Rv32WeierstrassConfig::new(vec![P256_CONFIG.clone()]);
+    fn test_nonzero_a() -> Result<()> {
+        let config = test_rv32weierstrass_config(vec![P256_CONFIG.clone()]);
         let elf = build_example_program_at_path_with_features(
             get_programs_dir!(),
             "ec_nonzero_a",
@@ -75,14 +85,14 @@ mod tests {
                 .with_extension(EccTranspilerExtension)
                 .with_extension(ModularTranspilerExtension),
         )?;
-        air_test(config, openvm_exe);
+        air_test(Rv32WeierstrassCpuBuilder, config, openvm_exe);
         Ok(())
     }
 
     #[test]
-    fn test_ec_two_curves() -> Result<()> {
+    fn test_two_curves() -> Result<()> {
         let config =
-            Rv32WeierstrassConfig::new(vec![SECP256K1_CONFIG.clone(), P256_CONFIG.clone()]);
+            test_rv32weierstrass_config(vec![SECP256K1_CONFIG.clone(), P256_CONFIG.clone()]);
         let elf = build_example_program_at_path_with_features(
             get_programs_dir!(),
             "ec_two_curves",
@@ -98,7 +108,7 @@ mod tests {
                 .with_extension(EccTranspilerExtension)
                 .with_extension(ModularTranspilerExtension),
         )?;
-        air_test(config, openvm_exe);
+        air_test(Rv32WeierstrassCpuBuilder, config, openvm_exe);
         Ok(())
     }
 
@@ -106,8 +116,7 @@ mod tests {
     fn test_decompress() -> Result<()> {
         use halo2curves_axiom::{group::Curve, secp256k1::Secp256k1Affine};
 
-        let config =
-            Rv32WeierstrassConfig::new(vec![SECP256K1_CONFIG.clone(),
+        let config = test_rv32weierstrass_config(vec![SECP256K1_CONFIG.clone(),
                 CurveConfig {
                     struct_name: "CurvePoint5mod8".to_string(),
                     modulus: BigUint::from_str("115792089237316195423570985008687907853269984665640564039457584007913129639501")
@@ -165,7 +174,13 @@ mod tests {
             .into_iter()
             .map(FieldAlgebra::from_canonical_u8)
             .collect();
-        air_test_with_min_segments(config, openvm_exe, vec![coords], 1);
+        air_test_with_min_segments(
+            Rv32WeierstrassCpuBuilder,
+            config,
+            openvm_exe,
+            vec![coords],
+            1,
+        );
         Ok(())
     }
 
@@ -182,7 +197,7 @@ mod tests {
             &config,
         )?;
         let openvm_exe = VmExe::from_elf(elf, config.transpiler())?;
-        air_test(config, openvm_exe);
+        air_test(SdkVmCpuBuilder, config, openvm_exe);
         Ok(())
     }
 
@@ -200,7 +215,7 @@ mod tests {
         let openvm_exe = VmExe::from_elf(elf, config.transpiler())?;
         let mut input = StdIn::default();
         input.write(&P256_RECOVERY_TEST_VECTORS.to_vec());
-        air_test_with_min_segments(config, openvm_exe, input, 1);
+        air_test_with_min_segments(SdkVmCpuBuilder, config, openvm_exe, input, 1);
         Ok(())
     }
 
@@ -218,7 +233,7 @@ mod tests {
         let openvm_exe = VmExe::from_elf(elf, config.transpiler())?;
         let mut input = StdIn::default();
         input.write(&K256_RECOVERY_TEST_VECTORS.to_vec());
-        air_test_with_min_segments(config, openvm_exe, input, 1);
+        air_test_with_min_segments(SdkVmCpuBuilder, config, openvm_exe, input, 1);
         Ok(())
     }
 
@@ -236,7 +251,7 @@ mod tests {
         let openvm_exe = VmExe::from_elf(elf, config.transpiler())?;
         let mut input = StdIn::default();
         input.write(&k256_sec1_decoding_test_vectors());
-        air_test_with_min_segments(config, openvm_exe, input, 1);
+        air_test_with_min_segments(SdkVmCpuBuilder, config, openvm_exe, input, 1);
         Ok(())
     }
 
@@ -261,7 +276,7 @@ mod tests {
         )
         .unwrap();
         let config =
-            Rv32WeierstrassConfig::new(vec![SECP256K1_CONFIG.clone(), P256_CONFIG.clone()]);
-        air_test(config, openvm_exe);
+            test_rv32weierstrass_config(vec![SECP256K1_CONFIG.clone(), P256_CONFIG.clone()]);
+        air_test(Rv32WeierstrassCpuBuilder, config, openvm_exe);
     }
 }
diff --git a/extensions/keccak256/circuit/Cargo.toml b/extensions/keccak256/circuit/Cargo.toml
index 941303ab39..2299a0599a 100644
--- a/extensions/keccak256/circuit/Cargo.toml
+++ b/extensions/keccak256/circuit/Cargo.toml
@@ -23,12 +23,10 @@ p3-keccak-air = { workspace = true }
 strum.workspace = true
 tiny-keccak.workspace = true
 itertools.workspace = true
-tracing.workspace = true
 derive-new.workspace = true
 derive_more = { workspace = true, features = ["from"] }
 rand.workspace = true
 serde.workspace = true
-serde-big-array.workspace = true
 
 [dev-dependencies]
 openvm-stark-sdk = { workspace = true }
diff --git a/extensions/keccak256/circuit/src/execution.rs b/extensions/keccak256/circuit/src/execution.rs
new file mode 100644
index 0000000000..b095fec4c4
--- /dev/null
+++ b/extensions/keccak256/circuit/src/execution.rs
@@ -0,0 +1,154 @@
+use std::borrow::{Borrow, BorrowMut};
+
+use openvm_circuit::{arch::*, system::memory::online::GuestMemory};
+use openvm_circuit_primitives_derive::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS},
+    LocalOpcode,
+};
+use openvm_keccak256_transpiler::Rv32KeccakOpcode;
+use openvm_stark_backend::p3_field::PrimeField32;
+use p3_keccak_air::NUM_ROUNDS;
+
+use super::{KeccakVmExecutor, KECCAK_WORD_SIZE};
+use crate::utils::{keccak256, num_keccak_f};
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct KeccakPreCompute {
+    a: u8, // instruction operand `a`: register read to obtain the output (dst) pointer
+    b: u8, // instruction operand `b`: register read to obtain the input (src) pointer
+    c: u8, // instruction operand `c`: register read to obtain the input length
+}
+
+impl KeccakVmExecutor {
+    /// Checks the address spaces of `inst` and records operands `a`, `b`, `c` in `data`.
+    /// Fails with `InvalidInstruction(pc)` unless `d`/`e` are the register/memory spaces.
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut KeccakPreCompute,
+    ) -> Result<(), StaticProgramError> {
+        let reads_registers = inst.d.as_canonical_u32() == RV32_REGISTER_AS;
+        let targets_memory = inst.e.as_canonical_u32() == RV32_MEMORY_AS;
+        if !(reads_registers && targets_memory) {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+
+        // Each operand is narrowed to a byte with a plain `as` cast.
+        let narrow = |operand: F| operand.as_canonical_u32() as u8;
+        *data = KeccakPreCompute {
+            a: narrow(inst.a),
+            b: narrow(inst.b),
+            c: narrow(inst.c),
+        };
+
+        // Sanity check: this path is expected to see only the KECCAK256 opcode.
+        let expected = Rv32KeccakOpcode::KECCAK256.global_opcode();
+        assert_eq!(expected, inst.opcode);
+        Ok(())
+    }
+}
+
+impl<F: PrimeField32> Executor<F> for KeccakVmExecutor {
+    /// Size in bytes of the `KeccakPreCompute` record filled in by `pre_compute`.
+    fn pre_compute_size(&self) -> usize {
+        size_of::<KeccakPreCompute>()
+    }
+
+    /// Decodes `inst` into the `KeccakPreCompute` viewed over `data` and returns the E1
+    /// handler; a decoding error is propagated to the caller.
+    fn pre_compute<Ctx: ExecutionCtxTrait>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let record: &mut KeccakPreCompute = data.borrow_mut();
+        self.pre_compute_impl(pc, inst, record)?;
+        Ok(execute_e1_impl::<_, _>)
+    }
+}
+
+impl<F: PrimeField32> MeteredExecutor<F> for KeccakVmExecutor {
+    /// Size in bytes of the `E2PreCompute<KeccakPreCompute>` record used when metering.
+    fn metered_pre_compute_size(&self) -> usize {
+        size_of::<E2PreCompute<KeccakPreCompute>>()
+    }
+
+    /// Stores `chip_idx` in the record viewed over `data`, decodes `inst` into its inner
+    /// `KeccakPreCompute`, and returns the E2 handler; a decoding error is propagated.
+    fn metered_pre_compute<Ctx: MeteredExecutionCtxTrait>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let record: &mut E2PreCompute<KeccakPreCompute> = data.borrow_mut();
+        record.chip_idx = chip_idx as u32;
+        self.pre_compute_impl(pc, inst, &mut record.data)?;
+        Ok(execute_e2_impl::<_, _>)
+    }
+}
+
+/// Shared E1/E2 body: hashes the `len` bytes at `src` and writes the digest to `dst`.
+/// With `IS_E1` the message is read as one slice and 0 is returned; otherwise it is read in
+/// `KECCAK_WORD_SIZE` chunks and `num_keccak_f(len) * NUM_ROUNDS` is returned as the height.
+#[inline(always)]
+unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait, const IS_E1: bool>(
+    pre_compute: &KeccakPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) -> u32 {
+    // Operands a, b, c name the registers holding dst, src and len (read in that order).
+    let dst_ptr = u32::from_le_bytes(vm_state.vm_read(RV32_REGISTER_AS, pre_compute.a as u32));
+    let src_ptr = u32::from_le_bytes(vm_state.vm_read(RV32_REGISTER_AS, pre_compute.b as u32));
+    let len_u32 = u32::from_le_bytes(vm_state.vm_read(RV32_REGISTER_AS, pre_compute.c as u32));
+    let byte_len = len_u32 as usize;
+
+    let (digest, height) = if IS_E1 {
+        // SAFETY: RV32_MEMORY_AS is memory address space of type u8
+        let message = vm_state.vm_read_slice(RV32_MEMORY_AS, src_ptr, byte_len);
+        (keccak256(message), 0)
+    } else {
+        let word_count = byte_len.div_ceil(KECCAK_WORD_SIZE);
+        let mut message = Vec::new();
+        for i in 0..word_count {
+            let addr = src_ptr + (i * KECCAK_WORD_SIZE) as u32;
+            message.extend(vm_state.vm_read::<u8, KECCAK_WORD_SIZE>(RV32_MEMORY_AS, addr));
+        }
+        // Whole words were read; hash only the first `byte_len` bytes.
+        let digest = keccak256(&message[..byte_len]);
+        (digest, (num_keccak_f(byte_len) * NUM_ROUNDS) as u32)
+    };
+    // Store the digest at the destination pointer.
+    vm_state.vm_write(RV32_MEMORY_AS, dst_ptr, &digest);
+
+    // Advance to the next instruction and count it.
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+
+    height
+}
+
+/// E1 entry point: views the bytes as a `KeccakPreCompute` and runs the shared body.
+unsafe fn execute_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    execute_e12_impl::<F, CTX, true>(Borrow::<KeccakPreCompute>::borrow(pre_compute), vm_state);
+}
+
+/// E2 (metered) entry point: runs the shared body, then reports the returned height
+/// for this record's `chip_idx` through `ctx.on_height_change`.
+unsafe fn execute_e2_impl<F: PrimeField32, CTX: MeteredExecutionCtxTrait>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let record: &E2PreCompute<KeccakPreCompute> = pre_compute.borrow();
+    let rows = execute_e12_impl::<F, CTX, false>(&record.data, vm_state);
+    vm_state.ctx.on_height_change(record.chip_idx as usize, rows);
+}
diff --git a/extensions/keccak256/circuit/src/extension.rs b/extensions/keccak256/circuit/src/extension.rs
index 5993f69eda..b06c2447d6 100644
--- a/extensions/keccak256/circuit/src/extension.rs
+++ b/extensions/keccak256/circuit/src/extension.rs
@@ -1,20 +1,32 @@
+use std::{result::Result, sync::Arc};
+
 use derive_more::derive::From;
 use openvm_circuit::{
     arch::{
-        InitFileGenerator, SystemConfig, SystemPort, VmExtension, VmInventory, VmInventoryBuilder,
-        VmInventoryError,
+        AirInventory, AirInventoryError, ChipInventory, ChipInventoryError,
+        ExecutorInventoryBuilder, ExecutorInventoryError, InitFileGenerator, MatrixRecordArena,
+        RowMajorMatrixArena, SystemConfig, VmBuilder, VmChipComplex, VmCircuitExtension,
+        VmExecutionExtension, VmProverExtension,
+    },
+    system::{
+        memory::SharedMemoryHelper, SystemChipInventory, SystemCpuBuilder, SystemExecutor,
+        SystemPort,
     },
-    system::phantom::PhantomChip,
 };
-use openvm_circuit_derive::{AnyEnum, InstructionExecutor, VmConfig};
-use openvm_circuit_primitives::bitwise_op_lookup::BitwiseOperationLookupBus;
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
+use openvm_circuit_derive::{AnyEnum, Executor, MeteredExecutor, PreflightExecutor, VmConfig};
+use openvm_circuit_primitives::bitwise_op_lookup::{
+    BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
+};
 use openvm_instructions::*;
 use openvm_rv32im_circuit::{
-    Rv32I, Rv32IExecutor, Rv32IPeriphery, Rv32Io, Rv32IoExecutor, Rv32IoPeriphery, Rv32M,
-    Rv32MExecutor, Rv32MPeriphery,
+    Rv32I, Rv32IExecutor, Rv32ImCpuProverExt, Rv32Io, Rv32IoExecutor, Rv32M, Rv32MExecutor,
 };
-use openvm_stark_backend::p3_field::PrimeField32;
+use openvm_stark_backend::{
+    config::{StarkGenericConfig, Val},
+    p3_field::PrimeField32,
+    prover::cpu::{CpuBackend, CpuDevice},
+};
+use openvm_stark_sdk::engine::StarkEngine;
 use serde::{Deserialize, Serialize};
 use strum::IntoEnumIterator;
 
@@ -22,7 +34,7 @@ use crate::*;
 
 #[derive(Clone, Debug, VmConfig, derive_new::new, Serialize, Deserialize)]
 pub struct Keccak256Rv32Config {
-    #[system]
+    #[config(executor = "SystemExecutor<F>")]
     pub system: SystemConfig,
     #[extension]
     pub rv32i: Rv32I,
@@ -37,7 +49,7 @@ pub struct Keccak256Rv32Config {
 impl Default for Keccak256Rv32Config {
     fn default() -> Self {
         Self {
-            system: SystemConfig::default().with_continuations(),
+            system: SystemConfig::default(),
             rv32i: Rv32I,
             rv32m: Rv32M::default(),
             io: Rv32Io,
@@ -49,62 +61,146 @@ impl Default for Keccak256Rv32Config {
 // Default implementation uses no init file
 impl InitFileGenerator for Keccak256Rv32Config {}
 
-#[derive(Clone, Copy, Debug, Default, Serialize, Deserialize)]
-pub struct Keccak256;
+#[derive(Clone)]
+pub struct Keccak256Rv32CpuBuilder;
+
+impl<E, SC> VmBuilder<E> for Keccak256Rv32CpuBuilder
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    Val<SC>: PrimeField32,
+{
+    type VmConfig = Keccak256Rv32Config;
+    type SystemChipInventory = SystemChipInventory<SC>;
+    type RecordArena = MatrixRecordArena<Val<SC>>;
 
-#[derive(ChipUsageGetter, Chip, InstructionExecutor, From, AnyEnum)]
-pub enum Keccak256Executor<F: PrimeField32> {
-    Keccak256(KeccakVmChip<F>),
+    /// Builds the CPU chip complex for this config. The system chips are created first from
+    /// `config.system`; the prover chips for `rv32i`, `rv32m`, `io` and `keccak` are then added
+    /// to the resulting inventory.
+    fn create_chip_complex(
+        &self,
+        config: &Keccak256Rv32Config,
+        circuit: AirInventory<SC>,
+    ) -> Result<
+        VmChipComplex<SC, Self::RecordArena, E::PB, Self::SystemChipInventory>,
+        ChipInventoryError,
+    > {
+        let mut complex =
+            VmBuilder::<E>::create_chip_complex(&SystemCpuBuilder, &config.system, circuit)?;
+        let chips = &mut complex.inventory;
+        // Extensions are applied in this fixed order, each through its CPU prover extension.
+        VmProverExtension::<E, _, _>::extend_prover(&Rv32ImCpuProverExt, &config.rv32i, chips)?;
+        VmProverExtension::<E, _, _>::extend_prover(&Rv32ImCpuProverExt, &config.rv32m, chips)?;
+        VmProverExtension::<E, _, _>::extend_prover(&Rv32ImCpuProverExt, &config.io, chips)?;
+        VmProverExtension::<E, _, _>::extend_prover(&Keccak256CpuProverExt, &config.keccak, chips)?;
+        Ok(complex)
+    }
 }
 
-#[derive(From, ChipUsageGetter, Chip, AnyEnum)]
-pub enum Keccak256Periphery<F: PrimeField32> {
-    BitwiseOperationLookup(SharedBitwiseOperationLookupChip<8>),
-    Phantom(PhantomChip<F>),
+// =================================== VM Extension Implementation =================================
+#[derive(Clone, Copy, Debug, Default, Serialize, Deserialize)]
+pub struct Keccak256;
+
+#[derive(Clone, Copy, From, AnyEnum, Executor, MeteredExecutor, PreflightExecutor)]
+pub enum Keccak256Executor {
+    Keccak256(KeccakVmExecutor),
 }
 
-impl<F: PrimeField32> VmExtension<F> for Keccak256 {
-    type Executor = Keccak256Executor<F>;
-    type Periphery = Keccak256Periphery<F>;
+impl<F> VmExecutionExtension<F> for Keccak256 {
+    type Executor = Keccak256Executor;
 
-    fn build(
+    fn extend_execution(
         &self,
-        builder: &mut VmInventoryBuilder<F>,
-    ) -> Result<VmInventory<Self::Executor, Self::Periphery>, VmInventoryError> {
-        let mut inventory = VmInventory::new();
+        inventory: &mut ExecutorInventoryBuilder<F, Keccak256Executor>,
+    ) -> Result<(), ExecutorInventoryError> {
+        // A single `KeccakVmExecutor` is registered for every opcode that
+        // `Rv32KeccakOpcode::iter()` yields, each mapped to its global opcode.
+        let pointer_max_bits = inventory.pointer_max_bits();
+        let opcodes = Rv32KeccakOpcode::iter().map(|op| op.global_opcode());
+        let executor = KeccakVmExecutor::new(Rv32KeccakOpcode::CLASS_OFFSET, pointer_max_bits);
+        inventory.add_executor(executor, opcodes)?;
+
+        Ok(())
+    }
+}
+
+impl<SC: StarkGenericConfig> VmCircuitExtension<SC> for Keccak256 {
+    fn extend_circuit(&self, inventory: &mut AirInventory<SC>) -> Result<(), AirInventoryError> {
         let SystemPort {
             execution_bus,
             program_bus,
             memory_bridge,
-        } = builder.system_port();
-        let bitwise_lu_chip = if let Some(&chip) = builder
-            .find_chip::<SharedBitwiseOperationLookupChip<8>>()
-            .first()
-        {
-            chip.clone()
-        } else {
-            let bitwise_lu_bus = BitwiseOperationLookupBus::new(builder.new_bus_idx());
-            let chip = SharedBitwiseOperationLookupChip::new(bitwise_lu_bus);
-            inventory.add_periphery_chip(chip.clone());
-            chip
+        } = inventory.system().port();
+        // Adds the keccak AIR, creating the 8-bit bitwise lookup AIR first if none is registered.
+        let exec_bridge = ExecutionBridge::new(execution_bus, program_bus);
+        let pointer_max_bits = inventory.pointer_max_bits();
+        // Reuse the bus of an already-registered lookup AIR; otherwise allocate one and add it.
+        let bitwise_lu = {
+            let existing_air = inventory.find_air::<BitwiseOperationLookupAir<8>>().next();
+            if let Some(air) = existing_air {
+                air.bus
+            } else {
+                let bus = BitwiseOperationLookupBus::new(inventory.new_bus_idx());
+                let air = BitwiseOperationLookupAir::<8>::new(bus);
+                inventory.add_air(air);
+                air.bus
+            }
         };
-        let offline_memory = builder.system_base().offline_memory();
-        let address_bits = builder.system_config().memory_config.pointer_max_bits;
 
-        let keccak_chip = KeccakVmChip::new(
-            execution_bus,
-            program_bus,
+        let keccak = KeccakVmAir::new(
+            exec_bridge,
             memory_bridge,
-            address_bits,
-            bitwise_lu_chip,
+            bitwise_lu,
+            pointer_max_bits,
             Rv32KeccakOpcode::CLASS_OFFSET,
-            offline_memory,
         );
-        inventory.add_executor(
-            keccak_chip,
-            Rv32KeccakOpcode::iter().map(|x| x.global_opcode()),
-        )?;
+        inventory.add_air(keccak);
+
+        Ok(())
+    }
+}
+
+/// Prover extension that adds the keccak chip for [`Keccak256`] to a `CpuBackend` inventory.
+pub struct Keccak256CpuProverExt;
+// This implementation is specific to CpuBackend because the lookup chips (VariableRangeChecker,
+// BitwiseOperationLookupChip) are specific to CpuBackend.
+impl<E, SC, RA> VmProverExtension<E, RA, Keccak256> for Keccak256CpuProverExt
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    RA: RowMajorMatrixArena<Val<SC>>,
+    Val<SC>: PrimeField32,
+{
+    fn extend_prover(
+        &self,
+        _: &Keccak256,
+        inventory: &mut ChipInventory<SC, RA, CpuBackend<SC>>,
+    ) -> Result<(), ChipInventoryError> {
+        let range_checker = inventory.range_checker()?.clone();
+        let timestamp_max_bits = inventory.timestamp_max_bits();
+        let mem_helper = SharedMemoryHelper::new(range_checker, timestamp_max_bits);
+        let pointer_max_bits = inventory.airs().pointer_max_bits();
+
+        // Share an existing 8-bit bitwise lookup chip, or create one from the next AIR's bus.
+        let bitwise_lu = {
+            let existing_chip = inventory
+                .find_chip::<SharedBitwiseOperationLookupChip<8>>()
+                .next();
+            if let Some(chip) = existing_chip {
+                chip.clone()
+            } else {
+                let air: &BitwiseOperationLookupAir<8> = inventory.next_air()?;
+                let chip = Arc::new(BitwiseOperationLookupChip::new(air.bus));
+                inventory.add_periphery_chip(chip.clone());
+                chip
+            }
+        };
+
+        // Result unused: presumably this only advances the AIR cursor past `KeccakVmAir`.
+        inventory.next_air::<KeccakVmAir>()?;
+        let filler = KeccakVmFiller::new(bitwise_lu, pointer_max_bits);
+        inventory.add_executor_chip(KeccakVmChip::new(filler, mem_helper));
 
-        Ok(inventory)
+        Ok(())
     }
 }
diff --git a/extensions/keccak256/circuit/src/lib.rs b/extensions/keccak256/circuit/src/lib.rs
index c9fd1c9f5a..13bd7b27db 100644
--- a/extensions/keccak256/circuit/src/lib.rs
+++ b/extensions/keccak256/circuit/src/lib.rs
@@ -1,42 +1,21 @@
 //! Stateful keccak256 hasher. Handles full keccak sponge (padding, absorb, keccak-f) on
 //! variable length inputs read from VM memory.
-use std::{
-    array::from_fn,
-    cmp::min,
-    sync::{Arc, Mutex},
-};
 
 use openvm_circuit_primitives::bitwise_op_lookup::SharedBitwiseOperationLookupChip;
-use openvm_stark_backend::p3_field::PrimeField32;
-use serde::{Deserialize, Serialize};
-use serde_big_array::BigArray;
-use tiny_keccak::{Hasher, Keccak};
-use utils::num_keccak_f;
 
 pub mod air;
 pub mod columns;
+pub mod execution;
 pub mod trace;
 pub mod utils;
 
 mod extension;
-pub use extension::*;
-
 #[cfg(test)]
 mod tests;
-
 pub use air::KeccakVmAir;
-use openvm_circuit::{
-    arch::{ExecutionBridge, ExecutionBus, ExecutionError, ExecutionState, InstructionExecutor},
-    system::{
-        memory::{offline_checker::MemoryBridge, MemoryController, OfflineMemory, RecordId},
-        program::ProgramBus,
-    },
-};
-use openvm_instructions::{
-    instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_REGISTER_NUM_LIMBS, LocalOpcode,
-};
+pub use extension::*;
+use openvm_circuit::arch::*;
 use openvm_keccak256_transpiler::Rv32KeccakOpcode;
-use openvm_rv32im_circuit::adapters::read_rv32_register;
 
 // ==== Constants for register/memory adapter ====
 /// Register reads to get dst, src, len
@@ -69,218 +48,16 @@ pub const KECCAK_DIGEST_BYTES: usize = 32;
 /// Number of 64-bit digest limbs.
 pub const KECCAK_DIGEST_U64S: usize = KECCAK_DIGEST_BYTES / 8;
 
-pub struct KeccakVmChip<F: PrimeField32> {
-    pub air: KeccakVmAir,
-    /// IO and memory data necessary for each opcode call
-    pub records: Vec<KeccakRecord<F>>,
-    pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<8>,
-
-    offset: usize,
-
-    offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-}
-
-#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
-pub struct KeccakRecord<F> {
-    pub pc: F,
-    pub dst_read: RecordId,
-    pub src_read: RecordId,
-    pub len_read: RecordId,
-    pub input_blocks: Vec<KeccakInputBlock>,
-    pub digest_writes: [RecordId; KECCAK_DIGEST_WRITES],
-}
+pub type KeccakVmChip<F> = VmChipWrapper<F, KeccakVmFiller>;
 
-#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
-pub struct KeccakInputBlock {
-    /// Memory reads for non-padding bytes in this block. Length is at most [KECCAK_RATE_BYTES /
-    /// KECCAK_WORD_SIZE].
-    pub reads: Vec<RecordId>,
-    /// Index in `reads` of the memory read for < KECCAK_WORD_SIZE bytes, if any.
-    pub partial_read_idx: Option<usize>,
-    /// Bytes with padding. Can be derived from `bytes_read` but we store for convenience.
-    #[serde(with = "BigArray")]
-    pub padded_bytes: [u8; KECCAK_RATE_BYTES],
-    pub remaining_len: usize,
-    pub src: usize,
-    pub is_new_start: bool,
+#[derive(derive_new::new, Clone, Copy)]
+pub struct KeccakVmExecutor {
+    pub offset: usize,
+    pub pointer_max_bits: usize,
 }
 
-impl<F: PrimeField32> KeccakVmChip<F> {
-    pub fn new(
-        execution_bus: ExecutionBus,
-        program_bus: ProgramBus,
-        memory_bridge: MemoryBridge,
-        address_bits: usize,
-        bitwise_lookup_chip: SharedBitwiseOperationLookupChip<8>,
-        offset: usize,
-        offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-    ) -> Self {
-        Self {
-            air: KeccakVmAir::new(
-                ExecutionBridge::new(execution_bus, program_bus),
-                memory_bridge,
-                bitwise_lookup_chip.bus(),
-                address_bits,
-                offset,
-            ),
-            bitwise_lookup_chip,
-            records: Vec::new(),
-            offset,
-            offline_memory,
-        }
-    }
-}
-
-impl<F: PrimeField32> InstructionExecutor<F> for KeccakVmChip<F> {
-    fn execute(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-    ) -> Result<ExecutionState<u32>, ExecutionError> {
-        let &Instruction {
-            opcode,
-            a,
-            b,
-            c,
-            d,
-            e,
-            ..
-        } = instruction;
-        let local_opcode = Rv32KeccakOpcode::from_usize(opcode.local_opcode_idx(self.offset));
-        debug_assert_eq!(local_opcode, Rv32KeccakOpcode::KECCAK256);
-
-        let mut timestamp_delta = 3;
-        let (dst_read, dst) = read_rv32_register(memory, d, a);
-        let (src_read, src) = read_rv32_register(memory, d, b);
-        let (len_read, len) = read_rv32_register(memory, d, c);
-        #[cfg(debug_assertions)]
-        {
-            assert!(dst < (1 << self.air.ptr_max_bits));
-            assert!(src < (1 << self.air.ptr_max_bits));
-            assert!(len < (1 << self.air.ptr_max_bits));
-        }
-
-        let mut remaining_len = len as usize;
-        let num_blocks = num_keccak_f(remaining_len);
-        let mut input_blocks = Vec::with_capacity(num_blocks);
-        let mut hasher = Keccak::v256();
-        let mut src = src as usize;
-
-        for block_idx in 0..num_blocks {
-            if block_idx != 0 {
-                memory.increment_timestamp_by(KECCAK_REGISTER_READS as u32);
-                timestamp_delta += KECCAK_REGISTER_READS as u32;
-            }
-            let mut reads = Vec::with_capacity(KECCAK_RATE_BYTES);
-
-            let mut partial_read_idx = None;
-            let mut bytes = [0u8; KECCAK_RATE_BYTES];
-            for i in (0..KECCAK_RATE_BYTES).step_by(KECCAK_WORD_SIZE) {
-                if i < remaining_len {
-                    let read =
-                        memory.read::<RV32_REGISTER_NUM_LIMBS>(e, F::from_canonical_usize(src + i));
-
-                    let chunk = read.1.map(|x| {
-                        x.as_canonical_u32()
-                            .try_into()
-                            .expect("Memory cell not a byte")
-                    });
-                    let copy_len = min(KECCAK_WORD_SIZE, remaining_len - i);
-                    if copy_len != KECCAK_WORD_SIZE {
-                        partial_read_idx = Some(reads.len());
-                    }
-                    bytes[i..i + copy_len].copy_from_slice(&chunk[..copy_len]);
-                    reads.push(read.0);
-                } else {
-                    memory.increment_timestamp();
-                }
-                timestamp_delta += 1;
-            }
-
-            let mut block = KeccakInputBlock {
-                reads,
-                partial_read_idx,
-                padded_bytes: bytes,
-                remaining_len,
-                src,
-                is_new_start: block_idx == 0,
-            };
-            if block_idx != num_blocks - 1 {
-                src += KECCAK_RATE_BYTES;
-                remaining_len -= KECCAK_RATE_BYTES;
-                hasher.update(&block.padded_bytes);
-            } else {
-                // handle padding here since it is convenient
-                debug_assert!(remaining_len < KECCAK_RATE_BYTES);
-                hasher.update(&block.padded_bytes[..remaining_len]);
-
-                if remaining_len == KECCAK_RATE_BYTES - 1 {
-                    block.padded_bytes[remaining_len] = 0b1000_0001;
-                } else {
-                    block.padded_bytes[remaining_len] = 0x01;
-                    block.padded_bytes[KECCAK_RATE_BYTES - 1] = 0x80;
-                }
-            }
-            input_blocks.push(block);
-        }
-        let mut output = [0u8; 32];
-        hasher.finalize(&mut output);
-        let dst = dst as usize;
-        let digest_writes: [_; KECCAK_DIGEST_WRITES] = from_fn(|i| {
-            timestamp_delta += 1;
-            memory
-                .write::<KECCAK_WORD_SIZE>(
-                    e,
-                    F::from_canonical_usize(dst + i * KECCAK_WORD_SIZE),
-                    from_fn(|j| F::from_canonical_u8(output[i * KECCAK_WORD_SIZE + j])),
-                )
-                .0
-        });
-        tracing::trace!("[runtime] keccak256 output: {:?}", output);
-
-        let record = KeccakRecord {
-            pc: F::from_canonical_u32(from_state.pc),
-            dst_read,
-            src_read,
-            len_read,
-            input_blocks,
-            digest_writes,
-        };
-
-        // Add the events to chip state for later trace generation usage
-        self.records.push(record);
-
-        // NOTE: Check this is consistent with KeccakVmAir::timestamp_change (we don't use it to
-        // avoid unnecessary conversions here)
-        let total_timestamp_delta =
-            len + (KECCAK_REGISTER_READS + KECCAK_ABSORB_READS + KECCAK_DIGEST_WRITES) as u32;
-        memory.increment_timestamp_by(total_timestamp_delta - timestamp_delta);
-
-        Ok(ExecutionState {
-            pc: from_state.pc + DEFAULT_PC_STEP,
-            timestamp: from_state.timestamp + total_timestamp_delta,
-        })
-    }
-
-    fn get_opcode_name(&self, _: usize) -> String {
-        "KECCAK256".to_string()
-    }
-}
-
-impl Default for KeccakInputBlock {
-    fn default() -> Self {
-        // Padding for empty byte array so padding constraints still hold
-        let mut padded_bytes = [0u8; KECCAK_RATE_BYTES];
-        padded_bytes[0] = 0x01;
-        *padded_bytes.last_mut().unwrap() = 0x80;
-        Self {
-            padded_bytes,
-            partial_read_idx: None,
-            remaining_len: 0,
-            is_new_start: true,
-            reads: Vec::new(),
-            src: 0,
-        }
-    }
+#[derive(derive_new::new)]
+pub struct KeccakVmFiller {
+    pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<8>,
+    pub pointer_max_bits: usize,
 }
diff --git a/extensions/keccak256/circuit/src/tests.rs b/extensions/keccak256/circuit/src/tests.rs
index 65a34491b8..c09a1c9475 100644
--- a/extensions/keccak256/circuit/src/tests.rs
+++ b/extensions/keccak256/circuit/src/tests.rs
@@ -1,104 +1,262 @@
-use std::borrow::BorrowMut;
+use std::{array, borrow::BorrowMut, sync::Arc};
 
 use hex::FromHex;
-use openvm_circuit::arch::testing::{VmChipTestBuilder, VmChipTester, BITWISE_OP_LOOKUP_BUS};
+use openvm_circuit::{
+    arch::{
+        testing::{memory::gen_pointer, TestChipHarness, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS},
+        Arena, DenseRecordArena, PreflightExecutor,
+    },
+    utils::get_random_message,
+};
 use openvm_circuit_primitives::bitwise_op_lookup::{
-    BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
+    BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
+    SharedBitwiseOperationLookupChip,
 };
-use openvm_instructions::{instruction::Instruction, LocalOpcode};
-use openvm_keccak256_transpiler::Rv32KeccakOpcode;
-use openvm_stark_backend::{
-    p3_field::FieldAlgebra, utils::disable_debug_builder, verifier::VerificationError,
+use openvm_instructions::{
+    instruction::Instruction,
+    riscv::{RV32_CELL_BITS, RV32_MEMORY_AS},
+    LocalOpcode,
 };
-use openvm_stark_sdk::{
-    config::baby_bear_blake3::BabyBearBlake3Config, p3_baby_bear::BabyBear,
-    utils::create_seeded_rng,
+use openvm_keccak256_transpiler::Rv32KeccakOpcode::{self, *};
+use openvm_stark_backend::{
+    p3_field::FieldAlgebra,
+    p3_matrix::{
+        dense::{DenseMatrix, RowMajorMatrix},
+        Matrix,
+    },
+    utils::disable_debug_builder,
+    verifier::VerificationError,
 };
-use p3_keccak_air::NUM_ROUNDS;
-use rand::Rng;
+use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
+use rand::{rngs::StdRng, Rng};
 use tiny_keccak::Hasher;
 
-use super::{columns::KeccakVmCols, utils::num_keccak_f, KeccakVmChip};
+use super::{columns::KeccakVmCols, KeccakVmChip};
+use crate::{
+    trace::KeccakVmRecordLayout, utils::keccak256, KeccakVmAir, KeccakVmExecutor, KeccakVmFiller,
+};
 
 type F = BabyBear;
-// io is vector of (input, expected_output, prank_output) where prank_output is Some if the trace
-// will be replaced
-#[allow(clippy::type_complexity)]
-fn build_keccak256_test(
-    io: Vec<(Vec<u8>, Option<[u8; 32]>, Option<[u8; 32]>)>,
-) -> VmChipTester<BabyBearBlake3Config> {
+const MAX_INS_CAPACITY: usize = 4096;
+type Harness<RA> = TestChipHarness<F, KeccakVmExecutor, KeccakVmAir, KeccakVmChip<F>, RA>;
+
+fn create_test_chips<RA: Arena>(
+    tester: &mut VmChipTestBuilder<F>,
+) -> (
+    Harness<RA>,
+    (
+        BitwiseOperationLookupAir<RV32_CELL_BITS>,
+        SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+    ),
+) {
     let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<8>::new(bitwise_bus);
+    let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+        bitwise_bus,
+    ));
 
-    let mut tester = VmChipTestBuilder::default();
-    let mut chip = KeccakVmChip::new(
-        tester.execution_bus(),
-        tester.program_bus(),
+    let air = KeccakVmAir::new(
+        tester.execution_bridge(),
         tester.memory_bridge(),
+        bitwise_chip.bus(),
         tester.address_bits(),
-        bitwise_chip.clone(),
         Rv32KeccakOpcode::CLASS_OFFSET,
-        tester.offline_memory_mutex_arc(),
     );
 
-    let mut dst = 0;
-    let src = 0;
+    let executor = KeccakVmExecutor::new(Rv32KeccakOpcode::CLASS_OFFSET, tester.address_bits());
+    let chip = KeccakVmChip::new(
+        KeccakVmFiller::new(bitwise_chip.clone(), tester.address_bits()),
+        tester.memory_helper(),
+    );
 
-    for (input, expected_output, _) in &io {
-        let [a, b, c] = [0, 4, 8]; // space apart for register limbs
-        let [d, e] = [1, 2];
+    let harness = Harness::<RA>::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
 
-        tester.write(d, a, (dst as u32).to_le_bytes().map(F::from_canonical_u8));
-        tester.write(d, b, (src as u32).to_le_bytes().map(F::from_canonical_u8));
+    (harness, (bitwise_chip.air, bitwise_chip))
+}
+
+/// Writes the operands for one keccak call into test memory, executes `opcode` on `harness` and
+/// asserts the digest read back at the destination. A given `message` overrides `len`.
+fn set_and_execute<RA: Arena>(
+    tester: &mut VmChipTestBuilder<F>,
+    harness: &mut Harness<RA>,
+    rng: &mut StdRng,
+    opcode: Rv32KeccakOpcode,
+    message: Option<&[u8]>,
+    len: Option<usize>,
+    expected_output: Option<[u8; 32]>,
+) where
+    KeccakVmExecutor: PreflightExecutor<F, RA>,
+{
+    let len = len.unwrap_or(rng.gen_range(1..3000));
+    let tmp = get_random_message(rng, len);
+    let message: &[u8] = message.unwrap_or(&tmp);
+    let len = message.len();
+
+    let rd = gen_pointer(rng, 4);
+    let rs1 = gen_pointer(rng, 4);
+    let rs2 = gen_pointer(rng, 4);
+
+    let dst_ptr = gen_pointer(rng, 4);
+    let src_ptr = gen_pointer(rng, 4);
+    tester.write(1, rd, dst_ptr.to_le_bytes().map(F::from_canonical_u8));
+    tester.write(1, rs1, src_ptr.to_le_bytes().map(F::from_canonical_u8));
+    tester.write(1, rs2, len.to_le_bytes().map(F::from_canonical_u8));
+
+    message.chunks(4).enumerate().for_each(|(i, chunk)| {
+        let filler: u8 = rng.gen();
+        let chunk: [&u8; 4] = array::from_fn(|j| chunk.get(j).unwrap_or(&filler));
         tester.write(
-            d,
-            c,
-            (input.len() as u32).to_le_bytes().map(F::from_canonical_u8),
+            RV32_MEMORY_AS as usize,
+            src_ptr + i * 4,
+            chunk.map(|&x| F::from_canonical_u8(x)),
         );
-        for (i, byte) in input.iter().enumerate() {
-            tester.write_cell(e, src + i, F::from_canonical_u8(*byte));
-        }
+    });
 
-        tester.execute(
-            &mut chip,
-            &Instruction::from_isize(
-                Rv32KeccakOpcode::KECCAK256.global_opcode(),
-                a as isize,
-                b as isize,
-                c as isize,
-                d as isize,
-                e as isize,
-            ),
+    tester.execute(
+        harness,
+        &Instruction::from_usize(opcode.global_opcode(), [rd, rs1, rs2, 1, 2]),
+    );
+
+    let expected_output = expected_output.unwrap_or_else(|| keccak256(message));
+    assert_eq!(
+        expected_output.map(F::from_canonical_u8),
+        tester.read(RV32_MEMORY_AS as usize, dst_ptr)
+    );
+}
+
+///////////////////////////////////////////////////////////////////////////////////////
+/// POSITIVE TESTS
+///
+/// Randomly generate computations and execute, ensuring that the generated trace
+/// passes all constraints.
+///////////////////////////////////////////////////////////////////////////////////////
+#[test]
+fn rand_keccak256_test() {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default();
+    let (mut harness, bitwise) = create_test_chips(&mut tester);
+
+    // Ten executions; message, length and expected digest are all passed as `None`.
+    for _ in 0..10_usize {
+        set_and_execute(
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            KECCAK256,
+            None,
+            None,
+            None,
         );
-        if let Some(output) = expected_output {
-            for (i, byte) in output.iter().enumerate() {
-                assert_eq!(tester.read_cell(e, dst + i), F::from_canonical_u8(*byte));
-            }
-        }
-        // shift dst to not deal with timestamps for pranking
-        dst += 32;
     }
-    let mut tester = tester.build().load(chip).load(bitwise_chip).finalize();
-
-    let keccak_trace = tester.air_proof_inputs[2]
-        .1
-        .raw
-        .common_main
-        .as_mut()
-        .unwrap();
-    let mut row = 0;
-    for (input, _, prank_output) in io {
-        let num_blocks = num_keccak_f(input.len());
-        let num_rows = NUM_ROUNDS * num_blocks;
-        row += num_rows;
-        if prank_output.is_none() {
-            continue;
-        }
-        let output = prank_output.unwrap();
-        let digest_row: &mut KeccakVmCols<_> = keccak_trace.row_mut(row - 1).borrow_mut();
+
+    let finalized = tester
+        .build()
+        .load(harness)
+        .load_periphery(bitwise)
+        .finalize();
+    finalized.simple_test().expect("Verification failed");
+}
+
+#[test]
+fn keccak256_length_tests() {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default();
+    let (mut harness, bitwise) = create_test_chips(&mut tester);
+
+    // Empty input, the lengths on either side of 136, and two longer messages.
+    [0, 135, 136, 137, 2000, 10000].into_iter().for_each(|msg_len| {
+        println!("Testing length: {}", msg_len);
+        set_and_execute(
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            KECCAK256,
+            None,
+            Some(msg_len),
+            None,
+        );
+    });
+
+    let finalized = tester
+        .build()
+        .load(harness)
+        .load_periphery(bitwise)
+        .finalize();
+    finalized.simple_test().expect("Verification failed");
+}
+
+// Keccak Known Answer Test (KAT) vectors from https://keccak.team/obsolete/KeccakKAT-3.zip.
+// Only selecting a small subset for now (add more later)
+// KAT includes inputs at the bit level; we only include the ones that are bytes
+#[test]
+fn test_keccak256_positive_kat_vectors() {
+    // (input, expected digest), both hex; the trailing comment gives the KAT name and bit length
+    let test_vectors = vec![
+        ("", "C5D2460186F7233C927E7DB2DCC703C0E500B653CA82273B7BFAD8045D85A470"), // ShortMsgKAT_256 Len = 0
+        ("CC", "EEAD6DBFC7340A56CAEDC044696A168870549A6A7F6F56961E84A54BD9970B8A"), // ShortMsgKAT_256 Len = 8
+        ("B55C10EAE0EC684C16D13463F29291BF26C82E2FA0422A99C71DB4AF14DD9C7F33EDA52FD73D017CC0F2DBE734D831F0D820D06D5F89DACC485739144F8CFD4799223B1AFF9031A105CB6A029BA71E6E5867D85A554991C38DF3C9EF8C1E1E9A7630BE61CAABCA69280C399C1FB7A12D12AEFC", "0347901965D3635005E75A1095695CCA050BC9ED2D440C0372A31B348514A889"), // ShortMsgKAT_256 Len = 920
+        ("2EDC282FFB90B97118DD03AAA03B145F363905E3CBD2D50ECD692B37BF000185C651D3E9726C690D3773EC1E48510E42B17742B0B0377E7DE6B8F55E00A8A4DB4740CEE6DB0830529DD19617501DC1E9359AA3BCF147E0A76B3AB70C4984C13E339E6806BB35E683AF8527093670859F3D8A0FC7D493BCBA6BB12B5F65E71E705CA5D6C948D66ED3D730B26DB395B3447737C26FAD089AA0AD0E306CB28BF0ACF106F89AF3745F0EC72D534968CCA543CD2CA50C94B1456743254E358C1317C07A07BF2B0ECA438A709367FAFC89A57239028FC5FECFD53B8EF958EF10EE0608B7F5CB9923AD97058EC067700CC746C127A61EE3", "DD1D2A92B3F3F3902F064365838E1F5F3468730C343E2974E7A9ECFCD84AA6DB"), // ShortMsgKAT_256 Len = 1952,
+        ("724627916C50338643E6996F07877EAFD96BDF01DA7E991D4155B9BE1295EA7D21C9391F4C4A41C75F77E5D27389253393725F1427F57914B273AB862B9E31DABCE506E558720520D33352D119F699E784F9E548FF91BC35CA147042128709820D69A8287EA3257857615EB0321270E94B84F446942765CE882B191FAEE7E1C87E0F0BD4E0CD8A927703524B559B769CA4ECE1F6DBF313FDCF67C572EC4185C1A88E86EC11B6454B371980020F19633B6B95BD280E4FBCB0161E1A82470320CEC6ECFA25AC73D09F1536F286D3F9DACAFB2CD1D0CE72D64D197F5C7520B3CCB2FD74EB72664BA93853EF41EABF52F015DD591500D018DD162815CC993595B195", "EA0E416C0F7B4F11E3F00479FDDF954F2539E5E557753BD546F69EE375A5DE29"), // LongMsgKAT_256 Len = 2048
+        ("6E1CADFB2A14C5FFB1DD69919C0124ED1B9A414B2BEA1E5E422D53B022BDD13A9C88E162972EBB9852330006B13C5B2F2AFBE754AB7BACF12479D4558D19DDBB1A6289387B3AC084981DF335330D1570850B97203DBA5F20CF7FF21775367A8401B6EBE5B822ED16C39383232003ABC412B0CE0DD7C7DA064E4BB73E8C58F222A1512D5FE6D947316E02F8AA87E7AA7A3AA1C299D92E6414AE3B927DB8FF708AC86A09B24E1884743BC34067BB0412453B4A6A6509504B550F53D518E4BCC3D9C1EFDB33DA2EACCB84C9F1CAEC81057A8508F423B25DB5500E5FC86AB3B5EB10D6D0BF033A716DDE55B09FD53451BBEA644217AE1EF91FAD2B5DCC6515249C96EE7EABFD12F1EF65256BD1CFF2087DABF2F69AD1FFB9CF3BC8CA437C7F18B6095BC08D65DF99CC7F657C418D8EB109FDC91A13DC20A438941726EF24F9738B6552751A320C4EA9C8D7E8E8592A3B69D30A419C55FB6CB0850989C029AAAE66305E2C14530B39EAA86EA3BA2A7DECF4B2848B01FAA8AA91F2440B7CC4334F63061CE78AA1589BEFA38B194711697AE3AADCB15C9FBF06743315E2F97F1A8B52236ACB444069550C2345F4ED12E5B8E881CDD472E803E5DCE63AE485C2713F81BC307F25AC74D39BAF7E3BC5E7617465C2B9C309CB0AC0A570A7E46C6116B2242E1C54F456F6589E20B1C0925BF1CD5F9344E01F63B5BA9D4671ABBF920C7ED32937A074C33836F0E019DFB6B35D865312C6058DFDAFF844C8D58B75071523E79DFBAB2EA37479DF12C474584F4FF40F00F92C6BADA025CE4DF8FAF0AFB2CE75C07773907CA288167D6B011599C3DE0FFF16C1161D31DF1C1DDE217CB574ED5A33751759F8ED2B1E6979C5088B940926B9155C9D250B479948C20ACB5578DC02C97593F646CC5C558A6A0F3D8D273258887CCFF259197CB1A7380622E371FD2EB5376225EC04F9ED1D1F2F08FA2376DB5B790E73086F581064ED1C5F47E989E955D77716B50FB64B853388FBA01DAC2CEAE99642341F2DA64C56BEFC4789C051E5EB79B063F2F084DB4491C3C5AA7B4BCF7DD7A1D7CED1554FA67DCA1F9515746A237547A4A1D22ACF649FA1ED3B9BB52BDE0C6996620F8CFDB293F8BACAD02BCE428363D0BB3D391469461D212769048219220A7ED39D1F9157DFEA3B4394CA8F5F612D9AC162BF0B961BFBC157E5F863CE659EB235CF98E8444BC8C7880BDDCD0B3B389AAA89D5E05F84D0649EEBACAB4F1C75352E89F0E9D91E4ACA264493A50D2F4AED66BD13650D1F18E7199E931C78AEB763E903807499F1CD99AF81276B615BE8EC709B039584B2B57445B014F6162577F3548329FD288B0800F936FC5EA1A412E3142E609FC8E39988CA53DF4D8FB5B5FB5F42C0A01648946AC6864CFB0E928
56345B08E5DF0D235261E44CFE776456B40AEF0AC1A0DFA2FE639486666C05EA196B0C1A9D346435E03965E6139B1CE10129F8A53745F80100A94AE04D996C13AC14CF2713E39DFBB19A936CF3861318BD749B1FB82F40D73D714E406CBEB3D920EA037B7DE566455CCA51980F0F53A762D5BF8A4DBB55AAC0EDDB4B1F2AED2AA3D01449D34A57FDE4329E7FF3F6BECE4456207A4225218EE9F174C2DE0FF51CEAF2A07CF84F03D1DF316331E3E725C5421356C40ED25D5ABF9D24C4570FED618CA41000455DBD759E32E2BF0B6C5E61297C20F752C3042394CE840C70943C451DD5598EB0E4953CE26E833E5AF64FC1007C04456D19F87E45636F456B7DC9D31E757622E2739573342DE75497AE181AAE7A5425756C8E2A7EEF918E5C6A968AEFE92E8B261BBFE936B19F9E69A3C90094096DAE896450E1505ED5828EE2A7F0EA3A28E6EC47C0AF711823E7689166EA07ECA00FFC493131D65F93A4E1D03E0354AFC2115CFB8D23DAE8C6F96891031B23226B8BC82F1A73DAA5BB740FC8CC36C0975BEFA0C7895A9BBC261EDB7FD384103968F7A18353D5FE56274E4515768E4353046C785267DE01E816A2873F97AAD3AB4D7234EBFD9832716F43BE8245CF0B4408BA0F0F764CE9D24947AB6ABDD9879F24FCFF10078F5894B0D64F6A8D3EA3DD92A0C38609D3C14FDC0A44064D501926BE84BF8034F1D7A8C5F382E6989BFFA2109D4FBC56D1F091E8B6FABFF04D21BB19656929D19DECB8E8291E6AE5537A169874E0FE9890DFF11FFD159AD23D749FB9E8B676E2C31313C16D1EFA06F4D7BC191280A4EE63049FCEF23042B20303AECDD412A526D7A53F760A089FBDF13F361586F0DCA76BB928EDB41931D11F679619F948A6A9E8DBA919327769006303C6EF841438A7255C806242E2E7FF4621BB0F8AFA0B4A248EAD1A1E946F3E826FBFBBF8013CE5CC814E20FEF21FA5DB19EC7FF0B06C592247B27E500EB4705E6C37D41D09E83CB0A618008CA1AAAE8A215171D817659063C2FA385CFA3C1078D5C2B28CE7312876A276773821BE145785DFF24BBB24D590678158A61EA49F2BE56FDAC8CE7F94B05D62F15ADD351E5930FD4F31B3E7401D5C0FF7FC845B165FB6ABAFD4788A8B0615FEC91092B34B710A68DA518631622BA2AAE5D19010D307E565A161E64A4319A6B261FB2F6A90533997B1AEC32EF89CF1F232696E213DAFE4DBEB1CF1D5BBD12E5FF2EBB2809184E37CD9A0E58A4E0AF099493E6D8CC98B05A2F040A7E39515038F6EE21FC25F8D459A327B83EC1A28A234237ACD52465506942646AC248EC96EBBA6E1B092475F7ADAE4D35E009FD338613C7D4C12E381847310A10E6F02C02392FC32084FBE939689BC6518BE27AF7842DEEA8043828E3DFFE3BBAC4794CA
0CC78699722709F2E4B0EAE7287DEB06A27B462423EC3F0DF227ACF589043292685F2C0E73203E8588B62554FF19D6260C7FE48DF301509D33BE0D8B31D3F658C921EF7F55449FF3887D91BFB894116DF57206098E8C5835B", "3C79A3BD824542C20AF71F21D6C28DF2213A041F77DD79A328A0078123954E7B"), // LongMsgKAT_256 Len = 16664
+        ("7ADC0B6693E61C269F278E6944A5A2D8300981E40022F839AC644387BFAC9086650085C2CDC585FEA47B9D2E52D65A2B29A7DC370401EF5D60DD0D21F9E2B90FAE919319B14B8C5565B0423CEFB827D5F1203302A9D01523498A4DB10374", "4CC2AFF141987F4C2E683FA2DE30042BACDCD06087D7A7B014996E9CFEAA58CE"), // ShortMsgKAT_256 Len = 752
+    ];
+
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default();
+    let (mut harness, bitwise) = create_test_chips(&mut tester);
+
+    for (input, output) in test_vectors {
+        let input = Vec::from_hex(input).unwrap();
+        let output = Vec::from_hex(output).unwrap().try_into().unwrap();
+        set_and_execute(
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            KECCAK256,
+            Some(&input),
+            None,
+            Some(output),
+        );
+    }
+    let tester = tester
+        .build()
+        .load(harness)
+        .load_periphery(bitwise)
+        .finalize();
+    tester.simple_test().expect("Verification failed");
+}
+
+//////////////////////////////////////////////////////////////////////////////////////
+// NEGATIVE TESTS
+//
+// Given a fake trace of a single operation, set up a chip and run the test. We replace
+// part of the trace and check that the chip throws the expected error.
+//////////////////////////////////////////////////////////////////////////////////////
+fn run_negative_keccak256_test(
+    input: &[u8],
+    prank_output: [u8; 32],
+    verification_error: VerificationError,
+) {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default();
+    let (mut harness, bitwise) = create_test_chips(&mut tester);
+
+    set_and_execute(
+        &mut tester,
+        &mut harness,
+        &mut rng,
+        KECCAK256,
+        Some(input),
+        None,
+        None,
+    );
+
+    let modify_trace = |trace: &mut DenseMatrix<BabyBear>| {
+        let mut trace_row = trace.row_slice(16).to_vec();
+        let digest_row: &mut KeccakVmCols<_> = trace_row.as_mut_slice().borrow_mut();
         for i in 0..16 {
-            let out_limb =
-                F::from_canonical_u16(output[2 * i] as u16 + ((output[2 * i + 1] as u16) << 8));
+            let out_limb = F::from_canonical_u16(
+                prank_output[2 * i] as u16 + ((prank_output[2 * i + 1] as u16) << 8),
+            );
             let x = i / 4;
             let y = 0;
             let limb = i % 4;
@@ -108,9 +266,16 @@ fn build_keccak256_test(
                 digest_row.inner.a_prime_prime[y][x][limb] = out_limb;
             }
         }
-    }
+        *trace = RowMajorMatrix::new(trace_row, trace.width());
+    };
 
-    tester
+    disable_debug_builder();
+    let tester = tester
+        .build()
+        .load_and_prank_trace(harness, modify_trace)
+        .load_periphery(bitwise)
+        .finalize();
+    tester.simple_test_with_expected_error(verification_error);
 }
 
 #[test]
@@ -122,37 +287,49 @@ fn test_keccak256_negative() {
     let mut out = [0u8; 32];
     hasher.finalize(&mut out);
     out[0] = rng.gen();
-    let tester = build_keccak256_test(vec![(input, None, Some(out))]);
-    disable_debug_builder();
-    assert_eq!(
-        tester.simple_test().err(),
-        Some(VerificationError::OodEvaluationMismatch)
-    );
+    run_negative_keccak256_test(&input, out, VerificationError::OodEvaluationMismatch);
 }
 
-// Keccak Known Answer Test (KAT) vectors from https://keccak.team/obsolete/KeccakKAT-3.zip.
-// Only selecting a small subset for now (add more later)
-// KAT includes inputs at the bit level; we only include the ones that are bytes
+///////////////////////////////////////////////////////////////////////////////////////
+/// DENSE TESTS
+///
+/// Ensure that the chip works as expected with dense records.
+/// We first execute some instructions with a [DenseRecordArena] and transfer the records
+/// to a [MatrixRecordArena]. After transferring we generate the trace and make sure that
+/// all the constraints pass.
+///////////////////////////////////////////////////////////////////////////////////////
 #[test]
-fn test_keccak256_positive_kat_vectors() {
-    // input, output, Len in bits
-    let test_vectors = vec![
-        ("", "C5D2460186F7233C927E7DB2DCC703C0E500B653CA82273B7BFAD8045D85A470"), // ShortMsgKAT_256 Len = 0
-        ("CC", "EEAD6DBFC7340A56CAEDC044696A168870549A6A7F6F56961E84A54BD9970B8A"), // ShortMsgKAT_256 Len = 8
-        ("B55C10EAE0EC684C16D13463F29291BF26C82E2FA0422A99C71DB4AF14DD9C7F33EDA52FD73D017CC0F2DBE734D831F0D820D06D5F89DACC485739144F8CFD4799223B1AFF9031A105CB6A029BA71E6E5867D85A554991C38DF3C9EF8C1E1E9A7630BE61CAABCA69280C399C1FB7A12D12AEFC", "0347901965D3635005E75A1095695CCA050BC9ED2D440C0372A31B348514A889"), // ShortMsgKAT_256 Len = 920
-        ("2EDC282FFB90B97118DD03AAA03B145F363905E3CBD2D50ECD692B37BF000185C651D3E9726C690D3773EC1E48510E42B17742B0B0377E7DE6B8F55E00A8A4DB4740CEE6DB0830529DD19617501DC1E9359AA3BCF147E0A76B3AB70C4984C13E339E6806BB35E683AF8527093670859F3D8A0FC7D493BCBA6BB12B5F65E71E705CA5D6C948D66ED3D730B26DB395B3447737C26FAD089AA0AD0E306CB28BF0ACF106F89AF3745F0EC72D534968CCA543CD2CA50C94B1456743254E358C1317C07A07BF2B0ECA438A709367FAFC89A57239028FC5FECFD53B8EF958EF10EE0608B7F5CB9923AD97058EC067700CC746C127A61EE3", "DD1D2A92B3F3F3902F064365838E1F5F3468730C343E2974E7A9ECFCD84AA6DB"), // ShortMsgKAT_256 Len = 1952,
-        ("724627916C50338643E6996F07877EAFD96BDF01DA7E991D4155B9BE1295EA7D21C9391F4C4A41C75F77E5D27389253393725F1427F57914B273AB862B9E31DABCE506E558720520D33352D119F699E784F9E548FF91BC35CA147042128709820D69A8287EA3257857615EB0321270E94B84F446942765CE882B191FAEE7E1C87E0F0BD4E0CD8A927703524B559B769CA4ECE1F6DBF313FDCF67C572EC4185C1A88E86EC11B6454B371980020F19633B6B95BD280E4FBCB0161E1A82470320CEC6ECFA25AC73D09F1536F286D3F9DACAFB2CD1D0CE72D64D197F5C7520B3CCB2FD74EB72664BA93853EF41EABF52F015DD591500D018DD162815CC993595B195", "EA0E416C0F7B4F11E3F00479FDDF954F2539E5E557753BD546F69EE375A5DE29"), // LongMsgKAT_256 Len = 2048
-        ("6E1CADFB2A14C5FFB1DD69919C0124ED1B9A414B2BEA1E5E422D53B022BDD13A9C88E162972EBB9852330006B13C5B2F2AFBE754AB7BACF12479D4558D19DDBB1A6289387B3AC084981DF335330D1570850B97203DBA5F20CF7FF21775367A8401B6EBE5B822ED16C39383232003ABC412B0CE0DD7C7DA064E4BB73E8C58F222A1512D5FE6D947316E02F8AA87E7AA7A3AA1C299D92E6414AE3B927DB8FF708AC86A09B24E1884743BC34067BB0412453B4A6A6509504B550F53D518E4BCC3D9C1EFDB33DA2EACCB84C9F1CAEC81057A8508F423B25DB5500E5FC86AB3B5EB10D6D0BF033A716DDE55B09FD53451BBEA644217AE1EF91FAD2B5DCC6515249C96EE7EABFD12F1EF65256BD1CFF2087DABF2F69AD1FFB9CF3BC8CA437C7F18B6095BC08D65DF99CC7F657C418D8EB109FDC91A13DC20A438941726EF24F9738B6552751A320C4EA9C8D7E8E8592A3B69D30A419C55FB6CB0850989C029AAAE66305E2C14530B39EAA86EA3BA2A7DECF4B2848B01FAA8AA91F2440B7CC4334F63061CE78AA1589BEFA38B194711697AE3AADCB15C9FBF06743315E2F97F1A8B52236ACB444069550C2345F4ED12E5B8E881CDD472E803E5DCE63AE485C2713F81BC307F25AC74D39BAF7E3BC5E7617465C2B9C309CB0AC0A570A7E46C6116B2242E1C54F456F6589E20B1C0925BF1CD5F9344E01F63B5BA9D4671ABBF920C7ED32937A074C33836F0E019DFB6B35D865312C6058DFDAFF844C8D58B75071523E79DFBAB2EA37479DF12C474584F4FF40F00F92C6BADA025CE4DF8FAF0AFB2CE75C07773907CA288167D6B011599C3DE0FFF16C1161D31DF1C1DDE217CB574ED5A33751759F8ED2B1E6979C5088B940926B9155C9D250B479948C20ACB5578DC02C97593F646CC5C558A6A0F3D8D273258887CCFF259197CB1A7380622E371FD2EB5376225EC04F9ED1D1F2F08FA2376DB5B790E73086F581064ED1C5F47E989E955D77716B50FB64B853388FBA01DAC2CEAE99642341F2DA64C56BEFC4789C051E5EB79B063F2F084DB4491C3C5AA7B4BCF7DD7A1D7CED1554FA67DCA1F9515746A237547A4A1D22ACF649FA1ED3B9BB52BDE0C6996620F8CFDB293F8BACAD02BCE428363D0BB3D391469461D212769048219220A7ED39D1F9157DFEA3B4394CA8F5F612D9AC162BF0B961BFBC157E5F863CE659EB235CF98E8444BC8C7880BDDCD0B3B389AAA89D5E05F84D0649EEBACAB4F1C75352E89F0E9D91E4ACA264493A50D2F4AED66BD13650D1F18E7199E931C78AEB763E903807499F1CD99AF81276B615BE8EC709B039584B2B57445B014F6162577F3548329FD288B0800F936FC5EA1A412E3142E609FC8E39988CA53DF4D8FB5B5FB5F42C0A01648946AC6864CFB0E928
56345B08E5DF0D235261E44CFE776456B40AEF0AC1A0DFA2FE639486666C05EA196B0C1A9D346435E03965E6139B1CE10129F8A53745F80100A94AE04D996C13AC14CF2713E39DFBB19A936CF3861318BD749B1FB82F40D73D714E406CBEB3D920EA037B7DE566455CCA51980F0F53A762D5BF8A4DBB55AAC0EDDB4B1F2AED2AA3D01449D34A57FDE4329E7FF3F6BECE4456207A4225218EE9F174C2DE0FF51CEAF2A07CF84F03D1DF316331E3E725C5421356C40ED25D5ABF9D24C4570FED618CA41000455DBD759E32E2BF0B6C5E61297C20F752C3042394CE840C70943C451DD5598EB0E4953CE26E833E5AF64FC1007C04456D19F87E45636F456B7DC9D31E757622E2739573342DE75497AE181AAE7A5425756C8E2A7EEF918E5C6A968AEFE92E8B261BBFE936B19F9E69A3C90094096DAE896450E1505ED5828EE2A7F0EA3A28E6EC47C0AF711823E7689166EA07ECA00FFC493131D65F93A4E1D03E0354AFC2115CFB8D23DAE8C6F96891031B23226B8BC82F1A73DAA5BB740FC8CC36C0975BEFA0C7895A9BBC261EDB7FD384103968F7A18353D5FE56274E4515768E4353046C785267DE01E816A2873F97AAD3AB4D7234EBFD9832716F43BE8245CF0B4408BA0F0F764CE9D24947AB6ABDD9879F24FCFF10078F5894B0D64F6A8D3EA3DD92A0C38609D3C14FDC0A44064D501926BE84BF8034F1D7A8C5F382E6989BFFA2109D4FBC56D1F091E8B6FABFF04D21BB19656929D19DECB8E8291E6AE5537A169874E0FE9890DFF11FFD159AD23D749FB9E8B676E2C31313C16D1EFA06F4D7BC191280A4EE63049FCEF23042B20303AECDD412A526D7A53F760A089FBDF13F361586F0DCA76BB928EDB41931D11F679619F948A6A9E8DBA919327769006303C6EF841438A7255C806242E2E7FF4621BB0F8AFA0B4A248EAD1A1E946F3E826FBFBBF8013CE5CC814E20FEF21FA5DB19EC7FF0B06C592247B27E500EB4705E6C37D41D09E83CB0A618008CA1AAAE8A215171D817659063C2FA385CFA3C1078D5C2B28CE7312876A276773821BE145785DFF24BBB24D590678158A61EA49F2BE56FDAC8CE7F94B05D62F15ADD351E5930FD4F31B3E7401D5C0FF7FC845B165FB6ABAFD4788A8B0615FEC91092B34B710A68DA518631622BA2AAE5D19010D307E565A161E64A4319A6B261FB2F6A90533997B1AEC32EF89CF1F232696E213DAFE4DBEB1CF1D5BBD12E5FF2EBB2809184E37CD9A0E58A4E0AF099493E6D8CC98B05A2F040A7E39515038F6EE21FC25F8D459A327B83EC1A28A234237ACD52465506942646AC248EC96EBBA6E1B092475F7ADAE4D35E009FD338613C7D4C12E381847310A10E6F02C02392FC32084FBE939689BC6518BE27AF7842DEEA8043828E3DFFE3BBAC4794CA
0CC78699722709F2E4B0EAE7287DEB06A27B462423EC3F0DF227ACF589043292685F2C0E73203E8588B62554FF19D6260C7FE48DF301509D33BE0D8B31D3F658C921EF7F55449FF3887D91BFB894116DF57206098E8C5835B", "3C79A3BD824542C20AF71F21D6C28DF2213A041F77DD79A328A0078123954E7B"), // LongMsgKAT_256 Len = 16664
-        ("7ADC0B6693E61C269F278E6944A5A2D8300981E40022F839AC644387BFAC9086650085C2CDC585FEA47B9D2E52D65A2B29A7DC370401EF5D60DD0D21F9E2B90FAE919319B14B8C5565B0423CEFB827D5F1203302A9D01523498A4DB10374", "4CC2AFF141987F4C2E683FA2DE30042BACDCD06087D7A7B014996E9CFEAA58CE"), // ShortMsgKAT_256 Len = 752
-    ];
+fn dense_record_arena_test() {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default();
+    let (mut sparse_harness, bitwise) = create_test_chips(&mut tester);
 
-    let mut io = vec![];
-    for (input, output) in test_vectors {
-        let input = Vec::from_hex(input).unwrap();
-        let output = Vec::from_hex(output).unwrap();
-        io.push((input, Some(output.try_into().unwrap()), None));
+    {
+        let mut dense_harness = create_test_chips::<DenseRecordArena>(&mut tester).0;
+
+        let num_ops: usize = 10;
+        for _ in 0..num_ops {
+            set_and_execute(
+                &mut tester,
+                &mut dense_harness,
+                &mut rng,
+                KECCAK256,
+                None,
+                None,
+                None,
+            );
+        }
+
+        let mut record_interpreter = dense_harness
+            .arena
+            .get_record_seeker::<_, KeccakVmRecordLayout>();
+        record_interpreter.transfer_to_matrix_arena(&mut sparse_harness.arena);
     }
 
-    let tester = build_keccak256_test(io);
+    let tester = tester
+        .build()
+        .load(sparse_harness)
+        .load_periphery(bitwise)
+        .finalize();
     tester.simple_test().expect("Verification failed");
 }
diff --git a/extensions/keccak256/circuit/src/trace.rs b/extensions/keccak256/circuit/src/trace.rs
index c314c38eac..f9b1f7f4c5 100644
--- a/extensions/keccak256/circuit/src/trace.rs
+++ b/extensions/keccak256/circuit/src/trace.rs
@@ -1,16 +1,30 @@
-use std::{array::from_fn, borrow::BorrowMut, sync::Arc};
+use std::{
+    array::{self, from_fn},
+    borrow::{Borrow, BorrowMut},
+    cmp::min,
+};
 
-use openvm_circuit::system::memory::RecordId;
-use openvm_instructions::riscv::{RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS};
+use openvm_circuit::{
+    arch::*,
+    system::memory::{
+        offline_checker::{MemoryReadAuxRecord, MemoryWriteBytesAuxRecord},
+        online::TracingMemory,
+        MemoryAuxColsFactory,
+    },
+};
+use openvm_circuit_primitives::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_CELL_BITS, RV32_MEMORY_AS, RV32_REGISTER_AS, RV32_REGISTER_NUM_LIMBS},
+    LocalOpcode,
+};
+use openvm_keccak256_transpiler::Rv32KeccakOpcode;
+use openvm_rv32im_circuit::adapters::{read_rv32_register, tracing_read, tracing_write};
 use openvm_stark_backend::{
-    config::{StarkGenericConfig, Val},
-    p3_air::BaseAir,
-    p3_field::{FieldAlgebra, PrimeField32},
+    p3_field::PrimeField32,
     p3_matrix::{dense::RowMajorMatrix, Matrix},
     p3_maybe_rayon::prelude::*,
-    prover::types::AirProofInput,
-    rap::get_air_name,
-    AirRef, Chip, ChipUsageGetter,
 };
 use p3_keccak_air::{
     generate_trace_rows, NUM_KECCAK_COLS as NUM_KECCAK_PERM_COLS, NUM_ROUNDS, U64_LIMBS,
@@ -18,258 +32,537 @@ use p3_keccak_air::{
 use tiny_keccak::keccakf;
 
 use super::{
-    columns::{KeccakInstructionCols, KeccakVmCols},
-    KeccakVmChip, KECCAK_ABSORB_READS, KECCAK_DIGEST_WRITES, KECCAK_RATE_BYTES, KECCAK_RATE_U16S,
+    columns::KeccakVmCols, KECCAK_ABSORB_READS, KECCAK_DIGEST_WRITES, KECCAK_RATE_BYTES,
     KECCAK_REGISTER_READS, NUM_ABSORB_ROUNDS,
 };
+use crate::{
+    columns::NUM_KECCAK_VM_COLS,
+    utils::{keccak256, keccak_f, num_keccak_f},
+    KeccakVmExecutor, KeccakVmFiller, KECCAK_DIGEST_BYTES, KECCAK_RATE_U16S, KECCAK_WORD_SIZE,
+};
+
+#[derive(Clone, Copy)]
+pub struct KeccakVmMetadata {
+    pub len: usize,
+}
+
+impl MultiRowMetadata for KeccakVmMetadata {
+    #[inline(always)]
+    fn get_num_rows(&self) -> usize {
+        num_keccak_f(self.len) * NUM_ROUNDS
+    }
+}
+
+pub(crate) type KeccakVmRecordLayout = MultiRowLayout<KeccakVmMetadata>;
+
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug, Clone)]
+pub struct KeccakVmRecordHeader {
+    pub from_pc: u32,
+    pub timestamp: u32,
+    pub rd_ptr: u32,
+    pub rs1_ptr: u32,
+    pub rs2_ptr: u32,
+    pub dst: u32,
+    pub src: u32,
+    pub len: u32,
+
+    pub register_reads_aux: [MemoryReadAuxRecord; KECCAK_REGISTER_READS],
+    pub write_aux: [MemoryWriteBytesAuxRecord<KECCAK_WORD_SIZE>; KECCAK_DIGEST_WRITES],
+}
+
+pub struct KeccakVmRecordMut<'a> {
+    pub inner: &'a mut KeccakVmRecordHeader,
+    // Having a contiguous slice of the input is useful for fast hashing in `execute`
+    pub input: &'a mut [u8],
+    pub read_aux: &'a mut [MemoryReadAuxRecord],
+}
+
+/// Custom borrowing that splits the buffer into a fixed-size `KeccakVmRecordHeader`,
+/// followed by `num_reads * KECCAK_WORD_SIZE` input bytes (`num_reads` is derived at runtime
+/// from the layout's `len`), followed by a slice of `num_reads` `MemoryReadAuxRecord`s.
+/// Uses `align_to_mut()` to make sure the slice is properly aligned to `MemoryReadAuxRecord`.
+/// Has debug assertions that check the size and alignment of the slices.
+impl<'a> CustomBorrow<'a, KeccakVmRecordMut<'a>, KeccakVmRecordLayout> for [u8] {
+    fn custom_borrow(&'a mut self, layout: KeccakVmRecordLayout) -> KeccakVmRecordMut<'a> {
+        let (record_buf, rest) =
+            unsafe { self.split_at_mut_unchecked(size_of::<KeccakVmRecordHeader>()) };
+
+        let num_reads = layout.metadata.len.div_ceil(KECCAK_WORD_SIZE);
+        // Note: each read is `KECCAK_WORD_SIZE` bytes
+        let (input, rest) = unsafe { rest.split_at_mut_unchecked(num_reads * KECCAK_WORD_SIZE) };
+        let (_, read_aux_buf, _) = unsafe { rest.align_to_mut::<MemoryReadAuxRecord>() };
+        KeccakVmRecordMut {
+            inner: record_buf.borrow_mut(),
+            input,
+            read_aux: &mut read_aux_buf[..num_reads],
+        }
+    }
 
-impl<SC: StarkGenericConfig> Chip<SC> for KeccakVmChip<Val<SC>>
+    unsafe fn extract_layout(&self) -> KeccakVmRecordLayout {
+        let header: &KeccakVmRecordHeader = self.borrow();
+        KeccakVmRecordLayout {
+            metadata: KeccakVmMetadata {
+                len: header.len as usize,
+            },
+        }
+    }
+}
+
+impl SizedRecord<KeccakVmRecordLayout> for KeccakVmRecordMut<'_> {
+    fn size(layout: &KeccakVmRecordLayout) -> usize {
+        let num_reads = layout.metadata.len.div_ceil(KECCAK_WORD_SIZE);
+        let mut total_len = size_of::<KeccakVmRecordHeader>();
+        total_len += num_reads * KECCAK_WORD_SIZE;
+        // Align the pointer to the alignment of `MemoryReadAuxRecord`
+        total_len = total_len.next_multiple_of(align_of::<MemoryReadAuxRecord>());
+        total_len += num_reads * size_of::<MemoryReadAuxRecord>();
+        total_len
+    }
+
+    fn alignment(_layout: &KeccakVmRecordLayout) -> usize {
+        align_of::<KeccakVmRecordHeader>()
+    }
+}
+
+impl<F, RA> PreflightExecutor<F, RA> for KeccakVmExecutor
 where
-    Val<SC>: PrimeField32,
+    F: PrimeField32,
+    for<'buf> RA: RecordArena<'buf, KeccakVmRecordLayout, KeccakVmRecordMut<'buf>>,
 {
-    fn air(&self) -> AirRef<SC> {
-        Arc::new(self.air)
+    fn get_opcode_name(&self, _: usize) -> String {
+        format!("{:?}", Rv32KeccakOpcode::KECCAK256)
     }
 
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
-        let trace_width = self.trace_width();
-        let records = self.records;
-        let total_num_blocks: usize = records.iter().map(|r| r.input_blocks.len()).sum();
-        let mut states = Vec::with_capacity(total_num_blocks);
-        let mut instruction_blocks = Vec::with_capacity(total_num_blocks);
-        let memory = self.offline_memory.lock().unwrap();
-
-        #[derive(Clone)]
-        struct StateDiff {
-            /// hi-byte of pre-state
-            pre_hi: [u8; KECCAK_RATE_U16S],
-            /// hi-byte of post-state
-            post_hi: [u8; KECCAK_RATE_U16S],
-            /// if first block
-            register_reads: Option<[RecordId; KECCAK_REGISTER_READS]>,
-            /// if last block
-            digest_writes: Option<[RecordId; KECCAK_DIGEST_WRITES]>,
+    fn execute(
+        &self,
+        state: VmStateMut<F, TracingMemory, RA>,
+        instruction: &Instruction<F>,
+    ) -> Result<(), ExecutionError> {
+        let &Instruction {
+            opcode,
+            a,
+            b,
+            c,
+            d,
+            e,
+            ..
+        } = instruction;
+        debug_assert_eq!(opcode, Rv32KeccakOpcode::KECCAK256.global_opcode());
+        debug_assert_eq!(d.as_canonical_u32(), RV32_REGISTER_AS);
+        debug_assert_eq!(e.as_canonical_u32(), RV32_MEMORY_AS);
+
+        // Reading the length first without tracing to allocate a record of correct size
+        let len = read_rv32_register(state.memory.data(), c.as_canonical_u32()) as usize;
+
+        let num_reads = len.div_ceil(KECCAK_WORD_SIZE);
+        let num_blocks = num_keccak_f(len);
+        let record = state
+            .ctx
+            .alloc(KeccakVmRecordLayout::new(KeccakVmMetadata { len }));
+
+        record.inner.from_pc = *state.pc;
+        record.inner.timestamp = state.memory.timestamp();
+        record.inner.rd_ptr = a.as_canonical_u32();
+        record.inner.rs1_ptr = b.as_canonical_u32();
+        record.inner.rs2_ptr = c.as_canonical_u32();
+
+        record.inner.dst = u32::from_le_bytes(tracing_read(
+            state.memory,
+            RV32_REGISTER_AS,
+            record.inner.rd_ptr,
+            &mut record.inner.register_reads_aux[0].prev_timestamp,
+        ));
+        record.inner.src = u32::from_le_bytes(tracing_read(
+            state.memory,
+            RV32_REGISTER_AS,
+            record.inner.rs1_ptr,
+            &mut record.inner.register_reads_aux[1].prev_timestamp,
+        ));
+        record.inner.len = u32::from_le_bytes(tracing_read(
+            state.memory,
+            RV32_REGISTER_AS,
+            record.inner.rs2_ptr,
+            &mut record.inner.register_reads_aux[2].prev_timestamp,
+        ));
+
+        debug_assert!(record.inner.src as usize + len <= (1 << self.pointer_max_bits));
+        debug_assert!(
+            record.inner.dst as usize + KECCAK_DIGEST_BYTES <= (1 << self.pointer_max_bits)
+        );
+        // We don't support messages longer than 2^[pointer_max_bits] bytes
+        debug_assert!(record.inner.len < (1 << self.pointer_max_bits));
+
+        for idx in 0..num_reads {
+            if idx % KECCAK_ABSORB_READS == 0 && idx != 0 {
+                // Need to advance the timestamp at the start of each block after the first, due to
+                // the AIR constraints
+                state
+                    .memory
+                    .increment_timestamp_by(KECCAK_REGISTER_READS as u32);
+            }
+            let read = tracing_read::<KECCAK_WORD_SIZE>(
+                state.memory,
+                RV32_MEMORY_AS,
+                record.inner.src + (idx * KECCAK_WORD_SIZE) as u32,
+                &mut record.read_aux[idx].prev_timestamp,
+            );
+            record.input[idx * KECCAK_WORD_SIZE..(idx + 1) * KECCAK_WORD_SIZE]
+                .copy_from_slice(&read);
         }
 
-        impl Default for StateDiff {
-            fn default() -> Self {
-                Self {
-                    pre_hi: [0; KECCAK_RATE_U16S],
-                    post_hi: [0; KECCAK_RATE_U16S],
-                    register_reads: None,
-                    digest_writes: None,
-                }
+        // Due to the AIR constraints, need to set the timestamp to the following:
+        state.memory.timestamp = record.inner.timestamp
+            + (num_blocks * (KECCAK_ABSORB_READS + KECCAK_REGISTER_READS)) as u32;
+
+        let digest = keccak256(&record.input[..len]);
+        for (i, word) in digest.chunks_exact(KECCAK_WORD_SIZE).enumerate() {
+            tracing_write::<KECCAK_WORD_SIZE>(
+                state.memory,
+                RV32_MEMORY_AS,
+                record.inner.dst + (i * KECCAK_WORD_SIZE) as u32,
+                word.try_into().unwrap(),
+                &mut record.inner.write_aux[i].prev_timestamp,
+                &mut record.inner.write_aux[i].prev_data,
+            );
+        }
+
+        // Due to the AIR constraints, the final memory timestamp should be the following:
+        state.memory.timestamp = record.inner.timestamp
+            + (len + KECCAK_REGISTER_READS + KECCAK_ABSORB_READS + KECCAK_DIGEST_WRITES) as u32;
+        *state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP);
+        Ok(())
+    }
+}
+
+impl<F: PrimeField32> TraceFiller<F> for KeccakVmFiller {
+    fn fill_trace(
+        &self,
+        mem_helper: &MemoryAuxColsFactory<F>,
+        trace_matrix: &mut RowMajorMatrix<F>,
+        rows_used: usize,
+    ) {
+        if rows_used == 0 {
+            return;
+        }
+
+        let mut chunks = Vec::with_capacity(trace_matrix.height() / NUM_ROUNDS);
+        let mut sizes = Vec::with_capacity(trace_matrix.height() / NUM_ROUNDS);
+        let mut trace = &mut trace_matrix.values[..];
+        let mut num_blocks_so_far = 0;
+
+        // First pass over the trace to get the number of blocks for each instruction
+        // and divide the matrix into chunks of needed sizes
+        loop {
+            if num_blocks_so_far * NUM_ROUNDS >= rows_used {
+                // Push all the dummy rows as a single chunk and break
+                chunks.push(trace);
+                sizes.push((0, 0));
+                break;
+            } else {
+                let record: &KeccakVmRecordHeader =
+                    unsafe { get_record_from_slice(&mut trace, ()) };
+                let num_blocks = num_keccak_f(record.len as usize);
+                let (chunk, rest) =
+                    trace.split_at_mut(NUM_KECCAK_VM_COLS * NUM_ROUNDS * num_blocks);
+                chunks.push(chunk);
+                sizes.push((num_blocks, record.len as usize));
+                num_blocks_so_far += num_blocks;
+                trace = rest;
             }
         }
 
-        // prepare the states
-        let mut state: [u64; 25];
-        for record in records {
-            let dst_read = memory.record_by_id(record.dst_read);
-            let src_read = memory.record_by_id(record.src_read);
-            let len_read = memory.record_by_id(record.len_read);
-
-            state = [0u64; 25];
-            let src_limbs: [_; RV32_REGISTER_NUM_LIMBS - 1] = src_read.data_slice()
-                [1..RV32_REGISTER_NUM_LIMBS]
-                .try_into()
-                .unwrap();
-            let len_limbs: [_; RV32_REGISTER_NUM_LIMBS - 1] = len_read.data_slice()
-                [1..RV32_REGISTER_NUM_LIMBS]
-                .try_into()
-                .unwrap();
-            let mut instruction = KeccakInstructionCols {
-                pc: record.pc,
-                is_enabled: Val::<SC>::ONE,
-                is_enabled_first_round: Val::<SC>::ZERO,
-                start_timestamp: Val::<SC>::from_canonical_u32(dst_read.timestamp),
-                dst_ptr: dst_read.pointer,
-                src_ptr: src_read.pointer,
-                len_ptr: len_read.pointer,
-                dst: dst_read.data_slice().try_into().unwrap(),
-                src_limbs,
-                src: Val::<SC>::from_canonical_usize(record.input_blocks[0].src),
-                len_limbs,
-                remaining_len: Val::<SC>::from_canonical_usize(
-                    record.input_blocks[0].remaining_len,
-                ),
-            };
-            let num_blocks = record.input_blocks.len();
-            for (idx, block) in record.input_blocks.into_iter().enumerate() {
-                // absorb
-                for (bytes, s) in block.padded_bytes.chunks_exact(8).zip(state.iter_mut()) {
-                    // u64 <-> bytes conversion is little-endian
-                    for (i, &byte) in bytes.iter().enumerate() {
-                        let s_byte = (*s >> (i * 8)) as u8;
-                        // Update bitwise lookup (i.e. xor) chip state: order matters!
-                        if idx != 0 {
-                            self.bitwise_lookup_chip
-                                .request_xor(byte as u32, s_byte as u32);
-                        }
-                        *s ^= (byte as u64) << (i * 8);
-                    }
-                }
-                let pre_hi: [u8; KECCAK_RATE_U16S] =
-                    from_fn(|i| (state[i / U64_LIMBS] >> ((i % U64_LIMBS) * 16 + 8)) as u8);
-                states.push(state);
-                keccakf(&mut state);
-                let post_hi: [u8; KECCAK_RATE_U16S] =
-                    from_fn(|i| (state[i / U64_LIMBS] >> ((i % U64_LIMBS) * 16 + 8)) as u8);
-                // Range check the final state
-                if idx == num_blocks - 1 {
-                    for s in state.into_iter().take(NUM_ABSORB_ROUNDS) {
-                        for s_byte in s.to_le_bytes() {
-                            self.bitwise_lookup_chip.request_xor(0, s_byte as u32);
-                        }
-                    }
+        // First, parallelize over instruction chunks; every instruction can have multiple blocks.
+        // Then, compute some additional values for each block and parallelize over blocks within
+        // an instruction. Finally, compute some additional values for each row and parallelize
+        // over rows within a block.
+        chunks
+            .par_iter_mut()
+            .zip(sizes.par_iter())
+            .for_each(|(slice, (num_blocks, len))| {
+                if *num_blocks == 0 {
+                    // Fill in the dummy rows in parallel
+                    // Note: a 'block' of dummy rows is generated by `generate_trace_rows` from the
+                    // zero state; these dummy rows are repeated every
+                    // `NUM_ROUNDS` rows
+                    let p3_trace: RowMajorMatrix<F> = generate_trace_rows(vec![[0u64; 25]; 1], 0);
+
+                    slice
+                        .par_chunks_exact_mut(NUM_KECCAK_VM_COLS)
+                        .enumerate()
+                        .for_each(|(row_idx, row)| {
+                            let idx = row_idx % NUM_ROUNDS;
+                            row[..NUM_KECCAK_PERM_COLS].copy_from_slice(
+                                &p3_trace.values
+                                    [idx * NUM_KECCAK_PERM_COLS..(idx + 1) * NUM_KECCAK_PERM_COLS],
+                            );
+
+                            // Need to zero out any leftover garbage data, which might not be a
+                            // valid element of the prime field `F`. Unfortunately, there is no
+                            // good way around this
+                            unsafe {
+                                std::ptr::write_bytes(
+                                    row.as_mut_ptr().add(NUM_KECCAK_PERM_COLS) as *mut u8,
+                                    0,
+                                    (NUM_KECCAK_VM_COLS - NUM_KECCAK_PERM_COLS) * size_of::<F>(),
+                                );
+                            }
+                            let cols: &mut KeccakVmCols<F> = row.borrow_mut();
+                            // The first row of a `dummy` block should have `is_new_start = F::ONE`
+                            cols.sponge.is_new_start = F::from_bool(idx == 0);
+                            cols.sponge.block_bytes[0] = F::ONE;
+                            cols.sponge.block_bytes[KECCAK_RATE_BYTES - 1] =
+                                F::from_canonical_u32(0x80);
+                            cols.sponge.is_padding_byte = [F::ONE; KECCAK_RATE_BYTES];
+                        });
+                    return;
                 }
-                let register_reads =
-                    (idx == 0).then_some([record.dst_read, record.src_read, record.len_read]);
-                let digest_writes = (idx == num_blocks - 1).then_some(record.digest_writes);
-                let diff = StateDiff {
-                    pre_hi,
-                    post_hi,
-                    register_reads,
-                    digest_writes,
+
+                let num_reads = len.div_ceil(KECCAK_WORD_SIZE);
+                let read_len = num_reads * KECCAK_WORD_SIZE;
+
+                let record: KeccakVmRecordMut = unsafe {
+                    get_record_from_slice(
+                        slice,
+                        KeccakVmRecordLayout::new(KeccakVmMetadata { len: *len }),
+                    )
                 };
-                instruction_blocks.push((instruction, diff, block));
-                instruction.remaining_len -= Val::<SC>::from_canonical_usize(KECCAK_RATE_BYTES);
-                instruction.src += Val::<SC>::from_canonical_usize(KECCAK_RATE_BYTES);
-                instruction.start_timestamp +=
-                    Val::<SC>::from_canonical_usize(KECCAK_REGISTER_READS + KECCAK_ABSORB_READS);
-            }
-        }
 
-        // We need to transpose state matrices due to a plonky3 issue: https://github.com/Plonky3/Plonky3/issues/672
-        // Note: the fix for this issue will be a commit after the major Field crate refactor PR https://github.com/Plonky3/Plonky3/pull/640
-        //       which will require a significant refactor to switch to.
-        let p3_states = states
-            .into_iter()
-            .map(|state| {
-                // transpose of 5x5 matrix
-                from_fn(|i| {
-                    let x = i / 5;
-                    let y = i % 5;
-                    state[x + 5 * y]
-                })
-            })
-            .collect();
-        let p3_keccak_trace: RowMajorMatrix<Val<SC>> = generate_trace_rows(p3_states, 0);
-        let num_rows = p3_keccak_trace.height();
-        // Every `NUM_ROUNDS` rows corresponds to one input block
-        let num_blocks = num_rows.div_ceil(NUM_ROUNDS);
-        // Resize with dummy `is_enabled = 0`
-        instruction_blocks.resize(num_blocks, Default::default());
-
-        let aux_cols_factory = memory.aux_cols_factory();
-
-        // Use unsafe alignment so we can parallelly write to the matrix
-        let mut trace =
-            RowMajorMatrix::new(Val::<SC>::zero_vec(num_rows * trace_width), trace_width);
-        let limb_shift_bits = RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS - self.air.ptr_max_bits;
-
-        trace
-            .values
-            .par_chunks_mut(trace_width * NUM_ROUNDS)
-            .zip(
-                p3_keccak_trace
-                    .values
-                    .par_chunks(NUM_KECCAK_PERM_COLS * NUM_ROUNDS),
-            )
-            .zip(instruction_blocks.into_par_iter())
-            .for_each(|((rows, p3_keccak_mat), (instruction, diff, block))| {
-                let height = rows.len() / trace_width;
-                for (row, p3_keccak_row) in rows
-                    .chunks_exact_mut(trace_width)
-                    .zip(p3_keccak_mat.chunks_exact(NUM_KECCAK_PERM_COLS))
-                {
-                    // Safety: `KeccakPermCols` **must** be the first field in `KeccakVmCols`
-                    row[..NUM_KECCAK_PERM_COLS].copy_from_slice(p3_keccak_row);
-                    let row_mut: &mut KeccakVmCols<Val<SC>> = row.borrow_mut();
-                    row_mut.instruction = instruction;
-
-                    row_mut.sponge.block_bytes =
-                        block.padded_bytes.map(Val::<SC>::from_canonical_u8);
-                    if let Some(partial_read_idx) = block.partial_read_idx {
-                        let partial_read = memory.record_by_id(block.reads[partial_read_idx]);
-                        row_mut
-                            .mem_oc
-                            .partial_block
-                            .copy_from_slice(&partial_read.data_slice()[1..]);
-                    }
-                    for (i, is_padding) in row_mut.sponge.is_padding_byte.iter_mut().enumerate() {
-                        *is_padding = Val::<SC>::from_bool(i >= block.remaining_len);
-                    }
+                // Copy the read aux records and inner record to another place
+                // to safely fill in the trace matrix without overwriting the record
+                let mut read_aux_records = Vec::with_capacity(num_reads);
+                read_aux_records.extend_from_slice(record.read_aux);
+                let vm_record = record.inner.clone();
+                let partial_block = if read_len != *len {
+                    record.input[read_len - KECCAK_WORD_SIZE + 1..]
+                        .try_into()
+                        .unwrap()
+                } else {
+                    [0u8; KECCAK_WORD_SIZE - 1]
                 }
-                let first_row: &mut KeccakVmCols<Val<SC>> = rows[..trace_width].borrow_mut();
-                first_row.sponge.is_new_start = Val::<SC>::from_bool(block.is_new_start);
-                first_row.sponge.state_hi = diff.pre_hi.map(Val::<SC>::from_canonical_u8);
-                first_row.instruction.is_enabled_first_round = first_row.instruction.is_enabled;
-                // Make memory access aux columns. Any aux column not explicitly defined defaults to
-                // all 0s
-                if let Some(register_reads) = diff.register_reads {
-                    let need_range_check = [
-                        &register_reads[0], // dst
-                        &register_reads[1], // src
-                        &register_reads[2], // len
-                        &register_reads[2],
-                    ]
-                    .map(|r| {
-                        memory
-                            .record_by_id(*r)
-                            .data_slice()
-                            .last()
-                            .unwrap()
-                            .as_canonical_u32()
+                .map(F::from_canonical_u8);
+                let mut input = Vec::with_capacity(*num_blocks * KECCAK_RATE_BYTES);
+                input.extend_from_slice(&record.input[..*len]);
+                // Pad the input according to the Keccak spec
+                input.push(0x01);
+                input.resize(input.capacity(), 0);
+                *input.last_mut().unwrap() += 0x80;
+
+                let mut states = Vec::with_capacity(*num_blocks);
+                let mut state = [0u64; 25];
+
+                input
+                    .chunks_exact(KECCAK_RATE_BYTES)
+                    .enumerate()
+                    .for_each(|(idx, chunk)| {
+                        // absorb
+                        for (bytes, s) in chunk.chunks_exact(8).zip(state.iter_mut()) {
+                            // u64 <-> bytes conversion is little-endian
+                            for (i, &byte) in bytes.iter().enumerate() {
+                                let s_byte = (*s >> (i * 8)) as u8;
+                                // Update bitwise lookup (i.e. xor) chip state: order matters!
+                                if idx != 0 {
+                                    self.bitwise_lookup_chip
+                                        .request_xor(byte as u32, s_byte as u32);
+                                }
+                                *s ^= (byte as u64) << (i * 8);
+                            }
+                        }
+                        states.push(state);
+                        keccakf(&mut state);
                     });
-                    for bytes in need_range_check.chunks(2) {
-                        self.bitwise_lookup_chip.request_range(
-                            bytes[0] << limb_shift_bits,
-                            bytes[1] << limb_shift_bits,
-                        );
-                    }
-                    for (i, id) in register_reads.into_iter().enumerate() {
-                        aux_cols_factory.generate_read_aux(
-                            memory.record_by_id(id),
-                            &mut first_row.mem_oc.register_aux[i],
-                        );
-                    }
-                }
-                for (i, id) in block.reads.into_iter().enumerate() {
-                    aux_cols_factory.generate_read_aux(
-                        memory.record_by_id(id),
-                        &mut first_row.mem_oc.absorb_reads[i],
-                    );
-                }
 
-                let last_row: &mut KeccakVmCols<Val<SC>> =
-                    rows[(height - 1) * trace_width..].borrow_mut();
-                last_row.sponge.state_hi = diff.post_hi.map(Val::<SC>::from_canonical_u8);
-                last_row.inner.export = instruction.is_enabled
-                    * Val::<SC>::from_bool(block.remaining_len < KECCAK_RATE_BYTES);
-                if let Some(digest_writes) = diff.digest_writes {
-                    for (i, record_id) in digest_writes.into_iter().enumerate() {
-                        let record = memory.record_by_id(record_id);
-                        aux_cols_factory
-                            .generate_write_aux(record, &mut last_row.mem_oc.digest_writes[i]);
-                    }
-                }
-            });
+                slice
+                    .par_chunks_exact_mut(NUM_ROUNDS * NUM_KECCAK_VM_COLS)
+                    .enumerate()
+                    .for_each(|(block_idx, block_slice)| {
+                        // We need to transpose state matrices due to a plonky3 issue: https://github.com/Plonky3/Plonky3/issues/672
+                        // Note: the fix for this issue will be a commit after the major Field crate refactor PR https://github.com/Plonky3/Plonky3/pull/640
+                        //       which will require a significant refactor to switch to.
+                        let state = from_fn(|i| {
+                            let x = i / 5;
+                            let y = i % 5;
+                            states[block_idx][x + 5 * y]
+                        });
 
-        AirProofInput::simple_no_pis(trace)
-    }
-}
+                        // Note: we can call `generate_trace_rows` for each block separately because
+                        // its trace only depends on the current `state`
+                        // `generate_trace_rows` pads its output with dummy rows so that the
+                        // height is a power of 2, but we can safely discard them
+                        let p3_trace: RowMajorMatrix<F> = generate_trace_rows(vec![state], 0);
+                        let input_offset = block_idx * KECCAK_RATE_BYTES;
+                        let start_timestamp = vm_record.timestamp
+                            + (block_idx * (KECCAK_REGISTER_READS + KECCAK_ABSORB_READS)) as u32;
+                        let rem_len = *len - input_offset;
 
-impl<F: PrimeField32> ChipUsageGetter for KeccakVmChip<F> {
-    fn air_name(&self) -> String {
-        get_air_name(&self.air)
-    }
-    fn current_trace_height(&self) -> usize {
-        let num_blocks: usize = self.records.iter().map(|r| r.input_blocks.len()).sum();
-        num_blocks * NUM_ROUNDS
-    }
+                        block_slice
+                            .par_chunks_exact_mut(NUM_KECCAK_VM_COLS)
+                            .enumerate()
+                            .zip(p3_trace.values.par_chunks(NUM_KECCAK_PERM_COLS))
+                            .for_each(|((row_idx, row), p3_row)| {
+                                // Fill the inner columns
+                                // Safety: `KeccakPermCols` **must** be the first field in
+                                // `KeccakVmCols`
+                                row[..NUM_KECCAK_PERM_COLS].copy_from_slice(p3_row);
 
-    fn trace_width(&self) -> usize {
-        BaseAir::<F>::width(&self.air)
+                                let cols: &mut KeccakVmCols<F> = row.borrow_mut();
+                                // Fill the sponge columns
+                                cols.sponge.is_new_start =
+                                    F::from_bool(block_idx == 0 && row_idx == 0);
+                                if rem_len < KECCAK_RATE_BYTES {
+                                    cols.sponge.is_padding_byte[..rem_len].fill(F::ZERO);
+                                    cols.sponge.is_padding_byte[rem_len..].fill(F::ONE);
+                                } else {
+                                    cols.sponge.is_padding_byte = [F::ZERO; KECCAK_RATE_BYTES];
+                                }
+                                cols.sponge.block_bytes = array::from_fn(|i| {
+                                    F::from_canonical_u8(input[input_offset + i])
+                                });
+                                if row_idx == 0 {
+                                    cols.sponge.state_hi = from_fn(|i| {
+                                        F::from_canonical_u8(
+                                            (states[block_idx][i / U64_LIMBS]
+                                                >> ((i % U64_LIMBS) * 16 + 8))
+                                                as u8,
+                                        )
+                                    });
+                                } else if row_idx == NUM_ROUNDS - 1 {
+                                    let state = keccak_f(states[block_idx]);
+                                    cols.sponge.state_hi = from_fn(|i| {
+                                        F::from_canonical_u8(
+                                            (state[i / U64_LIMBS] >> ((i % U64_LIMBS) * 16 + 8))
+                                                as u8,
+                                        )
+                                    });
+                                    if block_idx == num_blocks - 1 {
+                                        cols.inner.export = F::ONE;
+                                        for s in state.into_iter().take(NUM_ABSORB_ROUNDS) {
+                                            for s_byte in s.to_le_bytes() {
+                                                self.bitwise_lookup_chip
+                                                    .request_xor(0, s_byte as u32);
+                                            }
+                                        }
+                                    }
+                                } else {
+                                    cols.sponge.state_hi = [F::ZERO; KECCAK_RATE_U16S];
+                                }
+
+                                // Fill the instruction columns
+                                cols.instruction.pc = F::from_canonical_u32(vm_record.from_pc);
+                                cols.instruction.is_enabled = F::ONE;
+                                cols.instruction.is_enabled_first_round =
+                                    F::from_bool(row_idx == 0);
+                                cols.instruction.start_timestamp =
+                                    F::from_canonical_u32(start_timestamp);
+                                cols.instruction.dst_ptr = F::from_canonical_u32(vm_record.rd_ptr);
+                                cols.instruction.src_ptr = F::from_canonical_u32(vm_record.rs1_ptr);
+                                cols.instruction.len_ptr = F::from_canonical_u32(vm_record.rs2_ptr);
+                                cols.instruction.dst =
+                                    vm_record.dst.to_le_bytes().map(F::from_canonical_u8);
+
+                                let src = vm_record.src + (block_idx * KECCAK_RATE_BYTES) as u32;
+                                cols.instruction.src = F::from_canonical_u32(src);
+                                cols.instruction.src_limbs.copy_from_slice(
+                                    &src.to_le_bytes().map(F::from_canonical_u8)[1..],
+                                );
+                                cols.instruction.len_limbs.copy_from_slice(
+                                    &(rem_len as u32).to_le_bytes().map(F::from_canonical_u8)[1..],
+                                );
+                                cols.instruction.remaining_len =
+                                    F::from_canonical_u32(rem_len as u32);
+
+                                // Fill the register reads
+                                if row_idx == 0 && block_idx == 0 {
+                                    for ((i, cols), vm_record) in cols
+                                        .mem_oc
+                                        .register_aux
+                                        .iter_mut()
+                                        .enumerate()
+                                        .zip(vm_record.register_reads_aux.iter())
+                                    {
+                                        mem_helper.fill(
+                                            vm_record.prev_timestamp,
+                                            start_timestamp + i as u32,
+                                            cols.as_mut(),
+                                        );
+                                    }
+
+                                    let msl_rshift = RV32_CELL_BITS * (RV32_REGISTER_NUM_LIMBS - 1);
+                                    let msl_lshift = RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS
+                                        - self.pointer_max_bits;
+                                    // Update the bitwise lookup chip
+                                    self.bitwise_lookup_chip.request_range(
+                                        (vm_record.dst >> msl_rshift) << msl_lshift,
+                                        (vm_record.src >> msl_rshift) << msl_lshift,
+                                    );
+                                    self.bitwise_lookup_chip.request_range(
+                                        (vm_record.len >> msl_rshift) << msl_lshift,
+                                        (vm_record.len >> msl_rshift) << msl_lshift,
+                                    );
+                                } else {
+                                    cols.mem_oc.register_aux.par_iter_mut().for_each(|aux| {
+                                        mem_helper.fill_zero(aux.as_mut());
+                                    });
+                                }
+
+                                // Fill the absorb reads
+                                if row_idx == 0 {
+                                    let reads_offs = block_idx * KECCAK_ABSORB_READS;
+                                    let num_reads = min(
+                                        rem_len.div_ceil(KECCAK_WORD_SIZE),
+                                        KECCAK_ABSORB_READS,
+                                    );
+                                    let start_timestamp =
+                                        start_timestamp + KECCAK_REGISTER_READS as u32;
+                                    for i in 0..num_reads {
+                                        mem_helper.fill(
+                                            read_aux_records[i + reads_offs].prev_timestamp,
+                                            start_timestamp + i as u32,
+                                            cols.mem_oc.absorb_reads[i].as_mut(),
+                                        );
+                                    }
+                                    for i in num_reads..KECCAK_ABSORB_READS {
+                                        mem_helper.fill_zero(cols.mem_oc.absorb_reads[i].as_mut());
+                                    }
+                                } else {
+                                    cols.mem_oc.absorb_reads.par_iter_mut().for_each(|aux| {
+                                        mem_helper.fill_zero(aux.as_mut());
+                                    });
+                                }
+
+                                if block_idx == num_blocks - 1 && row_idx == NUM_ROUNDS - 1 {
+                                    let timestamp = start_timestamp
+                                        + (KECCAK_ABSORB_READS + KECCAK_REGISTER_READS) as u32;
+                                    cols.mem_oc
+                                        .digest_writes
+                                        .par_iter_mut()
+                                        .enumerate()
+                                        .zip(vm_record.write_aux.par_iter())
+                                        .for_each(|((i, cols), vm_record)| {
+                                            cols.set_prev_data(
+                                                vm_record.prev_data.map(F::from_canonical_u8),
+                                            );
+                                            mem_helper.fill(
+                                                vm_record.prev_timestamp,
+                                                timestamp + i as u32,
+                                                cols.as_mut(),
+                                            );
+                                        });
+                                } else {
+                                    cols.mem_oc.digest_writes.par_iter_mut().for_each(|aux| {
+                                        aux.set_prev_data([F::ZERO; KECCAK_WORD_SIZE]);
+                                        mem_helper.fill_zero(aux.as_mut());
+                                    });
+                                }
+
+                                // Set the partial block only for the last block
+                                if block_idx == num_blocks - 1 {
+                                    cols.mem_oc.partial_block = partial_block;
+                                } else {
+                                    cols.mem_oc.partial_block = [F::ZERO; KECCAK_WORD_SIZE - 1];
+                                }
+                            });
+                    });
+            });
     }
 }
diff --git a/extensions/native/circuit/Cargo.toml b/extensions/native/circuit/Cargo.toml
index 5d5913b4be..f9b9bd78c5 100644
--- a/extensions/native/circuit/Cargo.toml
+++ b/extensions/native/circuit/Cargo.toml
@@ -17,23 +17,27 @@ openvm-circuit = { workspace = true }
 openvm-circuit-derive = { workspace = true }
 openvm-instructions = { workspace = true }
 openvm-rv32im-circuit = { workspace = true }
+openvm-rv32im-transpiler = { workspace = true }
 openvm-native-compiler = { workspace = true }
 
 
 strum.workspace = true
 itertools.workspace = true
-tracing.workspace = true
 derive-new.workspace = true
 derive_more = { workspace = true, features = ["from"] }
 rand.workspace = true
 eyre.workspace = true
 serde.workspace = true
-serde-big-array.workspace = true
 static_assertions.workspace = true
 
 [dev-dependencies]
 openvm-stark-sdk = { workspace = true }
 openvm-circuit = { workspace = true, features = ["test-utils"] }
+openvm-native-circuit = { workspace = true, features = ["test-utils"] }
+openvm-native-compiler-derive = { workspace = true }
+p3-symmetric = { workspace = true }
+test-case = { workspace = true }
+test-log = { workspace = true }
 
 [features]
 default = ["parallel"]
diff --git a/extensions/native/circuit/src/adapters/alu_native_adapter.rs b/extensions/native/circuit/src/adapters/alu_native_adapter.rs
index e85797536f..24ce7dfbbb 100644
--- a/extensions/native/circuit/src/adapters/alu_native_adapter.rs
+++ b/extensions/native/circuit/src/adapters/alu_native_adapter.rs
@@ -1,23 +1,26 @@
 use std::{
     borrow::{Borrow, BorrowMut},
-    marker::PhantomData,
+    mem::size_of,
 };
 
 use openvm_circuit::{
     arch::{
-        AdapterAirContext, AdapterRuntimeContext, BasicAdapterInterface, ExecutionBridge,
-        ExecutionBus, ExecutionState, MinimalInstruction, Result, VmAdapterAir, VmAdapterChip,
-        VmAdapterInterface,
+        get_record_from_slice, AdapterAirContext, AdapterTraceExecutor, AdapterTraceFiller,
+        BasicAdapterInterface, ExecutionBridge, ExecutionState, MinimalInstruction, VmAdapterAir,
     },
     system::{
         memory::{
-            offline_checker::{MemoryBridge, MemoryReadOrImmediateAuxCols, MemoryWriteAuxCols},
-            MemoryAddress, MemoryController, OfflineMemory,
+            offline_checker::{
+                MemoryBridge, MemoryReadAuxRecord, MemoryReadOrImmediateAuxCols,
+                MemoryWriteAuxCols, MemoryWriteAuxRecord,
+            },
+            online::TracingMemory,
+            MemoryAddress, MemoryAuxColsFactory,
         },
-        native_adapter::{NativeReadRecord, NativeWriteRecord},
-        program::ProgramBus,
+        native_adapter::util::{tracing_read_or_imm_native, tracing_write_native},
     },
 };
+use openvm_circuit_primitives::AlignedBytesBorrow;
 use openvm_circuit_primitives_derive::AlignedBorrow;
 use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP};
 use openvm_native_compiler::conversion::AS;
@@ -27,28 +30,6 @@ use openvm_stark_backend::{
     p3_field::{Field, FieldAlgebra, PrimeField32},
 };
 
-#[derive(Debug)]
-pub struct AluNativeAdapterChip<F: Field> {
-    pub air: AluNativeAdapterAir,
-    _marker: PhantomData<F>,
-}
-
-impl<F: PrimeField32> AluNativeAdapterChip<F> {
-    pub fn new(
-        execution_bus: ExecutionBus,
-        program_bus: ProgramBus,
-        memory_bridge: MemoryBridge,
-    ) -> Self {
-        Self {
-            air: AluNativeAdapterAir {
-                execution_bridge: ExecutionBridge::new(execution_bus, program_bus),
-                memory_bridge,
-            },
-            _marker: PhantomData,
-        }
-    }
-}
-
 #[repr(C)]
 #[derive(AlignedBorrow)]
 pub struct AluNativeAdapterCols<T> {
@@ -93,6 +74,8 @@ impl<AB: InteractionBuilder> VmAdapterAir<AB> for AluNativeAdapterAir {
 
         let native_as = AB::Expr::from_canonical_u32(AS::Native as u32);
 
+        // TODO: we assume address space is either 0 or 4, should we add a
+        //       constraint for that?
         self.memory_bridge
             .read_or_immediate(
                 MemoryAddress::new(cols.e_as, cols.b_pointer),
@@ -144,88 +127,131 @@ impl<AB: InteractionBuilder> VmAdapterAir<AB> for AluNativeAdapterAir {
     }
 }
 
-impl<F: PrimeField32> VmAdapterChip<F> for AluNativeAdapterChip<F> {
-    type ReadRecord = NativeReadRecord<F, 2>;
-    type WriteRecord = NativeWriteRecord<F, 1>;
-    type Air = AluNativeAdapterAir;
-    type Interface = BasicAdapterInterface<F, MinimalInstruction<F>, 2, 1, 1, 1>;
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct AluNativeAdapterRecord<F> {
+    pub from_pc: u32, // pc captured by `AdapterTraceExecutor::start`
+    pub from_timestamp: u32, // `memory.timestamp` captured by `start`
 
-    fn preprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-    ) -> Result<(
-        <Self::Interface as VmAdapterInterface<F>>::Reads,
-        Self::ReadRecord,
-    )> {
-        let Instruction { b, c, e, f, .. } = *instruction;
-
-        let reads = vec![memory.read::<1>(e, b), memory.read::<1>(f, c)];
-        let i_reads: [_; 2] = std::array::from_fn(|i| reads[i].1);
-
-        Ok((
-            i_reads,
-            Self::ReadRecord {
-                reads: reads.try_into().unwrap(),
-            },
-        ))
+    pub a_ptr: F, // instruction operand `a`, used as the write pointer
+    pub b: F, // instruction operand `b`
+    pub c: F, // instruction operand `c`
+
+    // A `prev_timestamp` of `u32::MAX` marks the corresponding read as an immediate
+    pub reads_aux: [MemoryReadAuxRecord; 2],
+    pub write_aux: MemoryWriteAuxRecord<F, 1>,
+}
+
+#[derive(derive_new::new, Clone, Copy)]
+pub struct AluNativeAdapterExecutor; // unit type; implements `AdapterTraceExecutor` below
+
+#[derive(derive_new::new)]
+pub struct AluNativeAdapterFiller; // unit type; implements `AdapterTraceFiller` below
+
+impl<F: PrimeField32> AdapterTraceExecutor<F> for AluNativeAdapterExecutor {
+    const WIDTH: usize = size_of::<AluNativeAdapterCols<u8>>();
+    type ReadData = [F; 2];
+    type WriteData = [F; 1];
+    type RecordMut<'a> = &'a mut AluNativeAdapterRecord<F>;
+
+    #[inline(always)]
+    fn start(pc: u32, memory: &TracingMemory, record: &mut Self::RecordMut<'_>) {
+        record.from_pc = pc;
+        record.from_timestamp = memory.timestamp; // base of the filler +0/+1/+2 offsets
     }
 
-    fn postprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        _instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-        output: AdapterRuntimeContext<F, Self::Interface>,
-        _read_record: &Self::ReadRecord,
-    ) -> Result<(ExecutionState<u32>, Self::WriteRecord)> {
-        let Instruction { a, .. } = *_instruction;
-        let writes = vec![memory.write(
-            F::from_canonical_u32(AS::Native as u32),
-            a,
-            output.writes[0],
-        )];
-
-        Ok((
-            ExecutionState {
-                pc: output.to_pc.unwrap_or(from_state.pc + DEFAULT_PC_STEP),
-                timestamp: memory.timestamp(),
-            },
-            Self::WriteRecord {
-                from_state,
-                writes: writes.try_into().unwrap(),
-            },
-        ))
+    #[inline(always)]
+    fn read(
+        &self,
+        memory: &mut TracingMemory,
+        instruction: &Instruction<F>,
+        record: &mut Self::RecordMut<'_>,
+    ) -> Self::ReadData {
+        let &Instruction { b, c, e, f, .. } = instruction; // operands: (e, b) then (f, c)
+
+        record.b = b;
+        let rs1 = tracing_read_or_imm_native(memory, e, b, &mut record.reads_aux[0].prev_timestamp);
+        record.c = c;
+        let rs2 = tracing_read_or_imm_native(memory, f, c, &mut record.reads_aux[1].prev_timestamp);
+        [rs1, rs2]
     }
 
-    fn generate_trace_row(
+    #[inline(always)]
+    fn write(
         &self,
-        row_slice: &mut [F],
-        read_record: Self::ReadRecord,
-        write_record: Self::WriteRecord,
-        memory: &OfflineMemory<F>,
+        memory: &mut TracingMemory,
+        instruction: &Instruction<F>,
+        data: Self::WriteData,
+        record: &mut Self::RecordMut<'_>,
     ) {
-        let row_slice: &mut AluNativeAdapterCols<_> = row_slice.borrow_mut();
-        let aux_cols_factory = memory.aux_cols_factory();
+        let &Instruction { a, .. } = instruction;
 
-        row_slice.from_state = write_record.from_state.map(F::from_canonical_u32);
+        record.a_ptr = a; // stored as a field element; the write uses its canonical u32
+        tracing_write_native(
+            memory,
+            a.as_canonical_u32(),
+            data,
+            &mut record.write_aux.prev_timestamp,
+            &mut record.write_aux.prev_data,
+        );
+    }
+}
 
-        row_slice.a_pointer = memory.record_by_id(write_record.writes[0].0).pointer;
-        row_slice.b_pointer = memory.record_by_id(read_record.reads[0].0).pointer;
-        row_slice.c_pointer = memory.record_by_id(read_record.reads[1].0).pointer;
-        row_slice.e_as = memory.record_by_id(read_record.reads[0].0).address_space;
-        row_slice.f_as = memory.record_by_id(read_record.reads[1].0).address_space;
+impl<F: PrimeField32> AdapterTraceFiller<F> for AluNativeAdapterFiller {
+    const WIDTH: usize = size_of::<AluNativeAdapterCols<u8>>();
 
-        for (i, x) in read_record.reads.iter().enumerate() {
-            let read = memory.record_by_id(x.0);
-            aux_cols_factory.generate_read_or_immediate_aux(read, &mut row_slice.reads_aux[i]);
+    #[inline(always)]
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, mut adapter_row: &mut [F]) {
+        let record: &AluNativeAdapterRecord<F> =
+            unsafe { get_record_from_slice(&mut adapter_row, ()) };
+        let adapter_row: &mut AluNativeAdapterCols<F> = adapter_row.borrow_mut();
+
+        // `record` is read out of this row buffer: fill columns in reverse so it is not clobbered
+        adapter_row
+            .write_aux
+            .set_prev_data(record.write_aux.prev_data);
+        mem_helper.fill(
+            record.write_aux.prev_timestamp,
+            record.from_timestamp + 2, // the write is the third access, after the two reads
+            adapter_row.write_aux.as_mut(),
+        );
+
+        let native_as = F::from_canonical_u32(AS::Native as u32);
+        for ((i, read_record), read_cols) in record
+            .reads_aux
+            .iter()
+            .enumerate()
+            .zip(adapter_row.reads_aux.iter_mut())
+            .rev() // second read first, in keeping with the reverse fill order
+        {
+            let as_col = if i == 0 {
+                &mut adapter_row.e_as
+            } else {
+                &mut adapter_row.f_as
+            };
+            // `prev_timestamp == u32::MAX` is the sentinel for an immediate operand
+            if read_record.prev_timestamp == u32::MAX {
+                read_cols.is_zero_aux = F::ZERO;
+                read_cols.is_immediate = F::ONE;
+                mem_helper.fill(0, record.from_timestamp + i as u32, read_cols.as_mut());
+                *as_col = F::ZERO; // immediates are given address space 0
+            } else {
+                read_cols.is_zero_aux = native_as.inverse();
+                read_cols.is_immediate = F::ZERO;
+                mem_helper.fill(
+                    read_record.prev_timestamp,
+                    record.from_timestamp + i as u32,
+                    read_cols.as_mut(),
+                );
+                *as_col = native_as;
+            }
         }
 
-        let write = memory.record_by_id(write_record.writes[0].0);
-        aux_cols_factory.generate_write_aux(write, &mut row_slice.write_aux);
-    }
+        adapter_row.c_pointer = record.c;
+        adapter_row.b_pointer = record.b;
+        adapter_row.a_pointer = record.a_ptr;
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        adapter_row.from_state.timestamp = F::from_canonical_u32(record.from_timestamp);
+        adapter_row.from_state.pc = F::from_canonical_u32(record.from_pc);
     }
 }
diff --git a/extensions/native/circuit/src/adapters/branch_native_adapter.rs b/extensions/native/circuit/src/adapters/branch_native_adapter.rs
index 7d3e97a6bf..aa0c9c5259 100644
--- a/extensions/native/circuit/src/adapters/branch_native_adapter.rs
+++ b/extensions/native/circuit/src/adapters/branch_native_adapter.rs
@@ -1,23 +1,23 @@
 use std::{
     borrow::{Borrow, BorrowMut},
-    marker::PhantomData,
+    mem::size_of,
 };
 
 use openvm_circuit::{
     arch::{
-        AdapterAirContext, AdapterRuntimeContext, BasicAdapterInterface, ExecutionBridge,
-        ExecutionBus, ExecutionState, ImmInstruction, Result, VmAdapterAir, VmAdapterChip,
-        VmAdapterInterface,
+        get_record_from_slice, AdapterAirContext, AdapterTraceExecutor, AdapterTraceFiller,
+        BasicAdapterInterface, ExecutionBridge, ExecutionState, ImmInstruction, VmAdapterAir,
     },
     system::{
         memory::{
-            offline_checker::{MemoryBridge, MemoryReadOrImmediateAuxCols},
-            MemoryAddress, MemoryController, OfflineMemory,
+            offline_checker::{MemoryBridge, MemoryReadAuxRecord, MemoryReadOrImmediateAuxCols},
+            online::TracingMemory,
+            MemoryAddress, MemoryAuxColsFactory,
         },
-        native_adapter::NativeReadRecord,
-        program::ProgramBus,
+        native_adapter::util::tracing_read_or_imm_native,
     },
 };
+use openvm_circuit_primitives::AlignedBytesBorrow;
 use openvm_circuit_primitives_derive::AlignedBorrow;
 use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP};
 use openvm_native_compiler::conversion::AS;
@@ -27,37 +27,15 @@ use openvm_stark_backend::{
     p3_field::{Field, FieldAlgebra, PrimeField32},
 };
 
-#[derive(Debug)]
-pub struct BranchNativeAdapterChip<F: Field> {
-    pub air: BranchNativeAdapterAir,
-    _marker: PhantomData<F>,
-}
-
-impl<F: PrimeField32> BranchNativeAdapterChip<F> {
-    pub fn new(
-        execution_bus: ExecutionBus,
-        program_bus: ProgramBus,
-        memory_bridge: MemoryBridge,
-    ) -> Self {
-        Self {
-            air: BranchNativeAdapterAir {
-                execution_bridge: ExecutionBridge::new(execution_bus, program_bus),
-                memory_bridge,
-            },
-            _marker: PhantomData,
-        }
-    }
-}
-
 #[repr(C)]
-#[derive(AlignedBorrow)]
+#[derive(AlignedBorrow, Debug)]
 pub struct BranchNativeAdapterReadCols<T> {
     pub address: MemoryAddress<T, T>,
     pub read_aux: MemoryReadOrImmediateAuxCols<T>,
 }
 
 #[repr(C)]
-#[derive(AlignedBorrow)]
+#[derive(AlignedBorrow, Debug)]
 pub struct BranchNativeAdapterCols<T> {
     pub from_state: ExecutionState<T>,
     pub reads_aux: [BranchNativeAdapterReadCols<T>; 2],
@@ -145,71 +123,110 @@ impl<AB: InteractionBuilder> VmAdapterAir<AB> for BranchNativeAdapterAir {
     }
 }
 
-impl<F: PrimeField32> VmAdapterChip<F> for BranchNativeAdapterChip<F> {
-    type ReadRecord = NativeReadRecord<F, 2>;
-    type WriteRecord = ExecutionState<u32>;
-    type Air = BranchNativeAdapterAir;
-    type Interface = BasicAdapterInterface<F, ImmInstruction<F>, 2, 0, 1, 1>;
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct BranchNativeAdapterRecord<F> {
+    pub from_pc: u32,
+    pub from_timestamp: u32,
+
+    pub ptrs: [F; 2],
+    // `prev_timestamp` is set to the sentinel `u32::MAX` when the read is an immediate
+    pub reads_aux: [MemoryReadAuxRecord; 2],
+}
 
-    fn preprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-    ) -> Result<(
-        <Self::Interface as VmAdapterInterface<F>>::Reads,
-        Self::ReadRecord,
-    )> {
-        let Instruction { a, b, d, e, .. } = *instruction;
-
-        let reads = vec![memory.read::<1>(d, a), memory.read::<1>(e, b)];
-        let i_reads: [_; 2] = std::array::from_fn(|i| reads[i].1);
-
-        Ok((
-            i_reads,
-            Self::ReadRecord {
-                reads: reads.try_into().unwrap(),
-            },
-        ))
+#[derive(derive_new::new, Clone, Copy)]
+pub struct BranchNativeAdapterExecutor;
+
+#[derive(derive_new::new)]
+pub struct BranchNativeAdapterFiller;
+
+impl<F> AdapterTraceExecutor<F> for BranchNativeAdapterExecutor
+where
+    F: PrimeField32,
+{
+    const WIDTH: usize = size_of::<BranchNativeAdapterCols<u8>>();
+    type ReadData = [F; 2];
+    type WriteData = ();
+    type RecordMut<'a> = &'a mut BranchNativeAdapterRecord<F>;
+
+    #[inline(always)]
+    fn start(pc: u32, memory: &TracingMemory, record: &mut Self::RecordMut<'_>) {
+        record.from_pc = pc;
+        record.from_timestamp = memory.timestamp;
     }
 
-    fn postprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        _instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-        output: AdapterRuntimeContext<F, Self::Interface>,
-        _read_record: &Self::ReadRecord,
-    ) -> Result<(ExecutionState<u32>, Self::WriteRecord)> {
-        Ok((
-            ExecutionState {
-                pc: output.to_pc.unwrap_or(from_state.pc + DEFAULT_PC_STEP),
-                timestamp: memory.timestamp(),
-            },
-            from_state,
-        ))
+    #[inline(always)]
+    fn read(
+        &self,
+        memory: &mut TracingMemory,
+        instruction: &Instruction<F>,
+        record: &mut Self::RecordMut<'_>,
+    ) -> Self::ReadData {
+        let &Instruction { a, b, d, e, .. } = instruction;
+
+        record.ptrs[0] = a;
+        let rs1 = tracing_read_or_imm_native(memory, d, a, &mut record.reads_aux[0].prev_timestamp);
+        record.ptrs[1] = b;
+        let rs2 = tracing_read_or_imm_native(memory, e, b, &mut record.reads_aux[1].prev_timestamp);
+        [rs1, rs2]
     }
 
-    fn generate_trace_row(
+    #[inline(always)]
+    fn write(
         &self,
-        row_slice: &mut [F],
-        read_record: Self::ReadRecord,
-        write_record: Self::WriteRecord,
-        memory: &OfflineMemory<F>,
+        _memory: &mut TracingMemory,
+        _instruction: &Instruction<F>,
+        _data: Self::WriteData,
+        _record: &mut Self::RecordMut<'_>,
     ) {
-        let row_slice: &mut BranchNativeAdapterCols<_> = row_slice.borrow_mut();
-        let aux_cols_factory = memory.aux_cols_factory();
-
-        row_slice.from_state = write_record.map(F::from_canonical_u32);
-        for (i, x) in read_record.reads.iter().enumerate() {
-            let read = memory.record_by_id(x.0);
+        // This adapter doesn't write anything
+    }
+}
 
-            row_slice.reads_aux[i].address = MemoryAddress::new(read.address_space, read.pointer);
-            aux_cols_factory
-                .generate_read_or_immediate_aux(read, &mut row_slice.reads_aux[i].read_aux);
+impl<F: PrimeField32> AdapterTraceFiller<F> for BranchNativeAdapterFiller {
+    const WIDTH: usize = size_of::<BranchNativeAdapterCols<u8>>();
+
+    #[inline(always)]
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, mut adapter_row: &mut [F]) {
+        let record: &BranchNativeAdapterRecord<F> =
+            unsafe { get_record_from_slice(&mut adapter_row, ()) };
+        let adapter_row: &mut BranchNativeAdapterCols<F> = adapter_row.borrow_mut();
+
+        // `record` is stored in this same row; fill columns in reverse so it is not overwritten
+
+        let native_as = F::from_canonical_u32(AS::Native as u32);
+        for ((i, read_record), read_cols) in record
+            .reads_aux
+            .iter()
+            .enumerate()
+            .zip(adapter_row.reads_aux.iter_mut())
+            .rev()
+        {
+            // previous timestamp is u32::MAX if the read is an immediate
+            if read_record.prev_timestamp == u32::MAX {
+                read_cols.read_aux.is_zero_aux = F::ZERO;
+                read_cols.read_aux.is_immediate = F::ONE;
+                mem_helper.fill(
+                    0,
+                    record.from_timestamp + i as u32,
+                    read_cols.read_aux.as_mut(),
+                );
+                read_cols.address.pointer = record.ptrs[i];
+                read_cols.address.address_space = F::ZERO;
+            } else {
+                read_cols.read_aux.is_zero_aux = native_as.inverse();
+                read_cols.read_aux.is_immediate = F::ZERO;
+                mem_helper.fill(
+                    read_record.prev_timestamp,
+                    record.from_timestamp + i as u32,
+                    read_cols.read_aux.as_mut(),
+                );
+                read_cols.address.pointer = record.ptrs[i];
+                read_cols.address.address_space = native_as;
+            }
         }
-    }
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        adapter_row.from_state.timestamp = F::from_canonical_u32(record.from_timestamp);
+        adapter_row.from_state.pc = F::from_canonical_u32(record.from_pc);
     }
 }
diff --git a/extensions/native/circuit/src/adapters/convert_adapter.rs b/extensions/native/circuit/src/adapters/convert_adapter.rs
index cac6d91bac..9c76c73b59 100644
--- a/extensions/native/circuit/src/adapters/convert_adapter.rs
+++ b/extensions/native/circuit/src/adapters/convert_adapter.rs
@@ -1,71 +1,37 @@
 use std::{
     borrow::{Borrow, BorrowMut},
-    marker::PhantomData,
+    mem::size_of,
 };
 
 use openvm_circuit::{
     arch::{
-        AdapterAirContext, AdapterRuntimeContext, BasicAdapterInterface, ExecutionBridge,
-        ExecutionBus, ExecutionState, MinimalInstruction, Result, VmAdapterAir, VmAdapterChip,
-        VmAdapterInterface,
+        get_record_from_slice, AdapterAirContext, AdapterTraceExecutor, AdapterTraceFiller,
+        BasicAdapterInterface, ExecutionBridge, ExecutionState, MinimalInstruction, VmAdapterAir,
     },
     system::{
         memory::{
-            offline_checker::{MemoryBridge, MemoryReadAuxCols, MemoryWriteAuxCols},
-            MemoryAddress, MemoryController, OfflineMemory, RecordId,
+            offline_checker::{
+                MemoryBridge, MemoryReadAuxCols, MemoryReadAuxRecord, MemoryWriteAuxCols,
+                MemoryWriteBytesAuxRecord,
+            },
+            online::TracingMemory,
+            MemoryAddress, MemoryAuxColsFactory,
         },
-        program::ProgramBus,
+        native_adapter::util::tracing_read_native,
     },
 };
+use openvm_circuit_primitives::AlignedBytesBorrow;
 use openvm_circuit_primitives_derive::AlignedBorrow;
-use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP};
+use openvm_instructions::{
+    instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_MEMORY_AS,
+};
 use openvm_native_compiler::conversion::AS;
+use openvm_rv32im_circuit::adapters::tracing_write;
 use openvm_stark_backend::{
     interaction::InteractionBuilder,
     p3_air::BaseAir,
     p3_field::{Field, FieldAlgebra, PrimeField32},
 };
-use serde::{Deserialize, Serialize};
-use serde_big_array::BigArray;
-
-#[repr(C)]
-#[derive(Debug, Serialize, Deserialize)]
-pub struct VectorReadRecord<const NUM_READS: usize, const READ_SIZE: usize> {
-    #[serde(with = "BigArray")]
-    pub reads: [RecordId; NUM_READS],
-}
-
-#[repr(C)]
-#[derive(Debug, Serialize, Deserialize)]
-pub struct VectorWriteRecord<const WRITE_SIZE: usize> {
-    pub from_state: ExecutionState<u32>,
-    pub writes: [RecordId; 1],
-}
-
-#[allow(dead_code)]
-#[derive(Debug)]
-pub struct ConvertAdapterChip<F: Field, const READ_SIZE: usize, const WRITE_SIZE: usize> {
-    pub air: ConvertAdapterAir<READ_SIZE, WRITE_SIZE>,
-    _marker: PhantomData<F>,
-}
-
-impl<F: PrimeField32, const READ_SIZE: usize, const WRITE_SIZE: usize>
-    ConvertAdapterChip<F, READ_SIZE, WRITE_SIZE>
-{
-    pub fn new(
-        execution_bus: ExecutionBus,
-        program_bus: ProgramBus,
-        memory_bridge: MemoryBridge,
-    ) -> Self {
-        Self {
-            air: ConvertAdapterAir {
-                execution_bridge: ExecutionBridge::new(execution_bus, program_bus),
-                memory_bridge,
-            },
-            _marker: PhantomData,
-        }
-    }
-}
 
 #[repr(C)]
 #[derive(AlignedBorrow)]
@@ -155,74 +121,112 @@ impl<AB: InteractionBuilder, const READ_SIZE: usize, const WRITE_SIZE: usize> Vm
     }
 }
 
-impl<F: PrimeField32, const READ_SIZE: usize, const WRITE_SIZE: usize> VmAdapterChip<F>
-    for ConvertAdapterChip<F, READ_SIZE, WRITE_SIZE>
-{
-    type ReadRecord = VectorReadRecord<1, READ_SIZE>;
-    type WriteRecord = VectorWriteRecord<WRITE_SIZE>;
-    type Air = ConvertAdapterAir<READ_SIZE, WRITE_SIZE>;
-    type Interface = BasicAdapterInterface<F, MinimalInstruction<F>, 1, 1, READ_SIZE, WRITE_SIZE>;
-
-    fn preprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-    ) -> Result<(
-        <Self::Interface as VmAdapterInterface<F>>::Reads,
-        Self::ReadRecord,
-    )> {
-        let Instruction { b, e, .. } = *instruction;
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct ConvertAdapterRecord<F, const READ_SIZE: usize, const WRITE_SIZE: usize> {
+    pub from_pc: u32,
+    pub from_timestamp: u32,
+
+    pub a_ptr: F,
+    pub b_ptr: F,
+
+    pub read_aux: MemoryReadAuxRecord,
+    pub write_aux: MemoryWriteBytesAuxRecord<WRITE_SIZE>,
+}
+
+#[derive(derive_new::new, Clone, Copy)]
+pub struct ConvertAdapterExecutor<const READ_SIZE: usize, const WRITE_SIZE: usize>;
 
-        let y_val = memory.read::<READ_SIZE>(e, b);
+#[derive(derive_new::new)]
+pub struct ConvertAdapterFiller<const READ_SIZE: usize, const WRITE_SIZE: usize>;
 
-        Ok(([y_val.1], Self::ReadRecord { reads: [y_val.0] }))
+impl<F: PrimeField32, const READ_SIZE: usize, const WRITE_SIZE: usize> AdapterTraceExecutor<F>
+    for ConvertAdapterExecutor<READ_SIZE, WRITE_SIZE>
+{
+    const WIDTH: usize = size_of::<ConvertAdapterCols<u8, READ_SIZE, WRITE_SIZE>>();
+    type ReadData = [F; READ_SIZE];
+    type WriteData = [u8; WRITE_SIZE];
+    type RecordMut<'a> = &'a mut ConvertAdapterRecord<F, READ_SIZE, WRITE_SIZE>;
+
+    #[inline(always)]
+    fn start(pc: u32, memory: &TracingMemory, record: &mut Self::RecordMut<'_>) {
+        record.from_pc = pc;
+        record.from_timestamp = memory.timestamp;
     }
 
-    fn postprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
+    #[inline(always)]
+    fn read(
+        &self,
+        memory: &mut TracingMemory,
         instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-        output: AdapterRuntimeContext<F, Self::Interface>,
-        _read_record: &Self::ReadRecord,
-    ) -> Result<(ExecutionState<u32>, Self::WriteRecord)> {
-        let Instruction { a, d, .. } = *instruction;
-        let (write_id, _) = memory.write::<WRITE_SIZE>(d, a, output.writes[0]);
-
-        Ok((
-            ExecutionState {
-                pc: output.to_pc.unwrap_or(from_state.pc + DEFAULT_PC_STEP),
-                timestamp: memory.timestamp(),
-            },
-            Self::WriteRecord {
-                from_state,
-                writes: [write_id],
-            },
-        ))
+        record: &mut Self::RecordMut<'_>,
+    ) -> Self::ReadData {
+        let &Instruction { b, e, .. } = instruction;
+        debug_assert_eq!(e.as_canonical_u32(), AS::Native as u32);
+
+        record.b_ptr = b;
+
+        tracing_read_native(
+            memory,
+            b.as_canonical_u32(),
+            &mut record.read_aux.prev_timestamp,
+        )
     }
 
-    fn generate_trace_row(
+    #[inline(always)]
+    fn write(
         &self,
-        row_slice: &mut [F],
-        read_record: Self::ReadRecord,
-        write_record: Self::WriteRecord,
-        memory: &OfflineMemory<F>,
+        memory: &mut TracingMemory,
+        instruction: &Instruction<F>,
+        data: Self::WriteData,
+        record: &mut Self::RecordMut<'_>,
     ) {
-        let aux_cols_factory = memory.aux_cols_factory();
-        let row_slice: &mut ConvertAdapterCols<_, READ_SIZE, WRITE_SIZE> = row_slice.borrow_mut();
-
-        let read = memory.record_by_id(read_record.reads[0]);
-        let write = memory.record_by_id(write_record.writes[0]);
-
-        row_slice.from_state = write_record.from_state.map(F::from_canonical_u32);
-        row_slice.a_pointer = write.pointer;
-        row_slice.b_pointer = read.pointer;
-
-        aux_cols_factory.generate_read_aux(read, &mut row_slice.reads_aux[0]);
-        aux_cols_factory.generate_write_aux(write, &mut row_slice.writes_aux[0]);
+        let &Instruction { a, d, .. } = instruction;
+
+        debug_assert_eq!(d.as_canonical_u32(), RV32_MEMORY_AS);
+
+        record.a_ptr = a;
+        tracing_write(
+            memory,
+            RV32_MEMORY_AS,
+            a.as_canonical_u32(),
+            data,
+            &mut record.write_aux.prev_timestamp,
+            &mut record.write_aux.prev_data,
+        );
     }
+}
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+impl<F: PrimeField32, const READ_SIZE: usize, const WRITE_SIZE: usize> AdapterTraceFiller<F>
+    for ConvertAdapterFiller<READ_SIZE, WRITE_SIZE>
+{
+    const WIDTH: usize = size_of::<ConvertAdapterCols<u8, READ_SIZE, WRITE_SIZE>>();
+
+    #[inline(always)]
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, mut row_slice: &mut [F]) {
+        let record: &ConvertAdapterRecord<F, READ_SIZE, WRITE_SIZE> =
+            unsafe { get_record_from_slice(&mut row_slice, ()) };
+        let adapter_row: &mut ConvertAdapterCols<F, READ_SIZE, WRITE_SIZE> = row_slice.borrow_mut();
+
+        // `record` is stored in this same row; fill columns in reverse so it is not overwritten
+        mem_helper.fill(
+            record.read_aux.prev_timestamp,
+            record.from_timestamp,
+            adapter_row.reads_aux[0].as_mut(),
+        );
+
+        adapter_row.writes_aux[0]
+            .set_prev_data(record.write_aux.prev_data.map(F::from_canonical_u8));
+        mem_helper.fill(
+            record.write_aux.prev_timestamp,
+            record.from_timestamp + 1,
+            adapter_row.writes_aux[0].as_mut(),
+        );
+
+        adapter_row.b_pointer = record.b_ptr;
+        adapter_row.a_pointer = record.a_ptr;
+
+        adapter_row.from_state.timestamp = F::from_canonical_u32(record.from_timestamp);
+        adapter_row.from_state.pc = F::from_canonical_u32(record.from_pc);
     }
 }
diff --git a/extensions/native/circuit/src/adapters/loadstore_native_adapter.rs b/extensions/native/circuit/src/adapters/loadstore_native_adapter.rs
index 4bcf96d195..d33d74972e 100644
--- a/extensions/native/circuit/src/adapters/loadstore_native_adapter.rs
+++ b/extensions/native/circuit/src/adapters/loadstore_native_adapter.rs
@@ -5,19 +5,24 @@ use std::{
 
 use openvm_circuit::{
     arch::{
-        instructions::LocalOpcode, AdapterAirContext, AdapterRuntimeContext, ExecutionBridge,
-        ExecutionBus, ExecutionState, Result, VmAdapterAir, VmAdapterChip, VmAdapterInterface,
+        get_record_from_slice, AdapterAirContext, AdapterTraceExecutor, AdapterTraceFiller,
+        ExecutionBridge, ExecutionState, VmAdapterAir, VmAdapterInterface,
     },
     system::{
         memory::{
-            offline_checker::{MemoryBridge, MemoryReadAuxCols, MemoryWriteAuxCols},
-            MemoryAddress, MemoryController, OfflineMemory, RecordId,
+            offline_checker::{
+                MemoryBridge, MemoryReadAuxCols, MemoryReadAuxRecord, MemoryWriteAuxCols,
+                MemoryWriteAuxRecord,
+            },
+            online::TracingMemory,
+            MemoryAddress, MemoryAuxColsFactory,
         },
-        program::ProgramBus,
+        native_adapter::util::{tracing_read_native, tracing_write_native},
     },
 };
+use openvm_circuit_primitives::AlignedBytesBorrow;
 use openvm_circuit_primitives_derive::AlignedBorrow;
-use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP};
+use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP, LocalOpcode};
 use openvm_native_compiler::{
     conversion::AS,
     NativeLoadStoreOpcode::{self, *},
@@ -27,7 +32,6 @@ use openvm_stark_backend::{
     p3_air::BaseAir,
     p3_field::{Field, FieldAlgebra, PrimeField32},
 };
-use serde::{Deserialize, Serialize};
 
 pub struct NativeLoadStoreInstruction<T> {
     pub is_valid: T,
@@ -48,55 +52,6 @@ impl<T, const NUM_CELLS: usize> VmAdapterInterface<T>
     type ProcessedInstruction = NativeLoadStoreInstruction<T>;
 }
 
-#[derive(Debug)]
-pub struct NativeLoadStoreAdapterChip<F: Field, const NUM_CELLS: usize> {
-    pub air: NativeLoadStoreAdapterAir<NUM_CELLS>,
-    offset: usize,
-    _marker: PhantomData<F>,
-}
-
-impl<F: PrimeField32, const NUM_CELLS: usize> NativeLoadStoreAdapterChip<F, NUM_CELLS> {
-    pub fn new(
-        execution_bus: ExecutionBus,
-        program_bus: ProgramBus,
-        memory_bridge: MemoryBridge,
-        offset: usize,
-    ) -> Self {
-        Self {
-            air: NativeLoadStoreAdapterAir {
-                memory_bridge,
-                execution_bridge: ExecutionBridge::new(execution_bus, program_bus),
-            },
-            offset,
-            _marker: PhantomData,
-        }
-    }
-}
-
-#[repr(C)]
-#[derive(Clone, Debug, Serialize, Deserialize)]
-#[serde(bound = "F: Field")]
-pub struct NativeLoadStoreReadRecord<F: Field, const NUM_CELLS: usize> {
-    pub pointer_read: RecordId,
-    pub data_read: Option<RecordId>,
-    pub write_as: F,
-    pub write_ptr: F,
-
-    pub a: F,
-    pub b: F,
-    pub c: F,
-    pub d: F,
-    pub e: F,
-}
-
-#[repr(C)]
-#[derive(Clone, Debug, Serialize, Deserialize)]
-#[serde(bound = "F: Field")]
-pub struct NativeLoadStoreWriteRecord<F: Field, const NUM_CELLS: usize> {
-    pub from_state: ExecutionState<F>,
-    pub write_id: RecordId,
-}
-
 #[repr(C)]
 #[derive(Clone, Debug, AlignedBorrow)]
 pub struct NativeLoadStoreAdapterCols<T, const NUM_CELLS: usize> {
@@ -214,23 +169,52 @@ impl<AB: InteractionBuilder, const NUM_CELLS: usize> VmAdapterAir<AB>
     }
 }
 
-impl<F: PrimeField32, const NUM_CELLS: usize> VmAdapterChip<F>
-    for NativeLoadStoreAdapterChip<F, NUM_CELLS>
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct NativeLoadStoreAdapterRecord<F, const NUM_CELLS: usize> {
+    pub from_pc: u32,
+    pub from_timestamp: u32,
+    pub a: F,
+    pub b: F,
+    pub c: F,
+    pub write_ptr: F,
+
+    pub ptr_read: MemoryReadAuxRecord,
+    // `prev_timestamp` is set to `u32::MAX` for `HINT_STOREW`, which performs no data read
+    pub data_read: MemoryReadAuxRecord,
+    pub data_write: MemoryWriteAuxRecord<F, NUM_CELLS>,
+}
+
+#[derive(derive_new::new, Clone, Copy)]
+pub struct NativeLoadStoreAdapterExecutor<const NUM_CELLS: usize> {
+    offset: usize,
+}
+
+#[derive(derive_new::new)]
+pub struct NativeLoadStoreAdapterFiller<const NUM_CELLS: usize>;
+
+impl<F: PrimeField32, const NUM_CELLS: usize> AdapterTraceExecutor<F>
+    for NativeLoadStoreAdapterExecutor<NUM_CELLS>
 {
-    type ReadRecord = NativeLoadStoreReadRecord<F, NUM_CELLS>;
-    type WriteRecord = NativeLoadStoreWriteRecord<F, NUM_CELLS>;
-    type Air = NativeLoadStoreAdapterAir<NUM_CELLS>;
-    type Interface = NativeLoadStoreAdapterInterface<F, NUM_CELLS>;
-
-    fn preprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
+    const WIDTH: usize = std::mem::size_of::<NativeLoadStoreAdapterCols<u8, NUM_CELLS>>();
+    type ReadData = (F, [F; NUM_CELLS]);
+    type WriteData = [F; NUM_CELLS];
+    type RecordMut<'a> = &'a mut NativeLoadStoreAdapterRecord<F, NUM_CELLS>;
+
+    #[inline(always)]
+    fn start(pc: u32, memory: &TracingMemory, record: &mut Self::RecordMut<'_>) {
+        record.from_pc = pc;
+        record.from_timestamp = memory.timestamp();
+    }
+
+    #[inline(always)]
+    fn read(
+        &self,
+        memory: &mut TracingMemory,
         instruction: &Instruction<F>,
-    ) -> Result<(
-        <Self::Interface as VmAdapterInterface<F>>::Reads,
-        Self::ReadRecord,
-    )> {
-        let Instruction {
+        record: &mut Self::RecordMut<'_>,
+    ) -> Self::ReadData {
+        let &Instruction {
             opcode,
             a,
             b,
@@ -238,100 +222,116 @@ impl<F: PrimeField32, const NUM_CELLS: usize> VmAdapterChip<F>
             d,
             e,
             ..
-        } = *instruction;
+        } = instruction;
+
+        debug_assert_eq!(d.as_canonical_u32(), AS::Native as u32);
+        debug_assert_eq!(e.as_canonical_u32(), AS::Native as u32);
+
         let local_opcode = NativeLoadStoreOpcode::from_usize(opcode.local_opcode_idx(self.offset));
 
-        let read_as = d;
-        let read_ptr = c;
-        let read_cell = memory.read_cell(read_as, read_ptr);
+        record.a = a;
+        record.b = b;
+        record.c = c;
 
-        let (data_read_as, data_write_as) = {
-            match local_opcode {
-                LOADW => (e, d),
-                STOREW | HINT_STOREW => (d, e),
+        // Read the pointer value from memory
+        let [read_cell] = tracing_read_native::<F, 1>(
+            memory,
+            c.as_canonical_u32(),
+            &mut record.ptr_read.prev_timestamp,
+        );
+
+        let data_read_ptr = match local_opcode {
+            LOADW => read_cell + record.b,
+            STOREW | HINT_STOREW => record.a,
+        }
+        .as_canonical_u32();
+
+        // It's easier to do this here than in `write`
+        match local_opcode {
+            LOADW => record.write_ptr = record.a,
+            STOREW | HINT_STOREW => record.write_ptr = read_cell + record.b,
+        }
+
+        // Read data based on opcode
+        let data_read: [F; NUM_CELLS] = match local_opcode {
+            HINT_STOREW => {
+                record.data_read.prev_timestamp = u32::MAX;
+                [F::ZERO; NUM_CELLS]
             }
-        };
-        let (data_read_ptr, data_write_ptr) = {
-            match local_opcode {
-                LOADW => (read_cell.1 + b, a),
-                STOREW | HINT_STOREW => (a, read_cell.1 + b),
+            LOADW | STOREW => {
+                tracing_read_native(memory, data_read_ptr, &mut record.data_read.prev_timestamp)
             }
         };
 
-        let data_read = match local_opcode {
-            HINT_STOREW => None,
-            LOADW | STOREW => Some(memory.read::<NUM_CELLS>(data_read_as, data_read_ptr)),
-        };
-        let record = NativeLoadStoreReadRecord {
-            pointer_read: read_cell.0,
-            data_read: data_read.map(|x| x.0),
-            write_as: data_write_as,
-            write_ptr: data_write_ptr,
-            a,
-            b,
-            c,
-            d,
-            e,
-        };
-
-        Ok((
-            (read_cell.1, data_read.map_or([F::ZERO; NUM_CELLS], |x| x.1)),
-            record,
-        ))
+        (read_cell, data_read)
     }
 
-    fn postprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
+    #[inline(always)]
+    fn write(
+        &self,
+        memory: &mut TracingMemory,
         _instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-        output: AdapterRuntimeContext<F, Self::Interface>,
-        read_record: &Self::ReadRecord,
-    ) -> Result<(ExecutionState<u32>, Self::WriteRecord)> {
-        let (write_id, _) =
-            memory.write::<NUM_CELLS>(read_record.write_as, read_record.write_ptr, output.writes);
-        Ok((
-            ExecutionState {
-                pc: output.to_pc.unwrap_or(from_state.pc + DEFAULT_PC_STEP),
-                timestamp: memory.timestamp(),
-            },
-            Self::WriteRecord {
-                from_state: from_state.map(F::from_canonical_u32),
-                write_id,
-            },
-        ))
+        data: Self::WriteData,
+        record: &mut Self::RecordMut<'_>,
+    ) {
+        // Write data to memory
+        tracing_write_native(
+            memory,
+            record.write_ptr.as_canonical_u32(),
+            data,
+            &mut record.data_write.prev_timestamp,
+            &mut record.data_write.prev_data,
+        );
     }
+}
 
-    fn generate_trace_row(
-        &self,
-        row_slice: &mut [F],
-        read_record: Self::ReadRecord,
-        write_record: Self::WriteRecord,
-        memory: &OfflineMemory<F>,
-    ) {
-        let aux_cols_factory = memory.aux_cols_factory();
-        let cols: &mut NativeLoadStoreAdapterCols<_, NUM_CELLS> = row_slice.borrow_mut();
-        cols.from_state = write_record.from_state;
-        cols.a = read_record.a;
-        cols.b = read_record.b;
-        cols.c = read_record.c;
-
-        let data_read = read_record.data_read.map(|read| memory.record_by_id(read));
-        if let Some(data_read) = data_read {
-            aux_cols_factory.generate_read_aux(data_read, &mut cols.data_read_aux_cols);
-        }
+impl<F: PrimeField32, const NUM_CELLS: usize> AdapterTraceFiller<F>
+    for NativeLoadStoreAdapterFiller<NUM_CELLS>
+{
+    const WIDTH: usize = size_of::<NativeLoadStoreAdapterCols<u8, NUM_CELLS>>();
+
+    #[inline(always)]
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, mut adapter_row: &mut [F]) {
+        let record: &NativeLoadStoreAdapterRecord<F, NUM_CELLS> =
+            unsafe { get_record_from_slice(&mut adapter_row, ()) };
+        let adapter_row: &mut NativeLoadStoreAdapterCols<F, NUM_CELLS> = adapter_row.borrow_mut();
+
+        // `record` is stored in this same row; fill columns in reverse so it is not overwritten
+
+        let is_hint_storew = record.data_read.prev_timestamp == u32::MAX;
+
+        adapter_row
+            .data_write_aux_cols
+            .set_prev_data(record.data_write.prev_data);
+        // Note: `HINT_STOREW` skips the data read, so the write happens one timestamp tick earlier
+        mem_helper.fill(
+            record.data_write.prev_timestamp,
+            record.from_timestamp + 2 - is_hint_storew as u32,
+            adapter_row.data_write_aux_cols.as_mut(),
+        );
 
-        let write = memory.record_by_id(write_record.write_id);
-        cols.data_write_pointer = write.pointer;
+        if !is_hint_storew {
+            mem_helper.fill(
+                record.data_read.prev_timestamp,
+                record.from_timestamp + 1,
+                adapter_row.data_read_aux_cols.as_mut(),
+            );
+        } else {
+            mem_helper.fill_zero(adapter_row.data_read_aux_cols.as_mut());
+        }
 
-        aux_cols_factory.generate_read_aux(
-            memory.record_by_id(read_record.pointer_read),
-            &mut cols.pointer_read_aux_cols,
+        mem_helper.fill(
+            record.ptr_read.prev_timestamp,
+            record.from_timestamp,
+            adapter_row.pointer_read_aux_cols.as_mut(),
         );
-        aux_cols_factory.generate_write_aux(write, &mut cols.data_write_aux_cols);
-    }
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        adapter_row.data_write_pointer = record.write_ptr;
+        adapter_row.c = record.c;
+        adapter_row.b = record.b;
+        adapter_row.a = record.a;
+        // Keep the reverse-order fill used by the sibling fillers: `timestamp` first, then `pc`.
+        adapter_row.from_state.timestamp = F::from_canonical_u32(record.from_timestamp);
+        adapter_row.from_state.pc = F::from_canonical_u32(record.from_pc);
     }
 }
diff --git a/extensions/native/circuit/src/adapters/mod.rs b/extensions/native/circuit/src/adapters/mod.rs
index c5cd3b9422..308a0705a3 100644
--- a/extensions/native/circuit/src/adapters/mod.rs
+++ b/extensions/native/circuit/src/adapters/mod.rs
@@ -6,3 +6,9 @@ pub mod convert_adapter;
 pub mod loadstore_native_adapter;
 // 2 reads, 1 write, read size = write size = N, no imm support, read/write to address space d
 pub mod native_vectorized_adapter;
+
+pub use alu_native_adapter::*;
+pub use branch_native_adapter::*;
+pub use convert_adapter::*;
+pub use loadstore_native_adapter::*;
+pub use native_vectorized_adapter::*;
diff --git a/extensions/native/circuit/src/adapters/native_vectorized_adapter.rs b/extensions/native/circuit/src/adapters/native_vectorized_adapter.rs
index c151197297..6545e8db39 100644
--- a/extensions/native/circuit/src/adapters/native_vectorized_adapter.rs
+++ b/extensions/native/circuit/src/adapters/native_vectorized_adapter.rs
@@ -1,22 +1,26 @@
 use std::{
     borrow::{Borrow, BorrowMut},
-    marker::PhantomData,
+    mem::size_of,
 };
 
 use openvm_circuit::{
     arch::{
-        AdapterAirContext, AdapterRuntimeContext, BasicAdapterInterface, ExecutionBridge,
-        ExecutionBus, ExecutionState, MinimalInstruction, Result, VmAdapterAir, VmAdapterChip,
-        VmAdapterInterface,
+        get_record_from_slice, AdapterAirContext, AdapterTraceExecutor, AdapterTraceFiller,
+        BasicAdapterInterface, ExecutionBridge, ExecutionState, MinimalInstruction, VmAdapterAir,
     },
     system::{
         memory::{
-            offline_checker::{MemoryBridge, MemoryReadAuxCols, MemoryWriteAuxCols},
-            MemoryAddress, MemoryController, OfflineMemory, RecordId,
+            offline_checker::{
+                MemoryBridge, MemoryReadAuxCols, MemoryReadAuxRecord, MemoryWriteAuxCols,
+                MemoryWriteAuxRecord,
+            },
+            online::TracingMemory,
+            MemoryAddress, MemoryAuxColsFactory,
         },
-        program::ProgramBus,
+        native_adapter::util::{tracing_read_native, tracing_write_native},
     },
 };
+use openvm_circuit_primitives::AlignedBytesBorrow;
 use openvm_circuit_primitives_derive::AlignedBorrow;
 use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP};
 use openvm_native_compiler::conversion::AS;
@@ -25,44 +29,6 @@ use openvm_stark_backend::{
     p3_air::BaseAir,
     p3_field::{Field, FieldAlgebra, PrimeField32},
 };
-use serde::{Deserialize, Serialize};
-
-#[allow(dead_code)]
-#[derive(Debug)]
-pub struct NativeVectorizedAdapterChip<F: Field, const N: usize> {
-    pub air: NativeVectorizedAdapterAir<N>,
-    _marker: PhantomData<F>,
-}
-
-impl<F: PrimeField32, const N: usize> NativeVectorizedAdapterChip<F, N> {
-    pub fn new(
-        execution_bus: ExecutionBus,
-        program_bus: ProgramBus,
-        memory_bridge: MemoryBridge,
-    ) -> Self {
-        Self {
-            air: NativeVectorizedAdapterAir {
-                execution_bridge: ExecutionBridge::new(execution_bus, program_bus),
-                memory_bridge,
-            },
-            _marker: PhantomData,
-        }
-    }
-}
-
-#[repr(C)]
-#[derive(Debug, Serialize, Deserialize)]
-pub struct NativeVectorizedReadRecord<const N: usize> {
-    pub b: RecordId,
-    pub c: RecordId,
-}
-
-#[repr(C)]
-#[derive(Debug, Serialize, Deserialize)]
-pub struct NativeVectorizedWriteRecord<const N: usize> {
-    pub from_state: ExecutionState<u32>,
-    pub a: RecordId,
-}
 
 #[repr(C)]
 #[derive(AlignedBorrow)]
@@ -156,80 +122,124 @@ impl<AB: InteractionBuilder, const N: usize> VmAdapterAir<AB> for NativeVectoriz
     }
 }
 
-impl<F: PrimeField32, const N: usize> VmAdapterChip<F> for NativeVectorizedAdapterChip<F, N> {
-    type ReadRecord = NativeVectorizedReadRecord<N>;
-    type WriteRecord = NativeVectorizedWriteRecord<N>;
-    type Air = NativeVectorizedAdapterAir<N>;
-    type Interface = BasicAdapterInterface<F, MinimalInstruction<F>, 2, 1, N, N>;
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct NativeVectorizedAdapterRecord<F, const N: usize> {
+    pub from_pc: u32,
+    pub from_timestamp: u32,
+    pub a_ptr: F,
+    pub b_ptr: F,
+    pub c_ptr: F,
+    pub reads_aux: [MemoryReadAuxRecord; 2],
+    pub write_aux: MemoryWriteAuxRecord<F, N>,
+}
 
-    fn preprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-    ) -> Result<(
-        <Self::Interface as VmAdapterInterface<F>>::Reads,
-        Self::ReadRecord,
-    )> {
-        let Instruction { b, c, d, e, .. } = *instruction;
-
-        let y_val = memory.read::<N>(d, b);
-        let z_val = memory.read::<N>(e, c);
-
-        Ok((
-            [y_val.1, z_val.1],
-            Self::ReadRecord {
-                b: y_val.0,
-                c: z_val.0,
-            },
-        ))
+#[derive(derive_new::new, Clone, Copy)]
+pub struct NativeVectorizedAdapterExecutor<const N: usize>;
+
+#[derive(derive_new::new)]
+pub struct NativeVectorizedAdapterFiller<const N: usize>;
+
+impl<F: PrimeField32, const N: usize> AdapterTraceExecutor<F>
+    for NativeVectorizedAdapterExecutor<N>
+{
+    const WIDTH: usize = size_of::<NativeVectorizedAdapterCols<u8, N>>();
+    type ReadData = [[F; N]; 2];
+    type WriteData = [F; N];
+    type RecordMut<'a> = &'a mut NativeVectorizedAdapterRecord<F, N>;
+
+    #[inline(always)]
+    fn start(pc: u32, memory: &TracingMemory, record: &mut Self::RecordMut<'_>) {
+        record.from_pc = pc;
+        record.from_timestamp = memory.timestamp();
     }
 
-    fn postprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
+    #[inline(always)]
+    fn read(
+        &self,
+        memory: &mut TracingMemory,
         instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-        output: AdapterRuntimeContext<F, Self::Interface>,
-        _read_record: &Self::ReadRecord,
-    ) -> Result<(ExecutionState<u32>, Self::WriteRecord)> {
-        let Instruction { a, d, .. } = *instruction;
-        let (a_val, _) = memory.write(d, a, output.writes[0]);
-
-        Ok((
-            ExecutionState {
-                pc: output.to_pc.unwrap_or(from_state.pc + DEFAULT_PC_STEP),
-                timestamp: memory.timestamp(),
-            },
-            Self::WriteRecord {
-                from_state,
-                a: a_val,
-            },
-        ))
+        record: &mut Self::RecordMut<'_>,
+    ) -> Self::ReadData {
+        let &Instruction { b, c, d, e, .. } = instruction;
+        debug_assert_eq!(d.as_canonical_u32(), AS::Native as u32);
+        debug_assert_eq!(e.as_canonical_u32(), AS::Native as u32);
+
+        record.b_ptr = b;
+        let b_val = tracing_read_native(
+            memory,
+            b.as_canonical_u32(),
+            &mut record.reads_aux[0].prev_timestamp,
+        );
+        record.c_ptr = c;
+        let c_val = tracing_read_native(
+            memory,
+            c.as_canonical_u32(),
+            &mut record.reads_aux[1].prev_timestamp,
+        );
+
+        [b_val, c_val]
     }
 
-    fn generate_trace_row(
+    #[inline(always)]
+    fn write(
         &self,
-        row_slice: &mut [F],
-        read_record: Self::ReadRecord,
-        write_record: Self::WriteRecord,
-        memory: &OfflineMemory<F>,
+        memory: &mut TracingMemory,
+        instruction: &Instruction<F>,
+        data: Self::WriteData,
+        record: &mut Self::RecordMut<'_>,
     ) {
-        let aux_cols_factory = memory.aux_cols_factory();
-        let row_slice: &mut NativeVectorizedAdapterCols<_, N> = row_slice.borrow_mut();
-
-        let b_record = memory.record_by_id(read_record.b);
-        let c_record = memory.record_by_id(read_record.c);
-        let a_record = memory.record_by_id(write_record.a);
-        row_slice.from_state = write_record.from_state.map(F::from_canonical_u32);
-        row_slice.a_pointer = a_record.pointer;
-        row_slice.b_pointer = b_record.pointer;
-        row_slice.c_pointer = c_record.pointer;
-        aux_cols_factory.generate_read_aux(b_record, &mut row_slice.reads_aux[0]);
-        aux_cols_factory.generate_read_aux(c_record, &mut row_slice.reads_aux[1]);
-        aux_cols_factory.generate_write_aux(a_record, &mut row_slice.writes_aux[0]);
+        let &Instruction { a, d, .. } = instruction;
+
+        debug_assert_eq!(d.as_canonical_u32(), AS::Native as u32);
+
+        record.a_ptr = a;
+        tracing_write_native(
+            memory,
+            a.as_canonical_u32(),
+            data,
+            &mut record.write_aux.prev_timestamp,
+            &mut record.write_aux.prev_data,
+        );
     }
+}
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+impl<F: PrimeField32, const N: usize> AdapterTraceFiller<F> for NativeVectorizedAdapterFiller<N> {
+    const WIDTH: usize = size_of::<NativeVectorizedAdapterCols<u8, N>>();
+
+    #[inline(always)]
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, mut adapter_row: &mut [F]) {
+        let record: &NativeVectorizedAdapterRecord<F, N> =
+            unsafe { get_record_from_slice(&mut adapter_row, ()) };
+        let adapter_row: &mut NativeVectorizedAdapterCols<F, N> = adapter_row.borrow_mut();
+
+        // Writing in reverse order to avoid overwriting the `record`
+        adapter_row.writes_aux[0].set_prev_data(record.write_aux.prev_data);
+        mem_helper.fill(
+            record.write_aux.prev_timestamp,
+            record.from_timestamp + 2,
+            adapter_row.writes_aux[0].as_mut(),
+        );
+
+        adapter_row
+            .reads_aux
+            .iter_mut()
+            .enumerate()
+            .zip(record.reads_aux.iter())
+            .rev()
+            .for_each(|((i, read_cols), read_record)| {
+                mem_helper.fill(
+                    read_record.prev_timestamp,
+                    record.from_timestamp + i as u32,
+                    read_cols.as_mut(),
+                );
+            });
+
+        adapter_row.c_pointer = record.c_ptr;
+        adapter_row.b_pointer = record.b_ptr;
+        adapter_row.a_pointer = record.a_ptr;
+
+        adapter_row.from_state.timestamp = F::from_canonical_u32(record.from_timestamp);
+        adapter_row.from_state.pc = F::from_canonical_u32(record.from_pc);
     }
 }
diff --git a/extensions/native/circuit/src/branch_eq/core.rs b/extensions/native/circuit/src/branch_eq/core.rs
new file mode 100644
index 0000000000..bc1a5a3163
--- /dev/null
+++ b/extensions/native/circuit/src/branch_eq/core.rs
@@ -0,0 +1,120 @@
+use std::borrow::BorrowMut;
+
+use openvm_circuit::{
+    arch::*,
+    system::memory::{online::TracingMemory, MemoryAuxColsFactory},
+};
+use openvm_circuit_primitives::AlignedBytesBorrow;
+use openvm_instructions::{instruction::Instruction, LocalOpcode};
+use openvm_native_compiler::NativeBranchEqualOpcode;
+use openvm_rv32im_circuit::BranchEqualCoreCols;
+use openvm_rv32im_transpiler::BranchEqualOpcode;
+use openvm_stark_backend::p3_field::PrimeField32;
+
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct NativeBranchEqualCoreRecord<F> {
+    pub a: F,
+    pub b: F,
+    pub imm: F,
+    pub is_beq: bool,
+}
+
+#[derive(derive_new::new, Clone, Copy)]
+pub struct NativeBranchEqualExecutor<A> {
+    adapter: A,
+    pub offset: usize,
+    pub pc_step: u32,
+}
+
+#[derive(derive_new::new)]
+pub struct NativeBranchEqualFiller<A> {
+    adapter: A,
+}
+
+impl<F, A, RA> PreflightExecutor<F, RA> for NativeBranchEqualExecutor<A>
+where
+    F: PrimeField32,
+    A: 'static + AdapterTraceExecutor<F, ReadData: Into<[F; 2]>, WriteData = ()>,
+    for<'buf> RA: RecordArena<
+        'buf,
+        EmptyAdapterCoreLayout<F, A>,
+        (A::RecordMut<'buf>, &'buf mut NativeBranchEqualCoreRecord<F>),
+    >,
+{
+    fn get_opcode_name(&self, opcode: usize) -> String {
+        format!(
+            "{:?}",
+            NativeBranchEqualOpcode::from_usize(opcode - self.offset)
+        )
+    }
+
+    fn execute(
+        &self,
+        state: VmStateMut<F, TracingMemory, RA>,
+        instruction: &Instruction<F>,
+    ) -> Result<(), ExecutionError> {
+        let &Instruction { opcode, c: imm, .. } = instruction;
+        let (mut adapter_record, core_record) = state.ctx.alloc(EmptyAdapterCoreLayout::new());
+
+        A::start(*state.pc, state.memory, &mut adapter_record);
+
+        [core_record.a, core_record.b] = self
+            .adapter
+            .read(state.memory, instruction, &mut adapter_record)
+            .into();
+
+        let cmp_result = core_record.a == core_record.b;
+
+        core_record.imm = imm;
+        core_record.is_beq =
+            opcode.local_opcode_idx(self.offset) == BranchEqualOpcode::BEQ as usize;
+
+        if cmp_result == core_record.is_beq {
+            *state.pc = (F::from_canonical_u32(*state.pc) + imm).as_canonical_u32();
+        } else {
+            *state.pc = state.pc.wrapping_add(self.pc_step);
+        }
+
+        Ok(())
+    }
+}
+
+impl<F, A> TraceFiller<F> for NativeBranchEqualFiller<A>
+where
+    F: PrimeField32,
+    A: 'static + AdapterTraceFiller<F>,
+{
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, row_slice: &mut [F]) {
+        let (adapter_row, mut core_row) = unsafe { row_slice.split_at_mut_unchecked(A::WIDTH) };
+        self.adapter.fill_trace_row(mem_helper, adapter_row);
+        let record: &NativeBranchEqualCoreRecord<F> =
+            unsafe { get_record_from_slice(&mut core_row, ()) };
+        let core_row: &mut BranchEqualCoreCols<F, 1> = core_row.borrow_mut();
+        let (cmp_result, diff_inv_val) = run_eq(record.is_beq, record.a, record.b);
+
+        // Writing in reverse order to avoid overwriting the `record`
+        core_row.diff_inv_marker[0] = diff_inv_val;
+
+        core_row.opcode_bne_flag = F::from_bool(!record.is_beq);
+        core_row.opcode_beq_flag = F::from_bool(record.is_beq);
+
+        core_row.imm = record.imm;
+        core_row.cmp_result = F::from_bool(cmp_result);
+
+        core_row.b = [record.b];
+        core_row.a = [record.a];
+    }
+}
+
+// Returns (cmp_result, diff_inv_val): (x == y) == is_beq, and (x - y)^-1 or zero when x == y
+#[inline(always)]
+pub(super) fn run_eq<F>(is_beq: bool, x: F, y: F) -> (bool, F)
+where
+    F: PrimeField32,
+{
+    if x != y {
+        return (!is_beq, (x - y).inverse());
+    }
+    (is_beq, F::ZERO)
+}
diff --git a/extensions/native/circuit/src/branch_eq/execution.rs b/extensions/native/circuit/src/branch_eq/execution.rs
new file mode 100644
index 0000000000..bbd8051214
--- /dev/null
+++ b/extensions/native/circuit/src/branch_eq/execution.rs
@@ -0,0 +1,209 @@
+use std::borrow::{Borrow, BorrowMut};
+
+use openvm_circuit::{
+    arch::*,
+    system::memory::online::GuestMemory,
+    utils::{transmute_field_to_u32, transmute_u32_to_field},
+};
+use openvm_circuit_primitives::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_IMM_AS, LocalOpcode, NATIVE_AS,
+};
+use openvm_rv32im_transpiler::BranchEqualOpcode;
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::NativeBranchEqualExecutor;
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct NativeBranchEqualPreCompute {
+    imm: isize,
+    a_or_imm: u32,
+    b_or_imm: u32,
+}
+
+impl<A> NativeBranchEqualExecutor<A> {
+    #[inline(always)]
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        _pc: u32,
+        inst: &Instruction<F>,
+        data: &mut NativeBranchEqualPreCompute,
+    ) -> Result<(bool, bool, bool), StaticProgramError> {
+        let &Instruction {
+            opcode,
+            a,
+            b,
+            c,
+            d,
+            e,
+            ..
+        } = inst;
+        let local_opcode = BranchEqualOpcode::from_usize(opcode.local_opcode_idx(self.offset));
+        let c = c.as_canonical_u32();
+        let imm = if F::ORDER_U32 - c < c {
+            -((F::ORDER_U32 - c) as isize)
+        } else {
+            c as isize
+        };
+        let d = d.as_canonical_u32();
+        let e = e.as_canonical_u32();
+
+        let a_is_imm = d == RV32_IMM_AS;
+        let b_is_imm = e == RV32_IMM_AS;
+
+        let a_or_imm = if a_is_imm {
+            transmute_field_to_u32(&a)
+        } else {
+            a.as_canonical_u32()
+        };
+        let b_or_imm = if b_is_imm {
+            transmute_field_to_u32(&b)
+        } else {
+            b.as_canonical_u32()
+        };
+
+        *data = NativeBranchEqualPreCompute {
+            imm,
+            a_or_imm,
+            b_or_imm,
+        };
+
+        let is_bne = local_opcode == BranchEqualOpcode::BNE;
+
+        Ok((a_is_imm, b_is_imm, is_bne))
+    }
+}
+
+impl<F, A> Executor<F> for NativeBranchEqualExecutor<A>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize {
+        size_of::<NativeBranchEqualPreCompute>()
+    }
+
+    #[inline(always)]
+    fn pre_compute<Ctx: ExecutionCtxTrait>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let pre_compute: &mut NativeBranchEqualPreCompute = data.borrow_mut();
+
+        let (a_is_imm, b_is_imm, is_bne) = self.pre_compute_impl(pc, inst, pre_compute)?;
+
+        let fn_ptr = match (a_is_imm, b_is_imm, is_bne) {
+            (true, true, true) => execute_e1_impl::<_, _, true, true, true>,
+            (true, true, false) => execute_e1_impl::<_, _, true, true, false>,
+            (true, false, true) => execute_e1_impl::<_, _, true, false, true>,
+            (true, false, false) => execute_e1_impl::<_, _, true, false, false>,
+            (false, true, true) => execute_e1_impl::<_, _, false, true, true>,
+            (false, true, false) => execute_e1_impl::<_, _, false, true, false>,
+            (false, false, true) => execute_e1_impl::<_, _, false, false, true>,
+            (false, false, false) => execute_e1_impl::<_, _, false, false, false>,
+        };
+
+        Ok(fn_ptr)
+    }
+}
+
+impl<F, A> MeteredExecutor<F> for NativeBranchEqualExecutor<A>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn metered_pre_compute_size(&self) -> usize {
+        size_of::<E2PreCompute<NativeBranchEqualPreCompute>>()
+    }
+
+    #[inline(always)]
+    fn metered_pre_compute<Ctx: MeteredExecutionCtxTrait>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let pre_compute: &mut E2PreCompute<NativeBranchEqualPreCompute> = data.borrow_mut();
+        pre_compute.chip_idx = chip_idx as u32;
+
+        let (a_is_imm, b_is_imm, is_bne) =
+            self.pre_compute_impl(pc, inst, &mut pre_compute.data)?;
+
+        let fn_ptr = match (a_is_imm, b_is_imm, is_bne) {
+            (true, true, true) => execute_e2_impl::<_, _, true, true, true>,
+            (true, true, false) => execute_e2_impl::<_, _, true, true, false>,
+            (true, false, true) => execute_e2_impl::<_, _, true, false, true>,
+            (true, false, false) => execute_e2_impl::<_, _, true, false, false>,
+            (false, true, true) => execute_e2_impl::<_, _, false, true, true>,
+            (false, true, false) => execute_e2_impl::<_, _, false, true, false>,
+            (false, false, true) => execute_e2_impl::<_, _, false, false, true>,
+            (false, false, false) => execute_e2_impl::<_, _, false, false, false>,
+        };
+
+        Ok(fn_ptr)
+    }
+}
+
+#[inline(always)]
+unsafe fn execute_e12_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const A_IS_IMM: bool,
+    const B_IS_IMM: bool,
+    const IS_NE: bool,
+>(
+    pre_compute: &NativeBranchEqualPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let rs1 = if A_IS_IMM {
+        transmute_u32_to_field(&pre_compute.a_or_imm)
+    } else {
+        vm_state.vm_read::<F, 1>(NATIVE_AS, pre_compute.a_or_imm)[0]
+    };
+    let rs2 = if B_IS_IMM {
+        transmute_u32_to_field(&pre_compute.b_or_imm)
+    } else {
+        vm_state.vm_read::<F, 1>(NATIVE_AS, pre_compute.b_or_imm)[0]
+    };
+    if (rs1 == rs2) ^ IS_NE {
+        vm_state.pc = (vm_state.pc as isize + pre_compute.imm) as u32;
+    } else {
+        vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    }
+    vm_state.instret += 1;
+}
+
+unsafe fn execute_e1_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const A_IS_IMM: bool,
+    const B_IS_IMM: bool,
+    const IS_NE: bool,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &NativeBranchEqualPreCompute = pre_compute.borrow();
+    execute_e12_impl::<_, _, A_IS_IMM, B_IS_IMM, IS_NE>(pre_compute, vm_state);
+}
+
+unsafe fn execute_e2_impl<
+    F: PrimeField32,
+    CTX: MeteredExecutionCtxTrait,
+    const A_IS_IMM: bool,
+    const B_IS_IMM: bool,
+    const IS_NE: bool,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &E2PreCompute<NativeBranchEqualPreCompute> = pre_compute.borrow();
+    vm_state
+        .ctx
+        .on_height_change(pre_compute.chip_idx as usize, 1);
+    execute_e12_impl::<_, _, A_IS_IMM, B_IS_IMM, IS_NE>(&pre_compute.data, vm_state);
+}
diff --git a/extensions/native/circuit/src/branch_eq/mod.rs b/extensions/native/circuit/src/branch_eq/mod.rs
index e1b566bb7f..3ca4acb92e 100644
--- a/extensions/native/circuit/src/branch_eq/mod.rs
+++ b/extensions/native/circuit/src/branch_eq/mod.rs
@@ -1,8 +1,18 @@
 use openvm_circuit::arch::{VmAirWrapper, VmChipWrapper};
-use openvm_rv32im_circuit::{BranchEqualCoreAir, BranchEqualCoreChip};
+use openvm_rv32im_circuit::BranchEqualCoreAir;
 
-use super::adapters::branch_native_adapter::{BranchNativeAdapterAir, BranchNativeAdapterChip};
+mod core;
+mod execution;
+pub use core::*;
+
+use crate::adapters::{
+    BranchNativeAdapterAir, BranchNativeAdapterExecutor, BranchNativeAdapterFiller,
+};
+
+#[cfg(test)]
+mod tests;
 
 pub type NativeBranchEqAir = VmAirWrapper<BranchNativeAdapterAir, BranchEqualCoreAir<1>>;
+pub type NativeBranchEqExecutor = NativeBranchEqualExecutor<BranchNativeAdapterExecutor>;
 pub type NativeBranchEqChip<F> =
-    VmChipWrapper<F, BranchNativeAdapterChip<F>, BranchEqualCoreChip<1>>;
+    VmChipWrapper<F, NativeBranchEqualFiller<BranchNativeAdapterFiller>>;
diff --git a/extensions/native/circuit/src/branch_eq/tests.rs b/extensions/native/circuit/src/branch_eq/tests.rs
new file mode 100644
index 0000000000..4a36045ed2
--- /dev/null
+++ b/extensions/native/circuit/src/branch_eq/tests.rs
@@ -0,0 +1,334 @@
+use std::borrow::BorrowMut;
+
+use openvm_circuit::arch::testing::{TestChipHarness, VmChipTestBuilder};
+use openvm_instructions::{
+    instruction::Instruction,
+    program::{DEFAULT_PC_STEP, PC_BITS},
+    utils::isize_to_field,
+    LocalOpcode,
+};
+use openvm_native_compiler::NativeBranchEqualOpcode;
+use openvm_rv32im_circuit::{
+    adapters::RV_B_TYPE_IMM_BITS, BranchEqualCoreAir, BranchEqualCoreCols,
+};
+use openvm_rv32im_transpiler::BranchEqualOpcode;
+use openvm_stark_backend::{
+    p3_air::BaseAir,
+    p3_field::{FieldAlgebra, PrimeField32},
+    p3_matrix::{
+        dense::{DenseMatrix, RowMajorMatrix},
+        Matrix,
+    },
+    utils::disable_debug_builder,
+    verifier::VerificationError,
+};
+use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
+use rand::{rngs::StdRng, Rng};
+use test_case::test_case;
+
+use crate::{
+    adapters::{BranchNativeAdapterAir, BranchNativeAdapterExecutor, BranchNativeAdapterFiller},
+    branch_eq::{run_eq, NativeBranchEqAir, NativeBranchEqChip, NativeBranchEqExecutor},
+    test_utils::write_native_or_imm,
+    NativeBranchEqualFiller,
+};
+
+type F = BabyBear;
+const MAX_INS_CAPACITY: usize = 128;
+const ABS_MAX_IMM: i32 = 1 << (RV_B_TYPE_IMM_BITS - 1);
+type Harness = TestChipHarness<F, NativeBranchEqExecutor, NativeBranchEqAir, NativeBranchEqChip<F>>;
+
+fn create_test_chip(tester: &mut VmChipTestBuilder<F>) -> Harness {
+    let air = NativeBranchEqAir::new(
+        BranchNativeAdapterAir::new(tester.execution_bridge(), tester.memory_bridge()),
+        BranchEqualCoreAir::new(NativeBranchEqualOpcode::CLASS_OFFSET, DEFAULT_PC_STEP),
+    );
+    let executor = NativeBranchEqExecutor::new(
+        BranchNativeAdapterExecutor,
+        NativeBranchEqualOpcode::CLASS_OFFSET,
+        DEFAULT_PC_STEP,
+    );
+    let chip = NativeBranchEqChip::<F>::new(
+        NativeBranchEqualFiller::new(BranchNativeAdapterFiller),
+        tester.memory_helper(),
+    );
+
+    Harness::with_capacity(executor, air, chip, MAX_INS_CAPACITY)
+}
+
+#[allow(clippy::too_many_arguments)]
+fn set_and_execute(
+    tester: &mut VmChipTestBuilder<F>,
+    harness: &mut Harness,
+    rng: &mut StdRng,
+    opcode: NativeBranchEqualOpcode,
+    a: Option<F>,
+    b: Option<F>,
+    imm: Option<i32>,
+) {
+    let a_val = a.unwrap_or(rng.gen());
+    let b_val = b.unwrap_or(if rng.gen_bool(0.5) { a_val } else { rng.gen() });
+    let imm = imm.unwrap_or(rng.gen_range((-ABS_MAX_IMM)..ABS_MAX_IMM));
+    let (a, a_as) = write_native_or_imm(tester, rng, a_val, None);
+    let (b, b_as) = write_native_or_imm(tester, rng, b_val, None);
+    let initial_pc = rng.gen_range(imm.unsigned_abs()..(1 << (PC_BITS - 1)) - imm.unsigned_abs());
+
+    tester.execute_with_pc(
+        harness,
+        &Instruction::new(
+            opcode.global_opcode(),
+            a,
+            b,
+            isize_to_field::<F>(imm as isize),
+            F::from_canonical_usize(a_as),
+            F::from_canonical_usize(b_as),
+            F::ZERO,
+            F::ZERO,
+        ),
+        initial_pc,
+    );
+
+    let cmp_result = run_eq(opcode.0 == BranchEqualOpcode::BEQ, a_val, b_val).0;
+    let from_pc = tester.execution.last_from_pc().as_canonical_u32() as i32;
+    let to_pc = tester.execution.last_to_pc().as_canonical_u32() as i32;
+    let pc_inc = if cmp_result {
+        imm
+    } else {
+        DEFAULT_PC_STEP as i32
+    };
+
+    assert_eq!(to_pc, from_pc + pc_inc);
+}
+
+//////////////////////////////////////////////////////////////////////////////////////
+// POSITIVE TESTS
+//
+// Randomly generate computations and execute, ensuring that the generated trace
+// passes all constraints.
+//////////////////////////////////////////////////////////////////////////////////////
+
+#[test_case(BranchEqualOpcode::BEQ, 100)]
+#[test_case(BranchEqualOpcode::BNE, 100)]
+fn rand_rv32_branch_eq_test(opcode: BranchEqualOpcode, num_ops: usize) {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default_native();
+    let mut harness = create_test_chip(&mut tester);
+    let opcode = NativeBranchEqualOpcode(opcode);
+    for _ in 0..num_ops {
+        set_and_execute(
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            opcode,
+            None,
+            None,
+            None,
+        );
+    }
+
+    let tester = tester.build().load(harness).finalize();
+    tester.simple_test().expect("Verification failed");
+}
+
+//////////////////////////////////////////////////////////////////////////////////////
+// NEGATIVE TESTS
+//
+// Given a fake trace of a single operation, setup a chip and run the test. We replace
+// part of the trace and check that the chip throws the expected error.
+//////////////////////////////////////////////////////////////////////////////////////
+
+#[allow(clippy::too_many_arguments)]
+fn run_negative_branch_eq_test(
+    opcode: BranchEqualOpcode,
+    a: F,
+    b: F,
+    prank_cmp_result: Option<bool>,
+    prank_diff_inv_marker: Option<F>,
+    error: VerificationError,
+) {
+    let imm = 16i32;
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default_native();
+    let mut harness = create_test_chip(&mut tester);
+
+    set_and_execute(
+        &mut tester,
+        &mut harness,
+        &mut rng,
+        NativeBranchEqualOpcode(opcode),
+        Some(a),
+        Some(b),
+        Some(imm),
+    );
+
+    let adapter_width = BaseAir::<F>::width(&harness.air.adapter);
+    let modify_trace = |trace: &mut DenseMatrix<BabyBear>| {
+        let mut values = trace.row_slice(0).to_vec();
+        let cols: &mut BranchEqualCoreCols<F, 1> =
+            values.split_at_mut(adapter_width).1.borrow_mut();
+        if let Some(cmp_result) = prank_cmp_result {
+            cols.cmp_result = F::from_bool(cmp_result);
+        }
+        if let Some(diff_inv_marker) = prank_diff_inv_marker {
+            cols.diff_inv_marker = [diff_inv_marker];
+        }
+        *trace = RowMajorMatrix::new(values, trace.width());
+    };
+
+    disable_debug_builder();
+    let tester = tester
+        .build()
+        .load_and_prank_trace(harness, modify_trace)
+        .finalize();
+    tester.simple_test_with_expected_error(error);
+}
+
+#[test]
+fn rv32_beq_wrong_cmp_negative_test() {
+    run_negative_branch_eq_test(
+        BranchEqualOpcode::BEQ,
+        F::from_canonical_u32(7 << 16),
+        F::from_canonical_u32(7 << 24),
+        Some(true),
+        None,
+        VerificationError::OodEvaluationMismatch,
+    );
+
+    run_negative_branch_eq_test(
+        BranchEqualOpcode::BEQ,
+        F::from_canonical_u32(7 << 16),
+        F::from_canonical_u32(7 << 16),
+        Some(false),
+        None,
+        VerificationError::OodEvaluationMismatch,
+    );
+}
+
+#[test]
+fn rv32_beq_zero_inv_marker_negative_test() {
+    run_negative_branch_eq_test(
+        BranchEqualOpcode::BEQ,
+        F::from_canonical_u32(7 << 16),
+        F::from_canonical_u32(7 << 24),
+        Some(true),
+        Some(F::ZERO),
+        VerificationError::OodEvaluationMismatch,
+    );
+}
+
+#[test]
+fn rv32_beq_invalid_inv_marker_negative_test() {
+    run_negative_branch_eq_test(
+        BranchEqualOpcode::BEQ,
+        F::from_canonical_u32(7 << 16),
+        F::from_canonical_u32(7 << 24),
+        Some(false),
+        Some(F::from_canonical_u32(1 << 16)),
+        VerificationError::OodEvaluationMismatch,
+    );
+}
+
+#[test]
+fn rv32_bne_wrong_cmp_negative_test() {
+    run_negative_branch_eq_test(
+        BranchEqualOpcode::BNE,
+        F::from_canonical_u32(7 << 16),
+        F::from_canonical_u32(7 << 24),
+        Some(false),
+        None,
+        VerificationError::OodEvaluationMismatch,
+    );
+
+    run_negative_branch_eq_test(
+        BranchEqualOpcode::BNE,
+        F::from_canonical_u32(7 << 16),
+        F::from_canonical_u32(7 << 16),
+        Some(true),
+        None,
+        VerificationError::OodEvaluationMismatch,
+    );
+}
+
+#[test]
+fn rv32_bne_zero_inv_marker_negative_test() {
+    run_negative_branch_eq_test(
+        BranchEqualOpcode::BNE,
+        F::from_canonical_u32(7 << 16),
+        F::from_canonical_u32(7 << 24),
+        Some(false),
+        Some(F::ZERO),
+        VerificationError::OodEvaluationMismatch,
+    );
+}
+
+#[test]
+fn rv32_bne_invalid_inv_marker_negative_test() {
+    run_negative_branch_eq_test(
+        BranchEqualOpcode::BNE,
+        F::from_canonical_u32(7 << 16),
+        F::from_canonical_u32(7 << 24),
+        Some(true),
+        Some(F::from_canonical_u32(1 << 16)),
+        VerificationError::OodEvaluationMismatch,
+    );
+}
+
+///////////////////////////////////////////////////////////////////////////////////////
+// SANITY TESTS
+//
+// Ensure that execution and the `run_eq` helper produce the correct results.
+///////////////////////////////////////////////////////////////////////////////////////
+
+#[test]
+fn execute_roundtrip_sanity_test() {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default_native();
+    let mut harness = create_test_chip(&mut tester);
+
+    let x = F::from_canonical_u32(u32::from_le_bytes([19, 4, 179, 60]));
+    let y = F::from_canonical_u32(u32::from_le_bytes([19, 32, 180, 60]));
+    set_and_execute(
+        &mut tester,
+        &mut harness,
+        &mut rng,
+        NativeBranchEqualOpcode(BranchEqualOpcode::BEQ),
+        Some(x),
+        Some(y),
+        Some(8),
+    );
+
+    set_and_execute(
+        &mut tester,
+        &mut harness,
+        &mut rng,
+        NativeBranchEqualOpcode(BranchEqualOpcode::BNE),
+        Some(x),
+        Some(y),
+        Some(8),
+    );
+}
+
+#[test]
+fn run_eq_sanity_test() {
+    let x = F::from_canonical_u32(u32::from_le_bytes([19, 4, 17, 60]));
+    let (cmp_result, diff_val) = run_eq(true, x, x);
+    assert!(cmp_result);
+    assert_eq!(diff_val, F::ZERO);
+
+    let (cmp_result, diff_val) = run_eq(false, x, x);
+    assert!(!cmp_result);
+    assert_eq!(diff_val, F::ZERO);
+}
+
+#[test]
+fn run_ne_sanity_test() {
+    let x = F::from_canonical_u32(u32::from_le_bytes([19, 4, 17, 60]));
+    let y = F::from_canonical_u32(u32::from_le_bytes([19, 32, 18, 60]));
+    let (cmp_result, diff_val) = run_eq(true, x, y);
+    assert!(!cmp_result);
+    assert_eq!(diff_val * (x - y), F::ONE);
+
+    let (cmp_result, diff_val) = run_eq(false, x, y);
+    assert!(cmp_result);
+    assert_eq!(diff_val * (x - y), F::ONE);
+}
diff --git a/extensions/native/circuit/src/castf/core.rs b/extensions/native/circuit/src/castf/core.rs
index 664767e35e..ff5f15737c 100644
--- a/extensions/native/circuit/src/castf/core.rs
+++ b/extensions/native/circuit/src/castf/core.rs
@@ -1,14 +1,15 @@
 use std::borrow::{Borrow, BorrowMut};
 
-use openvm_circuit::arch::{
-    AdapterAirContext, AdapterRuntimeContext, MinimalInstruction, Result, VmAdapterInterface,
-    VmCoreAir, VmCoreChip,
+use openvm_circuit::{
+    arch::*,
+    system::memory::{online::TracingMemory, MemoryAuxColsFactory},
 };
-use openvm_circuit_primitives::var_range::{
-    SharedVariableRangeCheckerChip, VariableRangeCheckerBus,
+use openvm_circuit_primitives::{
+    var_range::{SharedVariableRangeCheckerChip, VariableRangeCheckerBus},
+    AlignedBytesBorrow,
 };
 use openvm_circuit_primitives_derive::AlignedBorrow;
-use openvm_instructions::{instruction::Instruction, LocalOpcode};
+use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP, LocalOpcode};
 use openvm_native_compiler::CastfOpcode;
 use openvm_rv32im_circuit::adapters::RV32_REGISTER_NUM_LIMBS;
 use openvm_stark_backend::{
@@ -17,7 +18,8 @@ use openvm_stark_backend::{
     p3_field::{Field, FieldAlgebra, PrimeField32},
     rap::BaseAirWithPublicValues,
 };
-use serde::{Deserialize, Serialize};
+
+use crate::CASTF_MAX_BITS;
 
 // LIMB_BITS is the size of the limbs in bits.
 pub(crate) const LIMB_BITS: usize = 8;
@@ -32,7 +34,7 @@ pub struct CastFCoreCols<T> {
     pub is_valid: T,
 }
 
-#[derive(Copy, Clone, Debug)]
+#[derive(derive_new::new, Copy, Clone, Debug)]
 pub struct CastFCoreAir {
     pub bus: VariableRangeCheckerBus, /* to communicate with the range checker that checks that
                                        * all limbs are < 2^LIMB_BITS */
@@ -105,97 +107,92 @@ where
 }
 
 #[repr(C)]
-#[derive(Debug, Serialize, Deserialize)]
-pub struct CastFRecord<F> {
-    pub in_val: F,
-    pub out_val: [u32; RV32_REGISTER_NUM_LIMBS],
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct CastFCoreRecord {
+    pub val: u32,
 }
 
-pub struct CastFCoreChip {
-    pub air: CastFCoreAir,
-    pub range_checker_chip: SharedVariableRangeCheckerChip,
+#[derive(derive_new::new, Clone, Copy)]
+pub struct CastFCoreExecutor<A> {
+    adapter: A,
 }
 
-impl CastFCoreChip {
-    pub fn new(range_checker_chip: SharedVariableRangeCheckerChip) -> Self {
-        Self {
-            air: CastFCoreAir {
-                bus: range_checker_chip.bus(),
-            },
-            range_checker_chip,
-        }
-    }
+#[derive(derive_new::new)]
+pub struct CastFCoreFiller<A> {
+    adapter: A,
+    pub range_checker_chip: SharedVariableRangeCheckerChip,
 }
 
-impl<F: PrimeField32, I: VmAdapterInterface<F>> VmCoreChip<F, I> for CastFCoreChip
+impl<F, A, RA> PreflightExecutor<F, RA> for CastFCoreExecutor<A>
 where
-    I::Reads: Into<[[F; 1]; 1]>,
-    I::Writes: From<[[F; RV32_REGISTER_NUM_LIMBS]; 1]>,
+    F: PrimeField32,
+    A: 'static
+        + AdapterTraceExecutor<F, ReadData = [F; 1], WriteData = [u8; RV32_REGISTER_NUM_LIMBS]>,
+    for<'buf> RA: RecordArena<
+        'buf,
+        EmptyAdapterCoreLayout<F, A>,
+        (A::RecordMut<'buf>, &'buf mut CastFCoreRecord),
+    >,
 {
-    type Record = CastFRecord<F>;
-    type Air = CastFCoreAir;
+    fn get_opcode_name(&self, _opcode: usize) -> String {
+        format!("{:?}", CastfOpcode::CASTF)
+    }
 
-    #[allow(clippy::type_complexity)]
-    fn execute_instruction(
+    fn execute(
         &self,
+        state: VmStateMut<F, TracingMemory, RA>,
         instruction: &Instruction<F>,
-        _from_pc: u32,
-        reads: I::Reads,
-    ) -> Result<(AdapterRuntimeContext<F, I>, Self::Record)> {
-        let Instruction { opcode, .. } = instruction;
+    ) -> Result<(), ExecutionError> {
+        let (mut adapter_record, core_record) = state.ctx.alloc(EmptyAdapterCoreLayout::new());
 
-        assert_eq!(
-            opcode.local_opcode_idx(CastfOpcode::CLASS_OFFSET),
-            CastfOpcode::CASTF as usize
-        );
+        A::start(*state.pc, state.memory, &mut adapter_record);
 
-        let y = reads.into()[0][0];
-        let x = CastF::solve(y.as_canonical_u32());
+        core_record.val = self
+            .adapter
+            .read(state.memory, instruction, &mut adapter_record)[0]
+            .as_canonical_u32();
 
-        let output = AdapterRuntimeContext {
-            to_pc: None,
-            writes: [x.map(F::from_canonical_u32)].into(),
-        };
+        let x = run_castf(core_record.val);
 
-        let record = CastFRecord {
-            in_val: y,
-            out_val: x,
-        };
+        self.adapter
+            .write(state.memory, instruction, x, &mut adapter_record);
 
-        Ok((output, record))
-    }
+        *state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP);
 
-    fn get_opcode_name(&self, _opcode: usize) -> String {
-        format!("{:?}", CastfOpcode::CASTF)
+        Ok(())
     }
+}
 
-    fn generate_trace_row(&self, row_slice: &mut [F], record: Self::Record) {
-        for (i, limb) in record.out_val.iter().enumerate() {
-            if i == 3 {
-                self.range_checker_chip.add_count(*limb, FINAL_LIMB_BITS);
+impl<F, A> TraceFiller<F> for CastFCoreFiller<A>
+where
+    F: PrimeField32,
+    A: 'static + AdapterTraceFiller<F>,
+{
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, row_slice: &mut [F]) {
+        let (adapter_row, mut core_row) = unsafe { row_slice.split_at_mut_unchecked(A::WIDTH) };
+        self.adapter.fill_trace_row(mem_helper, adapter_row);
+
+        let record: &CastFCoreRecord = unsafe { get_record_from_slice(&mut core_row, ()) };
+        let core_row: &mut CastFCoreCols<_> = core_row.borrow_mut();
+
+        // Writing in reverse order to avoid overwriting the `record`
+        let out = run_castf(record.val);
+        for (i, &limb) in out.iter().enumerate() {
+            let limb_bits = if i == out.len() - 1 {
+                FINAL_LIMB_BITS
             } else {
-                self.range_checker_chip.add_count(*limb, LIMB_BITS);
-            }
+                LIMB_BITS
+            };
+            self.range_checker_chip.add_count(limb as u32, limb_bits);
         }
-
-        let cols: &mut CastFCoreCols<F> = row_slice.borrow_mut();
-        cols.in_val = record.in_val;
-        cols.out_val = record.out_val.map(F::from_canonical_u32);
-        cols.is_valid = F::ONE;
-    }
-
-    fn air(&self) -> &Self::Air {
-        &self.air
+        core_row.is_valid = F::ONE;
+        core_row.out_val = out.map(F::from_canonical_u8);
+        core_row.in_val = F::from_canonical_u32(record.val);
     }
 }
 
-pub struct CastF;
-impl CastF {
-    pub(super) fn solve(y: u32) -> [u32; RV32_REGISTER_NUM_LIMBS] {
-        let mut x = [0; 4];
-        for (i, limb) in x.iter_mut().enumerate() {
-            *limb = (y >> (8 * i)) & 0xFF;
-        }
-        x
-    }
+#[inline(always)]
+pub(super) fn run_castf(y: u32) -> [u8; RV32_REGISTER_NUM_LIMBS] {
+    debug_assert!(y < 1 << CASTF_MAX_BITS);
+    y.to_le_bytes()
 }
diff --git a/extensions/native/circuit/src/castf/execution.rs b/extensions/native/circuit/src/castf/execution.rs
new file mode 100644
index 0000000000..b477620e4a
--- /dev/null
+++ b/extensions/native/circuit/src/castf/execution.rs
@@ -0,0 +1,136 @@
+use std::borrow::{Borrow, BorrowMut};
+
+use openvm_circuit::{arch::*, system::memory::online::GuestMemory};
+use openvm_circuit_primitives::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_MEMORY_AS, LocalOpcode,
+};
+use openvm_native_compiler::{conversion::AS, CastfOpcode};
+use openvm_rv32im_circuit::adapters::RV32_REGISTER_NUM_LIMBS;
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::core::{run_castf, CastFCoreExecutor};
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct CastFPreCompute {
+    a: u32,
+    b: u32,
+}
+
+impl<A> CastFCoreExecutor<A> {
+    #[inline(always)]
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut CastFPreCompute,
+    ) -> Result<(), StaticProgramError> {
+        let Instruction {
+            a, b, d, e, opcode, ..
+        } = inst;
+
+        if opcode.local_opcode_idx(CastfOpcode::CLASS_OFFSET) != CastfOpcode::CASTF as usize {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+        if d.as_canonical_u32() != RV32_MEMORY_AS {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+        if e.as_canonical_u32() != AS::Native as u32 {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+
+        let a = a.as_canonical_u32();
+        let b = b.as_canonical_u32();
+        *data = CastFPreCompute { a, b };
+
+        Ok(())
+    }
+}
+
+impl<F, A> Executor<F> for CastFCoreExecutor<A>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize {
+        size_of::<CastFPreCompute>()
+    }
+
+    #[inline(always)]
+    fn pre_compute<Ctx: ExecutionCtxTrait>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let pre_compute: &mut CastFPreCompute = data.borrow_mut();
+
+        self.pre_compute_impl(pc, inst, pre_compute)?;
+
+        let fn_ptr = execute_e1_impl::<_, _>;
+
+        Ok(fn_ptr)
+    }
+}
+
+impl<F, A> MeteredExecutor<F> for CastFCoreExecutor<A>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn metered_pre_compute_size(&self) -> usize {
+        size_of::<E2PreCompute<CastFPreCompute>>()
+    }
+
+    #[inline(always)]
+    fn metered_pre_compute<Ctx: MeteredExecutionCtxTrait>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let pre_compute: &mut E2PreCompute<CastFPreCompute> = data.borrow_mut();
+        pre_compute.chip_idx = chip_idx as u32;
+
+        self.pre_compute_impl(pc, inst, &mut pre_compute.data)?;
+
+        let fn_ptr = execute_e2_impl::<_, _>;
+
+        Ok(fn_ptr)
+    }
+}
+
+unsafe fn execute_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &CastFPreCompute = pre_compute.borrow();
+    execute_e12_impl(pre_compute, vm_state);
+}
+
+unsafe fn execute_e2_impl<F: PrimeField32, CTX: MeteredExecutionCtxTrait>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &E2PreCompute<CastFPreCompute> = pre_compute.borrow();
+    vm_state
+        .ctx
+        .on_height_change(pre_compute.chip_idx as usize, 1);
+    execute_e12_impl(&pre_compute.data, vm_state);
+}
+
+#[inline(always)]
+unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait>(
+    pre_compute: &CastFPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let y = vm_state.vm_read::<F, 1>(AS::Native as u32, pre_compute.b)[0];
+    let x = run_castf(y.as_canonical_u32());
+
+    vm_state.vm_write::<u8, RV32_REGISTER_NUM_LIMBS>(RV32_MEMORY_AS, pre_compute.a, &x);
+
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+}
diff --git a/extensions/native/circuit/src/castf/mod.rs b/extensions/native/circuit/src/castf/mod.rs
index 9fbd77f245..3f87e62c1f 100644
--- a/extensions/native/circuit/src/castf/mod.rs
+++ b/extensions/native/circuit/src/castf/mod.rs
@@ -1,12 +1,14 @@
 use openvm_circuit::arch::{VmAirWrapper, VmChipWrapper};
 
-use super::adapters::convert_adapter::{ConvertAdapterAir, ConvertAdapterChip};
-
-#[cfg(test)]
-mod tests;
+use crate::adapters::{ConvertAdapterAir, ConvertAdapterExecutor, ConvertAdapterFiller};
 
 mod core;
+mod execution;
 pub use core::*;
 
+#[cfg(test)]
+mod tests;
+
 pub type CastFAir = VmAirWrapper<ConvertAdapterAir<1, 4>, CastFCoreAir>;
-pub type CastFChip<F> = VmChipWrapper<F, ConvertAdapterChip<F, 1, 4>, CastFCoreChip>;
+pub type CastFExecutor = CastFCoreExecutor<ConvertAdapterExecutor<1, 4>>;
+pub type CastFChip<F> = VmChipWrapper<F, CastFCoreFiller<ConvertAdapterFiller<1, 4>>>;
diff --git a/extensions/native/circuit/src/castf/tests.rs b/extensions/native/circuit/src/castf/tests.rs
index 9758e6b956..9801bb235c 100644
--- a/extensions/native/circuit/src/castf/tests.rs
+++ b/extensions/native/circuit/src/castf/tests.rs
@@ -1,254 +1,222 @@
 use std::borrow::BorrowMut;
 
-use openvm_circuit::arch::testing::{memory::gen_pointer, VmChipTestBuilder};
-use openvm_instructions::{instruction::Instruction, LocalOpcode};
-use openvm_native_compiler::CastfOpcode;
-use openvm_stark_backend::{
-    p3_field::FieldAlgebra, utils::disable_debug_builder, verifier::VerificationError, Chip,
+use openvm_circuit::arch::{
+    testing::{memory::gen_pointer, TestChipHarness, VmChipTestBuilder},
+    MemoryConfig,
+};
+use openvm_instructions::{
+    instruction::Instruction,
+    riscv::{RV32_MEMORY_AS, RV32_REGISTER_NUM_LIMBS},
+    LocalOpcode,
 };
-use openvm_stark_sdk::{
-    config::baby_bear_poseidon2::BabyBearPoseidon2Engine, engine::StarkFriEngine,
-    p3_baby_bear::BabyBear, utils::create_seeded_rng,
+use openvm_native_compiler::{conversion::AS, CastfOpcode};
+use openvm_stark_backend::{
+    p3_air::BaseAir,
+    p3_field::{FieldAlgebra, PrimeField32},
+    p3_matrix::{
+        dense::{DenseMatrix, RowMajorMatrix},
+        Matrix,
+    },
+    utils::disable_debug_builder,
+    verifier::VerificationError,
 };
+use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
 use rand::{rngs::StdRng, Rng};
 
-use super::{
-    super::adapters::convert_adapter::{ConvertAdapterChip, ConvertAdapterCols},
-    CastF, CastFChip, CastFCoreChip, CastFCoreCols, FINAL_LIMB_BITS, LIMB_BITS,
+use super::{CastFChip, CastFCoreAir, CastFCoreCols, CastFExecutor, LIMB_BITS};
+use crate::{
+    adapters::{
+        ConvertAdapterAir, ConvertAdapterCols, ConvertAdapterExecutor, ConvertAdapterFiller,
+    },
+    castf::run_castf,
+    test_utils::write_native_array,
+    CastFAir, CastFCoreFiller, CASTF_MAX_BITS,
 };
+
+const MAX_INS_CAPACITY: usize = 128;
+const READ_SIZE: usize = 1;
+const WRITE_SIZE: usize = 4;
 type F = BabyBear;
+type Harness = TestChipHarness<F, CastFExecutor, CastFAir, CastFChip<F>>;
 
-fn generate_uint_number(rng: &mut StdRng) -> u32 {
-    rng.gen_range(0..(1 << 30) - 1)
+fn create_test_chip(tester: &VmChipTestBuilder<F>) -> Harness {
+    let range_checker = tester.range_checker().clone();
+    let air = CastFAir::new(
+        ConvertAdapterAir::new(tester.execution_bridge(), tester.memory_bridge()),
+        CastFCoreAir::new(range_checker.bus()),
+    );
+    let executor = CastFExecutor::new(ConvertAdapterExecutor::<READ_SIZE, WRITE_SIZE>::new());
+    let chip = CastFChip::<F>::new(
+        CastFCoreFiller::new(ConvertAdapterFiller, range_checker),
+        tester.memory_helper(),
+    );
+    Harness::with_capacity(executor, air, chip, MAX_INS_CAPACITY)
 }
 
-fn prepare_castf_rand_write_execute(
+fn set_and_execute(
     tester: &mut VmChipTestBuilder<F>,
-    chip: &mut CastFChip<F>,
-    y: u32,
+    harness: &mut Harness,
     rng: &mut StdRng,
+    b: Option<F>,
 ) {
-    let operand1 = y;
-
-    let as_x = 2usize; // d
-    let as_y = 4usize; // e
-    let address_x = gen_pointer(rng, 32); // a
-    let address_y = gen_pointer(rng, 32); // b
-
-    let operand1_f = F::from_canonical_u32(y);
-
-    tester.write_cell(as_y, address_y, operand1_f);
-    let x = CastF::solve(operand1);
+    let b_val = b.unwrap_or(F::from_canonical_u32(rng.gen_range(0..1 << CASTF_MAX_BITS)));
+    let b_ptr = write_native_array(tester, rng, Some([b_val])).1;
 
+    let a = gen_pointer(rng, RV32_REGISTER_NUM_LIMBS);
     tester.execute(
-        chip,
+        harness,
         &Instruction::from_usize(
             CastfOpcode::CASTF.global_opcode(),
-            [address_x, address_y, 0, as_x, as_y],
+            [a, b_ptr, 0, RV32_MEMORY_AS as usize, AS::Native as usize],
         ),
     );
-    assert_eq!(
-        x.map(F::from_canonical_u32),
-        tester.read::<4>(as_x, address_x)
-    );
+    let expected = run_castf(b_val.as_canonical_u32());
+    let result = tester.read::<RV32_REGISTER_NUM_LIMBS>(RV32_MEMORY_AS as usize, a);
+    assert_eq!(result.map(|x| x.as_canonical_u32() as u8), expected);
 }
 
+///////////////////////////////////////////////////////////////////////////////////////
+// POSITIVE TESTS
+//
+// Randomly generate computations and execute, ensuring that the generated trace
+// passes all constraints.
+///////////////////////////////////////////////////////////////////////////////////////
+
 #[test]
 fn castf_rand_test() {
     let mut rng = create_seeded_rng();
-    let mut tester = VmChipTestBuilder::default();
-    let mut chip = CastFChip::<F>::new(
-        ConvertAdapterChip::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-        ),
-        CastFCoreChip::new(tester.range_checker()),
-        tester.offline_memory_mutex_arc(),
-    );
-    let num_tests: usize = 3;
+    let mut tester = VmChipTestBuilder::volatile(MemoryConfig::default());
+    let mut harness = create_test_chip(&tester);
+    let num_ops = 100;
 
-    for _ in 0..num_tests {
-        let y = generate_uint_number(&mut rng);
-        prepare_castf_rand_write_execute(&mut tester, &mut chip, y, &mut rng);
+    for _ in 0..num_ops {
+        set_and_execute(&mut tester, &mut harness, &mut rng, None);
     }
 
-    let tester = tester.build().load(chip).finalize();
+    set_and_execute(&mut tester, &mut harness, &mut rng, Some(F::ZERO));
+
+    let tester = tester.build().load(harness).finalize();
     tester.simple_test().expect("Verification failed");
 }
 
-#[test]
-fn negative_castf_overflow_test() {
-    let mut tester = VmChipTestBuilder::default();
-    let range_checker_chip = tester.range_checker();
-    let mut chip = CastFChip::<F>::new(
-        ConvertAdapterChip::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-        ),
-        CastFCoreChip::new(range_checker_chip.clone()),
-        tester.offline_memory_mutex_arc(),
-    );
+//////////////////////////////////////////////////////////////////////////////////////
+// NEGATIVE TESTS
+//
+// Given a fake trace of a single operation, set up a chip and run the test. We replace
+// part of the trace and check that the chip throws the expected error.
+//////////////////////////////////////////////////////////////////////////////////////
+
+#[derive(Clone, Copy, Default)]
+struct CastFPrankValues {
+    pub in_val: Option<u32>,
+    pub out_val: Option<[u32; 4]>,
+    pub a_pointer: Option<u32>,
+    pub b_pointer: Option<u32>,
+}
 
+fn run_negative_castf_test(prank_vals: CastFPrankValues, b: Option<F>, error: VerificationError) {
     let mut rng = create_seeded_rng();
-    let y = generate_uint_number(&mut rng);
-    prepare_castf_rand_write_execute(&mut tester, &mut chip, y, &mut rng);
-    tester.build();
-
-    let chip_air = chip.air();
-    let mut chip_input = chip.generate_air_proof_input();
-    let trace = chip_input.raw.common_main.as_mut().unwrap();
-    let row = trace.row_mut(0);
-    let cols: &mut CastFCoreCols<F> = row
-        .split_at_mut(ConvertAdapterCols::<F, 1, 4>::width())
-        .1
-        .borrow_mut();
-    cols.out_val[3] = F::from_canonical_u32(rng.gen_range(1 << FINAL_LIMB_BITS..1 << LIMB_BITS));
-
-    let rc_air = range_checker_chip.air();
-    let rc_p_input = range_checker_chip.generate_air_proof_input();
+    let mut tester = VmChipTestBuilder::volatile(MemoryConfig::default());
+
+    let mut harness = create_test_chip(&tester);
+    set_and_execute(&mut tester, &mut harness, &mut rng, b);
+
+    let adapter_width = BaseAir::<F>::width(&harness.air.adapter);
+
+    let modify_trace = |trace: &mut DenseMatrix<F>| {
+        let mut values = trace.row_slice(0).to_vec();
+        let (adapter_row, core_row) = values.split_at_mut(adapter_width);
+        let core_cols: &mut CastFCoreCols<F> = core_row.borrow_mut();
+        let adapter_cols: &mut ConvertAdapterCols<F, READ_SIZE, WRITE_SIZE> =
+            adapter_row.borrow_mut();
+
+        if let Some(in_val) = prank_vals.in_val {
+            // TODO: in_val is actually never used in the AIR, should remove it
+            core_cols.in_val = F::from_canonical_u32(in_val);
+        }
+        if let Some(out_val) = prank_vals.out_val {
+            core_cols.out_val = out_val.map(F::from_canonical_u32);
+        }
+        if let Some(a_pointer) = prank_vals.a_pointer {
+            adapter_cols.a_pointer = F::from_canonical_u32(a_pointer);
+        }
+        if let Some(b_pointer) = prank_vals.b_pointer {
+            adapter_cols.b_pointer = F::from_canonical_u32(b_pointer);
+        }
+        *trace = RowMajorMatrix::new(values, trace.width());
+    };
 
     disable_debug_builder();
-    assert_eq!(
-        BabyBearPoseidon2Engine::run_test_fast(
-            vec![chip_air, rc_air],
-            vec![chip_input, rc_p_input]
-        )
-        .err(),
-        Some(VerificationError::ChallengePhaseError),
-        "Expected verification to fail, but it didn't"
-    );
+    let tester = tester
+        .build()
+        .load_and_prank_trace(harness, modify_trace)
+        .finalize();
+    tester.simple_test_with_expected_error(error);
 }
 
 #[test]
-fn negative_castf_memread_test() {
-    let mut tester = VmChipTestBuilder::default();
-    let range_checker_chip = tester.memory_controller().borrow().range_checker.clone();
-    let mut chip = CastFChip::<F>::new(
-        ConvertAdapterChip::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-        ),
-        CastFCoreChip::new(range_checker_chip.clone()),
-        tester.offline_memory_mutex_arc(),
+fn castf_invalid_out_val_test() {
+    run_negative_castf_test(
+        CastFPrankValues {
+            out_val: Some([2 << LIMB_BITS, 0, 0, 0]),
+            ..Default::default()
+        },
+        Some(F::from_canonical_u32(2 << LIMB_BITS)),
+        VerificationError::ChallengePhaseError,
     );
 
-    let mut rng = create_seeded_rng();
-    let y = generate_uint_number(&mut rng);
-    prepare_castf_rand_write_execute(&mut tester, &mut chip, y, &mut rng);
-    tester.build();
-
-    let chip_air = chip.air();
-    let mut chip_input = chip.generate_air_proof_input();
-    let trace = chip_input.raw.common_main.as_mut().unwrap();
-    let row = trace.row_mut(0);
-    let cols: &mut ConvertAdapterCols<F, 1, 4> = row
-        .split_at_mut(ConvertAdapterCols::<F, 1, 4>::width())
-        .0
-        .borrow_mut();
-    cols.b_pointer += F::ONE;
-
-    let rc_air = range_checker_chip.air();
-    let rc_p_input = range_checker_chip.generate_air_proof_input();
-
-    disable_debug_builder();
-    assert_eq!(
-        BabyBearPoseidon2Engine::run_test_fast(
-            vec![chip_air, rc_air],
-            vec![chip_input, rc_p_input]
-        )
-        .err(),
-        Some(VerificationError::ChallengePhaseError),
-        "Expected verification to fail, but it didn't"
+    let prime = F::NEG_ONE.as_canonical_u32() + 1; // the field modulus p (congruent to 0)
+    run_negative_castf_test(
+        CastFPrankValues {
+            out_val: Some(prime.to_le_bytes().map(|x| x as u32)),
+            ..Default::default()
+        },
+        Some(F::ZERO),
+        VerificationError::ChallengePhaseError,
     );
 }
 
 #[test]
-fn negative_castf_memwrite_test() {
-    let mut tester = VmChipTestBuilder::default();
-    let range_checker_chip = tester.memory_controller().borrow().range_checker.clone();
-    let mut chip = CastFChip::<F>::new(
-        ConvertAdapterChip::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-        ),
-        CastFCoreChip::new(range_checker_chip.clone()),
-        tester.offline_memory_mutex_arc(),
+fn negative_convert_adapter_test() {
+    // overflowing the memory pointer
+    run_negative_castf_test(
+        CastFPrankValues {
+            b_pointer: Some(1 << 30),
+            ..Default::default()
+        },
+        None,
+        VerificationError::ChallengePhaseError,
     );
 
-    let mut rng = create_seeded_rng();
-    let y = generate_uint_number(&mut rng);
-    prepare_castf_rand_write_execute(&mut tester, &mut chip, y, &mut rng);
-    tester.build();
-
-    let chip_air = chip.air();
-    let mut chip_input = chip.generate_air_proof_input();
-    let trace = chip_input.raw.common_main.as_mut().unwrap();
-    let row = trace.row_mut(0);
-    let cols: &mut ConvertAdapterCols<F, 1, 4> = row
-        .split_at_mut(ConvertAdapterCols::<F, 1, 4>::width())
-        .0
-        .borrow_mut();
-    cols.a_pointer += F::ONE;
-
-    let rc_air = range_checker_chip.air();
-    let rc_p_input = range_checker_chip.generate_air_proof_input();
-
-    disable_debug_builder();
-    assert_eq!(
-        BabyBearPoseidon2Engine::run_test_fast(
-            vec![chip_air, rc_air],
-            vec![chip_input, rc_p_input]
-        )
-        .err(),
-        Some(VerificationError::ChallengePhaseError),
-        "Expected verification to fail, but it didn't"
+    // Memory address space pointer has to be 4-byte aligned
+    run_negative_castf_test(
+        CastFPrankValues {
+            a_pointer: Some(1),
+            ..Default::default()
+        },
+        None,
+        VerificationError::ChallengePhaseError,
     );
 }
 
+#[should_panic]
 #[test]
-fn negative_castf_as_test() {
-    let mut tester = VmChipTestBuilder::default();
-    let range_checker_chip = tester.memory_controller().borrow().range_checker.clone();
-    let mut chip = CastFChip::<F>::new(
-        ConvertAdapterChip::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-        ),
-        CastFCoreChip::new(range_checker_chip.clone()),
-        tester.offline_memory_mutex_arc(),
-    );
-
+fn castf_overflow_in_val_test() {
     let mut rng = create_seeded_rng();
-    let y = generate_uint_number(&mut rng);
-    prepare_castf_rand_write_execute(&mut tester, &mut chip, y, &mut rng);
-    tester.build();
-
-    let chip_air = chip.air();
-    let mut chip_input = chip.generate_air_proof_input();
-    let trace = chip_input.raw.common_main.as_mut().unwrap();
-    let row = trace.row_mut(0);
-    let cols: &mut ConvertAdapterCols<F, 1, 4> = row
-        .split_at_mut(ConvertAdapterCols::<F, 1, 4>::width())
-        .0
-        .borrow_mut();
-    cols.a_pointer += F::ONE;
-
-    let rc_air = range_checker_chip.air();
-    let rc_p_input = range_checker_chip.generate_air_proof_input();
+    let mut tester = VmChipTestBuilder::volatile(MemoryConfig::default());
+    let mut harness = create_test_chip(&tester);
+    set_and_execute(&mut tester, &mut harness, &mut rng, Some(F::NEG_ONE));
+}
 
-    disable_debug_builder();
-    assert_eq!(
-        BabyBearPoseidon2Engine::run_test_fast(
-            vec![chip_air, rc_air],
-            vec![chip_input, rc_p_input]
-        )
-        .err(),
-        Some(VerificationError::ChallengePhaseError),
-        "Expected verification to fail, but it didn't"
-    );
+///////////////////////////////////////////////////////////////////////////////////////
+// SANITY TESTS
+//
+// Ensure that `run_castf` produces the correct results.
+///////////////////////////////////////////////////////////////////////////////////////
+
+#[test]
+fn castf_sanity_test() {
+    let b = 160558167;
+    let expected = [87, 236, 145, 9];
+    assert_eq!(run_castf(b), expected);
 }
diff --git a/extensions/native/circuit/src/extension.rs b/extensions/native/circuit/src/extension.rs
index 385c9392ac..b6fc08abe0 100644
--- a/extensions/native/circuit/src/extension.rs
+++ b/extensions/native/circuit/src/extension.rs
@@ -1,18 +1,20 @@
-use air::VerifyBatchBus;
-use alu_native_adapter::AluNativeAdapterChip;
-use branch_native_adapter::BranchNativeAdapterChip;
+use alu_native_adapter::{AluNativeAdapterAir, AluNativeAdapterExecutor};
+use branch_native_adapter::{BranchNativeAdapterAir, BranchNativeAdapterExecutor};
+use convert_adapter::{ConvertAdapterAir, ConvertAdapterExecutor};
 use derive_more::derive::From;
-use loadstore_native_adapter::NativeLoadStoreAdapterChip;
-use native_vectorized_adapter::NativeVectorizedAdapterChip;
+use fri::{FriReducedOpeningAir, FriReducedOpeningChip, FriReducedOpeningExecutor};
+use jal_rangecheck::{JalRangeCheckAir, JalRangeCheckExecutor};
+use loadstore_native_adapter::{NativeLoadStoreAdapterAir, NativeLoadStoreAdapterExecutor};
+use native_vectorized_adapter::{NativeVectorizedAdapterAir, NativeVectorizedAdapterExecutor};
 use openvm_circuit::{
     arch::{
-        ExecutionBridge, InitFileGenerator, MemoryConfig, SystemConfig, SystemPort, VmExtension,
-        VmInventory, VmInventoryBuilder, VmInventoryError,
+        AirInventory, AirInventoryError, ChipInventory, ChipInventoryError, ExecutionBridge,
+        ExecutorInventoryBuilder, ExecutorInventoryError, RowMajorMatrixArena, VmCircuitExtension,
+        VmExecutionExtension, VmProverExtension,
     },
-    system::phantom::PhantomChip,
+    system::{memory::SharedMemoryHelper, SystemPort},
 };
-use openvm_circuit_derive::{AnyEnum, InstructionExecutor, VmConfig};
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
+use openvm_circuit_derive::{AnyEnum, Executor, MeteredExecutor, PreflightExecutor};
 use openvm_instructions::{program::DEFAULT_PC_STEP, LocalOpcode, PhantomDiscriminant};
 use openvm_native_compiler::{
     CastfOpcode, FieldArithmeticOpcode, FieldExtensionOpcode, FriOpcode, NativeBranchEqualOpcode,
@@ -20,185 +22,106 @@ use openvm_native_compiler::{
     NativeRangeCheckOpcode, Poseidon2Opcode, VerifyBatchOpcode, BLOCK_LOAD_STORE_SIZE,
 };
 use openvm_poseidon2_air::Poseidon2Config;
-use openvm_rv32im_circuit::{
-    BranchEqualCoreChip, Rv32I, Rv32IExecutor, Rv32IPeriphery, Rv32Io, Rv32IoExecutor,
-    Rv32IoPeriphery, Rv32M, Rv32MExecutor, Rv32MPeriphery,
+use openvm_rv32im_circuit::BranchEqualCoreAir;
+use openvm_stark_backend::{
+    config::{StarkGenericConfig, Val},
+    p3_field::{Field, PrimeField32},
+    prover::cpu::{CpuBackend, CpuDevice},
 };
-use openvm_stark_backend::p3_field::PrimeField32;
+use openvm_stark_sdk::engine::StarkEngine;
 use serde::{Deserialize, Serialize};
 use strum::IntoEnumIterator;
 
 use crate::{
-    adapters::{convert_adapter::ConvertAdapterChip, *},
-    chip::NativePoseidon2Chip,
+    adapters::*,
+    air::{NativePoseidon2Air, VerifyBatchBus},
+    chip::{NativePoseidon2Executor, NativePoseidon2Filler},
     phantom::*,
     *,
 };
 
-#[derive(Clone, Debug, Serialize, Deserialize, VmConfig, derive_new::new)]
-pub struct NativeConfig {
-    #[system]
-    pub system: SystemConfig,
-    #[extension]
-    pub native: Native,
-}
-
-impl NativeConfig {
-    pub fn aggregation(num_public_values: usize, max_constraint_degree: usize) -> Self {
-        Self {
-            system: SystemConfig::new(
-                max_constraint_degree,
-                MemoryConfig {
-                    max_access_adapter_n: 8,
-                    ..Default::default()
-                },
-                num_public_values,
-            )
-            .with_max_segment_len((1 << 24) - 100),
-            native: Default::default(),
-        }
-    }
-}
-
-// Default implementation uses no init file
-impl InitFileGenerator for NativeConfig {}
+// ============ VmExtension Implementations ============
 
 #[derive(Clone, Copy, Debug, Default, Serialize, Deserialize)]
 pub struct Native;
 
-#[derive(ChipUsageGetter, Chip, InstructionExecutor, From, AnyEnum)]
-pub enum NativeExecutor<F: PrimeField32> {
-    LoadStore(NativeLoadStoreChip<F, 1>),
-    BlockLoadStore(NativeLoadStoreChip<F, 4>),
-    BranchEqual(NativeBranchEqChip<F>),
-    Jal(JalRangeCheckChip<F>),
-    FieldArithmetic(FieldArithmeticChip<F>),
-    FieldExtension(FieldExtensionChip<F>),
-    FriReducedOpening(FriReducedOpeningChip<F>),
-    VerifyBatch(NativePoseidon2Chip<F, 1>),
+#[derive(Clone, From, AnyEnum, Executor, MeteredExecutor, PreflightExecutor)]
+pub enum NativeExecutor<F: Field> {
+    LoadStore(NativeLoadStoreExecutor<1>),
+    BlockLoadStore(NativeLoadStoreExecutor<BLOCK_LOAD_STORE_SIZE>),
+    BranchEqual(NativeBranchEqExecutor),
+    Jal(JalRangeCheckExecutor),
+    FieldArithmetic(FieldArithmeticExecutor),
+    FieldExtension(FieldExtensionExecutor),
+    FriReducedOpening(FriReducedOpeningExecutor),
+    VerifyBatch(NativePoseidon2Executor<F, 1>),
 }
 
-#[derive(From, ChipUsageGetter, Chip, AnyEnum)]
-pub enum NativePeriphery<F: PrimeField32> {
-    Phantom(PhantomChip<F>),
-}
-
-impl<F: PrimeField32> VmExtension<F> for Native {
+impl<F: PrimeField32> VmExecutionExtension<F> for Native {
     type Executor = NativeExecutor<F>;
-    type Periphery = NativePeriphery<F>;
 
-    fn build(
+    fn extend_execution(
         &self,
-        builder: &mut VmInventoryBuilder<F>,
-    ) -> Result<VmInventory<NativeExecutor<F>, NativePeriphery<F>>, VmInventoryError> {
-        let mut inventory = VmInventory::new();
-        let SystemPort {
-            execution_bus,
-            program_bus,
-            memory_bridge,
-        } = builder.system_port();
-        let offline_memory = builder.system_base().offline_memory();
-
-        let mut load_store_chip = NativeLoadStoreChip::<F, 1>::new(
-            NativeLoadStoreAdapterChip::new(
-                execution_bus,
-                program_bus,
-                memory_bridge,
-                NativeLoadStoreOpcode::CLASS_OFFSET,
-            ),
-            NativeLoadStoreCoreChip::new(NativeLoadStoreOpcode::CLASS_OFFSET),
-            offline_memory.clone(),
+        inventory: &mut ExecutorInventoryBuilder<F, NativeExecutor<F>>,
+    ) -> Result<(), ExecutorInventoryError> {
+        let load_store = NativeLoadStoreExecutor::<1>::new(
+            NativeLoadStoreAdapterExecutor::new(NativeLoadStoreOpcode::CLASS_OFFSET),
+            NativeLoadStoreOpcode::CLASS_OFFSET,
         );
-        load_store_chip.core.set_streams(builder.streams().clone());
-
         inventory.add_executor(
-            load_store_chip,
+            load_store,
             NativeLoadStoreOpcode::iter().map(|x| x.global_opcode()),
         )?;
 
-        let mut block_load_store_chip = NativeLoadStoreChip::<F, BLOCK_LOAD_STORE_SIZE>::new(
-            NativeLoadStoreAdapterChip::new(
-                execution_bus,
-                program_bus,
-                memory_bridge,
-                NativeLoadStore4Opcode::CLASS_OFFSET,
-            ),
-            NativeLoadStoreCoreChip::new(NativeLoadStore4Opcode::CLASS_OFFSET),
-            offline_memory.clone(),
+        let block_load_store = NativeLoadStoreExecutor::<BLOCK_LOAD_STORE_SIZE>::new(
+            NativeLoadStoreAdapterExecutor::new(NativeLoadStore4Opcode::CLASS_OFFSET),
+            NativeLoadStore4Opcode::CLASS_OFFSET,
         );
-        block_load_store_chip
-            .core
-            .set_streams(builder.streams().clone());
-
         inventory.add_executor(
-            block_load_store_chip,
+            block_load_store,
             NativeLoadStore4Opcode::iter().map(|x| x.global_opcode()),
         )?;
 
-        let branch_equal_chip = NativeBranchEqChip::new(
-            BranchNativeAdapterChip::<_>::new(execution_bus, program_bus, memory_bridge),
-            BranchEqualCoreChip::new(NativeBranchEqualOpcode::CLASS_OFFSET, DEFAULT_PC_STEP),
-            offline_memory.clone(),
+        let branch_equal = NativeBranchEqExecutor::new(
+            BranchNativeAdapterExecutor::new(),
+            NativeBranchEqualOpcode::CLASS_OFFSET,
+            DEFAULT_PC_STEP,
         );
         inventory.add_executor(
-            branch_equal_chip,
+            branch_equal,
             NativeBranchEqualOpcode::iter().map(|x| x.global_opcode()),
         )?;
 
-        let jal_chip = JalRangeCheckChip::new(
-            ExecutionBridge::new(execution_bus, program_bus),
-            offline_memory.clone(),
-            builder.system_base().range_checker_chip.clone(),
-        );
+        let jal_rangecheck = JalRangeCheckExecutor;
         inventory.add_executor(
-            jal_chip,
+            jal_rangecheck,
             [
                 NativeJalOpcode::JAL.global_opcode(),
                 NativeRangeCheckOpcode::RANGE_CHECK.global_opcode(),
             ],
         )?;
 
-        let field_arithmetic_chip = FieldArithmeticChip::new(
-            AluNativeAdapterChip::<F>::new(execution_bus, program_bus, memory_bridge),
-            FieldArithmeticCoreChip::new(),
-            offline_memory.clone(),
-        );
+        let field_arithmetic = FieldArithmeticExecutor::new(AluNativeAdapterExecutor::new());
         inventory.add_executor(
-            field_arithmetic_chip,
+            field_arithmetic,
             FieldArithmeticOpcode::iter().map(|x| x.global_opcode()),
         )?;
 
-        let field_extension_chip = FieldExtensionChip::new(
-            NativeVectorizedAdapterChip::new(execution_bus, program_bus, memory_bridge),
-            FieldExtensionCoreChip::new(),
-            offline_memory.clone(),
-        );
+        let field_extension = FieldExtensionExecutor::new(NativeVectorizedAdapterExecutor::new());
         inventory.add_executor(
-            field_extension_chip,
+            field_extension,
             FieldExtensionOpcode::iter().map(|x| x.global_opcode()),
         )?;
 
-        let fri_reduced_opening_chip = FriReducedOpeningChip::new(
-            execution_bus,
-            program_bus,
-            memory_bridge,
-            offline_memory.clone(),
-            builder.streams().clone(),
-        );
+        let fri_reduced_opening = FriReducedOpeningExecutor::new();
         inventory.add_executor(
-            fri_reduced_opening_chip,
+            fri_reduced_opening,
             FriOpcode::iter().map(|x| x.global_opcode()),
         )?;
 
-        let poseidon2_chip = NativePoseidon2Chip::new(
-            builder.system_port(),
-            offline_memory.clone(),
-            Poseidon2Config::default(),
-            VerifyBatchBus::new(builder.new_bus_idx()),
-            builder.streams().clone(),
-        );
+        let verify_batch = NativePoseidon2Executor::<F, 1>::new(Poseidon2Config::default());
         inventory.add_executor(
-            poseidon2_chip,
+            verify_batch,
             [
                 VerifyBatchOpcode::VERIFY_BATCH.global_opcode(),
                 Poseidon2Opcode::PERM_POS2.global_opcode(),
@@ -206,32 +129,180 @@ impl<F: PrimeField32> VmExtension<F> for Native {
             ],
         )?;
 
-        builder.add_phantom_sub_executor(
+        inventory.add_phantom_sub_executor(
             NativeHintInputSubEx,
             PhantomDiscriminant(NativePhantom::HintInput as u16),
         )?;
 
-        builder.add_phantom_sub_executor(
+        inventory.add_phantom_sub_executor(
             NativeHintSliceSubEx::<1>,
             PhantomDiscriminant(NativePhantom::HintFelt as u16),
         )?;
 
-        builder.add_phantom_sub_executor(
+        inventory.add_phantom_sub_executor(
             NativeHintBitsSubEx,
             PhantomDiscriminant(NativePhantom::HintBits as u16),
         )?;
 
-        builder.add_phantom_sub_executor(
+        inventory.add_phantom_sub_executor(
             NativePrintSubEx,
             PhantomDiscriminant(NativePhantom::Print as u16),
         )?;
 
-        builder.add_phantom_sub_executor(
+        inventory.add_phantom_sub_executor(
             NativeHintLoadSubEx,
             PhantomDiscriminant(NativePhantom::HintLoad as u16),
         )?;
 
-        Ok(inventory)
+        Ok(())
+    }
+}
+
+/// Registers the AIRs of the `Native` extension in the circuit's `AirInventory`.
+///
+/// The `add_air` calls follow the same order in which `NativeCpuProverExt::extend_prover`
+/// requests AIRs through `next_air`; keep the two sequences in sync when editing either one.
+impl<SC: StarkGenericConfig> VmCircuitExtension<SC> for Native
+where
+    Val<SC>: PrimeField32,
+{
+    fn extend_circuit(&self, inventory: &mut AirInventory<SC>) -> Result<(), AirInventoryError> {
+        let SystemPort {
+            execution_bus,
+            program_bus,
+            memory_bridge,
+        } = inventory.system().port();
+        // Built once and shared by every AIR below.
+        let exec_bridge = ExecutionBridge::new(execution_bus, program_bus);
+        // Only the range checker's bus is passed on (to `JalRangeCheckAir`).
+        let range_checker = inventory.range_checker().bus;
+
+        let load_store = NativeLoadStoreAir::<1>::new(
+            NativeLoadStoreAdapterAir::new(memory_bridge, exec_bridge),
+            NativeLoadStoreCoreAir::new(NativeLoadStoreOpcode::CLASS_OFFSET),
+        );
+        inventory.add_air(load_store);
+
+        let block_load_store = NativeLoadStoreAir::<BLOCK_LOAD_STORE_SIZE>::new(
+            NativeLoadStoreAdapterAir::new(memory_bridge, exec_bridge),
+            NativeLoadStoreCoreAir::new(NativeLoadStore4Opcode::CLASS_OFFSET),
+        );
+        inventory.add_air(block_load_store);
+
+        let branch_equal = NativeBranchEqAir::new(
+            BranchNativeAdapterAir::new(exec_bridge, memory_bridge),
+            BranchEqualCoreAir::new(NativeBranchEqualOpcode::CLASS_OFFSET, DEFAULT_PC_STEP),
+        );
+        inventory.add_air(branch_equal);
+
+        let jal_rangecheck = JalRangeCheckAir::new(exec_bridge, memory_bridge, range_checker);
+        inventory.add_air(jal_rangecheck);
+
+        let field_arithmetic = FieldArithmeticAir::new(
+            AluNativeAdapterAir::new(exec_bridge, memory_bridge),
+            FieldArithmeticCoreAir::new(),
+        );
+        inventory.add_air(field_arithmetic);
+
+        let field_extension = FieldExtensionAir::new(
+            NativeVectorizedAdapterAir::new(exec_bridge, memory_bridge),
+            FieldExtensionCoreAir::new(),
+        );
+        inventory.add_air(field_extension);
+
+        let fri_reduced_opening = FriReducedOpeningAir::new(exec_bridge, memory_bridge);
+        inventory.add_air(fri_reduced_opening);
+
+        // `VerifyBatchBus` gets a bus index newly allocated from the inventory.
+        let verify_batch = NativePoseidon2Air::<_, 1>::new(
+            exec_bridge,
+            memory_bridge,
+            VerifyBatchBus::new(inventory.new_bus_idx()),
+            Poseidon2Config::default(),
+        );
+        inventory.add_air(verify_batch);
+
+        Ok(())
+    }
+}
+
+pub struct NativeCpuProverExt;
+// This implementation is specific to CpuBackend because the lookup chips (VariableRangeChecker,
+// BitwiseOperationLookupChip) are specific to CpuBackend.
+impl<E, SC, RA> VmProverExtension<E, RA, Native> for NativeCpuProverExt
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    RA: RowMajorMatrixArena<Val<SC>>,
+    Val<SC>: PrimeField32,
+{
+    fn extend_prover(
+        &self,
+        _: &Native,
+        inventory: &mut ChipInventory<SC, RA, CpuBackend<SC>>,
+    ) -> Result<(), ChipInventoryError> {
+        let range_checker = inventory.range_checker()?.clone();
+        let timestamp_max_bits = inventory.timestamp_max_bits();
+        let mem_helper = SharedMemoryHelper::new(range_checker.clone(), timestamp_max_bits);
+
+        // These calls to next_air are not strictly necessary to construct the chips, but provide a
+        // safeguard to ensure that chip construction matches the circuit definition
+        inventory.next_air::<NativeLoadStoreAir<1>>()?;
+        let load_store = NativeLoadStoreChip::<_, 1>::new(
+            NativeLoadStoreCoreFiller::new(NativeLoadStoreAdapterFiller),
+            mem_helper.clone(),
+        );
+        inventory.add_executor_chip(load_store);
+
+        inventory.next_air::<NativeLoadStoreAir<BLOCK_LOAD_STORE_SIZE>>()?;
+        let block_load_store = NativeLoadStoreChip::<_, BLOCK_LOAD_STORE_SIZE>::new(
+            NativeLoadStoreCoreFiller::new(NativeLoadStoreAdapterFiller),
+            mem_helper.clone(),
+        );
+        inventory.add_executor_chip(block_load_store);
+
+        inventory.next_air::<NativeBranchEqAir>()?;
+        let branch_eq = NativeBranchEqChip::new(
+            NativeBranchEqualFiller::new(BranchNativeAdapterFiller),
+            mem_helper.clone(),
+        );
+
+        inventory.add_executor_chip(branch_eq);
+
+        inventory.next_air::<JalRangeCheckAir>()?;
+        let jal_rangecheck = NativeJalRangeCheckChip::new(
+            JalRangeCheckFiller::new(range_checker.clone()),
+            mem_helper.clone(),
+        );
+        inventory.add_executor_chip(jal_rangecheck);
+
+        inventory.next_air::<FieldArithmeticAir>()?;
+        let field_arithmetic = FieldArithmeticChip::new(
+            FieldArithmeticCoreFiller::new(AluNativeAdapterFiller),
+            mem_helper.clone(),
+        );
+        inventory.add_executor_chip(field_arithmetic);
+
+        inventory.next_air::<FieldExtensionAir>()?;
+        let field_extension = FieldExtensionChip::new(
+            FieldExtensionCoreFiller::new(NativeVectorizedAdapterFiller),
+            mem_helper.clone(),
+        );
+        inventory.add_executor_chip(field_extension);
+
+        inventory.next_air::<FriReducedOpeningAir>()?;
+        let fri_reduced_opening =
+            FriReducedOpeningChip::new(FriReducedOpeningFiller::new(), mem_helper.clone());
+        inventory.add_executor_chip(fri_reduced_opening);
+
+        inventory.next_air::<NativePoseidon2Air<Val<SC>, 1>>()?;
+        let poseidon2 = NativePoseidon2Chip::<_, 1>::new(
+            NativePoseidon2Filler::new(Poseidon2Config::default()),
+            mem_helper.clone(),
+        );
+        inventory.add_executor_chip(poseidon2);
+
+        Ok(())
     }
 }
 
@@ -239,10 +310,11 @@ pub(crate) mod phantom {
     use eyre::bail;
     use openvm_circuit::{
         arch::{PhantomSubExecutor, Streams},
-        system::memory::MemoryController,
+        system::memory::online::GuestMemory,
     };
     use openvm_instructions::PhantomDiscriminant;
     use openvm_stark_backend::p3_field::{Field, PrimeField32};
+    use rand::rngs::StdRng;
 
     pub struct NativeHintInputSubEx;
     pub struct NativeHintSliceSubEx<const N: usize>;
@@ -252,12 +324,13 @@ pub(crate) mod phantom {
 
     impl<F: Field> PhantomSubExecutor<F> for NativeHintInputSubEx {
         fn phantom_execute(
-            &mut self,
-            _: &MemoryController<F>,
+            &self,
+            _: &GuestMemory,
             streams: &mut Streams<F>,
+            _: &mut StdRng,
             _: PhantomDiscriminant,
-            _: F,
-            _: F,
+            _: u32,
+            _: u32,
             _: u16,
         ) -> eyre::Result<()> {
             let hint = match streams.input_stream.pop_front() {
@@ -277,12 +350,13 @@ pub(crate) mod phantom {
 
     impl<F: Field, const N: usize> PhantomSubExecutor<F> for NativeHintSliceSubEx<N> {
         fn phantom_execute(
-            &mut self,
-            _: &MemoryController<F>,
+            &self,
+            _: &GuestMemory,
             streams: &mut Streams<F>,
+            _: &mut StdRng,
             _: PhantomDiscriminant,
-            _: F,
-            _: F,
+            _: u32,
+            _: u32,
             _: u16,
         ) -> eyre::Result<()> {
             let hint = match streams.input_stream.pop_front() {
@@ -300,36 +374,35 @@ pub(crate) mod phantom {
 
     impl<F: PrimeField32> PhantomSubExecutor<F> for NativePrintSubEx {
         fn phantom_execute(
-            &mut self,
-            memory: &MemoryController<F>,
+            &self,
+            memory: &GuestMemory,
             _: &mut Streams<F>,
+            _: &mut StdRng,
             _: PhantomDiscriminant,
-            a: F,
-            _: F,
+            a: u32,
+            _: u32,
             c_upper: u16,
         ) -> eyre::Result<()> {
-            let addr_space = F::from_canonical_u16(c_upper);
-            let value = memory.unsafe_read_cell(addr_space, a);
-            println!("{}", value);
+            let [value] = unsafe { memory.read::<F, 1>(c_upper as u32, a) };
+            println!("{value}");
             Ok(())
         }
     }
 
     impl<F: PrimeField32> PhantomSubExecutor<F> for NativeHintBitsSubEx {
         fn phantom_execute(
-            &mut self,
-            memory: &MemoryController<F>,
+            &self,
+            memory: &GuestMemory,
             streams: &mut Streams<F>,
+            _: &mut StdRng,
             _: PhantomDiscriminant,
-            a: F,
-            b: F,
+            a: u32,
+            len: u32,
             c_upper: u16,
         ) -> eyre::Result<()> {
-            let addr_space = F::from_canonical_u16(c_upper);
-            let val = memory.unsafe_read_cell(addr_space, a);
+            let [val] = unsafe { memory.read::<F, 1>(c_upper as u32, a) };
             let mut val = val.as_canonical_u32();
 
-            let len = b.as_canonical_u32();
             assert!(streams.hint_stream.is_empty());
             for _ in 0..len {
                 streams
@@ -343,12 +416,13 @@ pub(crate) mod phantom {
 
     impl<F: PrimeField32> PhantomSubExecutor<F> for NativeHintLoadSubEx {
         fn phantom_execute(
-            &mut self,
-            _: &MemoryController<F>,
+            &self,
+            _: &GuestMemory,
             streams: &mut Streams<F>,
+            _: &mut StdRng,
             _: PhantomDiscriminant,
-            _: F,
-            _: F,
+            _: u32,
+            _: u32,
             _: u16,
         ) -> eyre::Result<()> {
             let payload = match streams.input_stream.pop_front() {
@@ -370,72 +444,74 @@ pub(crate) mod phantom {
 #[derive(Clone, Copy, Debug, Default, Serialize, Deserialize)]
 pub struct CastFExtension;
 
-#[derive(ChipUsageGetter, Chip, InstructionExecutor, From, AnyEnum)]
-pub enum CastFExtensionExecutor<F: PrimeField32> {
-    CastF(CastFChip<F>),
+#[derive(Clone, From, AnyEnum, Executor, MeteredExecutor, PreflightExecutor)]
+pub enum CastFExtensionExecutor {
+    CastF(CastFExecutor),
 }
 
-#[derive(From, ChipUsageGetter, Chip, AnyEnum)]
-pub enum CastFExtensionPeriphery<F: PrimeField32> {
-    Placeholder(CastFChip<F>),
-}
+impl<F: PrimeField32> VmExecutionExtension<F> for CastFExtension {
+    type Executor = CastFExtensionExecutor;
 
-impl<F: PrimeField32> VmExtension<F> for CastFExtension {
-    type Executor = CastFExtensionExecutor<F>;
-    type Periphery = CastFExtensionPeriphery<F>;
-
-    fn build(
+    fn extend_execution(
         &self,
-        builder: &mut VmInventoryBuilder<F>,
-    ) -> Result<VmInventory<Self::Executor, Self::Periphery>, VmInventoryError> {
-        let mut inventory = VmInventory::new();
+        inventory: &mut ExecutorInventoryBuilder<F, CastFExtensionExecutor>,
+    ) -> Result<(), ExecutorInventoryError> {
+        let castf = CastFExecutor::new(ConvertAdapterExecutor::new());
+        inventory.add_executor(castf, [CastfOpcode::CASTF.global_opcode()])?;
+        Ok(())
+    }
+}
+
+impl<SC: StarkGenericConfig> VmCircuitExtension<SC> for CastFExtension {
+    fn extend_circuit(&self, inventory: &mut AirInventory<SC>) -> Result<(), AirInventoryError> {
         let SystemPort {
             execution_bus,
             program_bus,
             memory_bridge,
-        } = builder.system_port();
-        let offline_memory = builder.system_base().offline_memory();
-        let range_checker = builder.system_base().range_checker_chip.clone();
-
-        let castf_chip = CastFChip::new(
-            ConvertAdapterChip::new(execution_bus, program_bus, memory_bridge),
-            CastFCoreChip::new(range_checker.clone()),
-            offline_memory.clone(),
-        );
-        inventory.add_executor(castf_chip, [CastfOpcode::CASTF.global_opcode()])?;
+        } = inventory.system().port();
+        let exec_bridge = ExecutionBridge::new(execution_bus, program_bus);
+        let range_checker = inventory.range_checker().bus;
 
-        Ok(inventory)
+        let castf = CastFAir::new(
+            ConvertAdapterAir::new(exec_bridge, memory_bridge),
+            CastFCoreAir::new(range_checker),
+        );
+        inventory.add_air(castf);
+        Ok(())
     }
 }
 
-#[derive(Clone, Debug, VmConfig, derive_new::new, Serialize, Deserialize)]
-pub struct Rv32WithKernelsConfig {
-    #[system]
-    pub system: SystemConfig,
-    #[extension]
-    pub rv32i: Rv32I,
-    #[extension]
-    pub rv32m: Rv32M,
-    #[extension]
-    pub io: Rv32Io,
-    #[extension]
-    pub native: Native,
-    #[extension]
-    pub castf: CastFExtension,
-}
+impl<E, SC, RA> VmProverExtension<E, RA, CastFExtension> for NativeCpuProverExt
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    RA: RowMajorMatrixArena<Val<SC>>,
+    Val<SC>: PrimeField32,
+{
+    fn extend_prover(
+        &self,
+        _: &CastFExtension,
+        inventory: &mut ChipInventory<SC, RA, CpuBackend<SC>>,
+    ) -> Result<(), ChipInventoryError> {
+        let range_checker = inventory.range_checker()?.clone();
+        let timestamp_max_bits = inventory.timestamp_max_bits();
+        let mem_helper = SharedMemoryHelper::new(range_checker.clone(), timestamp_max_bits);
+
+        inventory.next_air::<CastFAir>()?;
+        let castf = CastFChip::new(
+            CastFCoreFiller::new(ConvertAdapterFiller::new(), range_checker),
+            mem_helper.clone(),
+        );
+        inventory.add_executor_chip(castf);
 
-impl Default for Rv32WithKernelsConfig {
-    fn default() -> Self {
-        Self {
-            system: SystemConfig::default().with_continuations(),
-            rv32i: Rv32I,
-            rv32m: Rv32M::default(),
-            io: Rv32Io,
-            native: Native,
-            castf: CastFExtension,
-        }
+        Ok(())
     }
 }
 
-// Default implementation uses no init file
-impl InitFileGenerator for Rv32WithKernelsConfig {}
+// Pre-computed maximum trace heights for NativeConfig. Found by doubling
+// the actual trace heights of kitchen-sink leaf verification (except for
+// VariableRangeChecker, which has a fixed height).
+pub const NATIVE_MAX_TRACE_HEIGHTS: &[u32] = &[
+    4194304, 4, 128, 2097152, 8388608, 4194304, 262144, 2097152, 16777216, 2097152, 8388608,
+    262144, 2097152, 1048576, 4194304, 65536, 262144,
+];
diff --git a/extensions/native/circuit/src/field_arithmetic/core.rs b/extensions/native/circuit/src/field_arithmetic/core.rs
index c813f6a066..289ab6124e 100644
--- a/extensions/native/circuit/src/field_arithmetic/core.rs
+++ b/extensions/native/circuit/src/field_arithmetic/core.rs
@@ -1,12 +1,13 @@
 use std::borrow::{Borrow, BorrowMut};
 
 use itertools::izip;
-use openvm_circuit::arch::{
-    AdapterAirContext, AdapterRuntimeContext, MinimalInstruction, Result, VmAdapterInterface,
-    VmCoreAir, VmCoreChip,
+use openvm_circuit::{
+    arch::*,
+    system::memory::{online::TracingMemory, MemoryAuxColsFactory},
 };
+use openvm_circuit_primitives::AlignedBytesBorrow;
 use openvm_circuit_primitives_derive::AlignedBorrow;
-use openvm_instructions::{instruction::Instruction, LocalOpcode};
+use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP, LocalOpcode};
 use openvm_native_compiler::FieldArithmeticOpcode::{self, *};
 use openvm_stark_backend::{
     interaction::InteractionBuilder,
@@ -14,7 +15,6 @@ use openvm_stark_backend::{
     p3_field::{Field, FieldAlgebra, PrimeField32},
     rap::BaseAirWithPublicValues,
 };
-use serde::{Deserialize, Serialize};
 
 #[repr(C)]
 #[derive(AlignedBorrow)]
@@ -31,7 +31,7 @@ pub struct FieldArithmeticCoreCols<T> {
     pub divisor_inv: T,
 }
 
-#[derive(Copy, Clone, Debug)]
+#[derive(derive_new::new, Copy, Clone, Debug)]
 pub struct FieldArithmeticCoreAir {}
 
 impl<F: Field> BaseAir<F> for FieldArithmeticCoreAir {
@@ -106,120 +106,109 @@ where
 }
 
 #[repr(C)]
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(AlignedBytesBorrow, Debug)]
 pub struct FieldArithmeticRecord<F> {
-    pub opcode: FieldArithmeticOpcode,
-    pub a: F,
     pub b: F,
     pub c: F,
+    pub local_opcode: u8,
 }
 
-pub struct FieldArithmeticCoreChip {
-    pub air: FieldArithmeticCoreAir,
+#[derive(derive_new::new, Clone, Copy)]
+pub struct FieldArithmeticCoreExecutor<A> {
+    adapter: A,
 }
 
-impl FieldArithmeticCoreChip {
-    pub fn new() -> Self {
-        Self {
-            air: FieldArithmeticCoreAir {},
-        }
-    }
-}
-
-impl Default for FieldArithmeticCoreChip {
-    fn default() -> Self {
-        Self::new()
-    }
+#[derive(derive_new::new)]
+pub struct FieldArithmeticCoreFiller<A> {
+    adapter: A,
 }
 
-impl<F: PrimeField32, I: VmAdapterInterface<F>> VmCoreChip<F, I> for FieldArithmeticCoreChip
+impl<F, A, RA> PreflightExecutor<F, RA> for FieldArithmeticCoreExecutor<A>
 where
-    I::Reads: Into<[[F; 1]; 2]>,
-    I::Writes: From<[[F; 1]; 1]>,
+    F: PrimeField32,
+    A: 'static + AdapterTraceExecutor<F, ReadData = [F; 2], WriteData = [F; 1]>,
+    for<'buf> RA: RecordArena<
+        'buf,
+        EmptyAdapterCoreLayout<F, A>,
+        (A::RecordMut<'buf>, &'buf mut FieldArithmeticRecord<F>),
+    >,
 {
-    type Record = FieldArithmeticRecord<F>;
-    type Air = FieldArithmeticCoreAir;
+    fn get_opcode_name(&self, opcode: usize) -> String {
+        format!(
+            "{:?}",
+            FieldArithmeticOpcode::from_usize(opcode - FieldArithmeticOpcode::CLASS_OFFSET)
+        )
+    }
 
-    #[allow(clippy::type_complexity)]
-    fn execute_instruction(
+    fn execute(
         &self,
+        state: VmStateMut<F, TracingMemory, RA>,
         instruction: &Instruction<F>,
-        _from_pc: u32,
-        reads: I::Reads,
-    ) -> Result<(AdapterRuntimeContext<F, I>, Self::Record)> {
-        let Instruction { opcode, .. } = instruction;
-        let local_opcode = FieldArithmeticOpcode::from_usize(
-            opcode.local_opcode_idx(FieldArithmeticOpcode::CLASS_OFFSET),
-        );
-
-        let data: [[F; 1]; 2] = reads.into();
-        let b = data[0][0];
-        let c = data[1][0];
-        let a = FieldArithmetic::run_field_arithmetic(local_opcode, b, c).unwrap();
-
-        let output: AdapterRuntimeContext<F, I> = AdapterRuntimeContext {
-            to_pc: None,
-            writes: [[a]].into(),
-        };
+    ) -> Result<(), ExecutionError> {
+        let &Instruction { opcode, .. } = instruction;
+        let (mut adapter_record, core_record) = state.ctx.alloc(EmptyAdapterCoreLayout::new());
 
-        let record = Self::Record {
-            opcode: local_opcode,
-            a,
-            b,
-            c,
-        };
+        A::start(*state.pc, state.memory, &mut adapter_record);
 
-        Ok((output, record))
-    }
+        [core_record.b, core_record.c] =
+            self.adapter
+                .read(state.memory, instruction, &mut adapter_record);
 
-    fn get_opcode_name(&self, opcode: usize) -> String {
-        format!(
-            "{:?}",
-            FieldArithmeticOpcode::from_usize(opcode - FieldArithmeticOpcode::CLASS_OFFSET)
-        )
+        core_record.local_opcode =
+            opcode.local_opcode_idx(FieldArithmeticOpcode::CLASS_OFFSET) as u8;
+
+        let opcode = FieldArithmeticOpcode::from_usize(core_record.local_opcode as usize);
+        let a_val = run_field_arithmetic(opcode, core_record.b, core_record.c);
+
+        self.adapter
+            .write(state.memory, instruction, [a_val], &mut adapter_record);
+
+        *state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP);
+
+        Ok(())
     }
+}
 
-    fn generate_trace_row(&self, row_slice: &mut [F], record: Self::Record) {
-        let FieldArithmeticRecord { opcode, a, b, c } = record;
-        let row_slice: &mut FieldArithmeticCoreCols<_> = row_slice.borrow_mut();
-        row_slice.a = a;
-        row_slice.b = b;
-        row_slice.c = c;
-
-        row_slice.is_add = F::from_bool(opcode == FieldArithmeticOpcode::ADD);
-        row_slice.is_sub = F::from_bool(opcode == FieldArithmeticOpcode::SUB);
-        row_slice.is_mul = F::from_bool(opcode == FieldArithmeticOpcode::MUL);
-        row_slice.is_div = F::from_bool(opcode == FieldArithmeticOpcode::DIV);
-        row_slice.divisor_inv = if opcode == FieldArithmeticOpcode::DIV {
-            c.inverse()
+impl<F, A> TraceFiller<F> for FieldArithmeticCoreFiller<A>
+where
+    F: PrimeField32,
+    A: 'static + AdapterTraceFiller<F>,
+{
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, row_slice: &mut [F]) {
+        let (adapter_row, mut core_row) = unsafe { row_slice.split_at_mut_unchecked(A::WIDTH) };
+        self.adapter.fill_trace_row(mem_helper, adapter_row);
+        let record: &FieldArithmeticRecord<F> = unsafe { get_record_from_slice(&mut core_row, ()) };
+        let core_row: &mut FieldArithmeticCoreCols<_> = core_row.borrow_mut();
+
+        let opcode = FieldArithmeticOpcode::from_usize(record.local_opcode as usize);
+        let result = run_field_arithmetic(opcode, record.b, record.c);
+
+        // `record` was read out of `core_row`'s buffer, so fill columns in reverse to keep it intact
+        core_row.divisor_inv = if opcode == FieldArithmeticOpcode::DIV {
+            record.c.inverse()
         } else {
             F::ZERO
         };
-    }
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        core_row.is_div = F::from_bool(opcode == FieldArithmeticOpcode::DIV);
+        core_row.is_mul = F::from_bool(opcode == FieldArithmeticOpcode::MUL);
+        core_row.is_sub = F::from_bool(opcode == FieldArithmeticOpcode::SUB);
+        core_row.is_add = F::from_bool(opcode == FieldArithmeticOpcode::ADD);
+
+        core_row.c = record.c;
+        core_row.b = record.b;
+        core_row.a = result;
     }
 }
 
-pub struct FieldArithmetic;
-impl FieldArithmetic {
-    pub(super) fn run_field_arithmetic<F: Field>(
-        opcode: FieldArithmeticOpcode,
-        b: F,
-        c: F,
-    ) -> Option<F> {
-        match opcode {
-            FieldArithmeticOpcode::ADD => Some(b + c),
-            FieldArithmeticOpcode::SUB => Some(b - c),
-            FieldArithmeticOpcode::MUL => Some(b * c),
-            FieldArithmeticOpcode::DIV => {
-                if c.is_zero() {
-                    None
-                } else {
-                    Some(b * c.inverse())
-                }
-            }
+pub(super) fn run_field_arithmetic<F: Field>(opcode: FieldArithmeticOpcode, b: F, c: F) -> F {
+    match opcode {
+        FieldArithmeticOpcode::ADD => b + c,
+        FieldArithmeticOpcode::SUB => b - c,
+        FieldArithmeticOpcode::MUL => b * c,
+        FieldArithmeticOpcode::DIV => {
+            assert!(!c.is_zero(), "Division by zero");
+            b * c.inverse()
         }
     }
 }
diff --git a/extensions/native/circuit/src/field_arithmetic/execution.rs b/extensions/native/circuit/src/field_arithmetic/execution.rs
new file mode 100644
index 0000000000..cac0770181
--- /dev/null
+++ b/extensions/native/circuit/src/field_arithmetic/execution.rs
@@ -0,0 +1,308 @@
+use std::borrow::{Borrow, BorrowMut};
+
+use openvm_circuit::{
+    arch::*,
+    system::memory::online::GuestMemory,
+    utils::{transmute_field_to_u32, transmute_u32_to_field},
+};
+use openvm_circuit_primitives::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_IMM_AS, LocalOpcode,
+};
+use openvm_native_compiler::{conversion::AS, FieldArithmeticOpcode};
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::core::FieldArithmeticCoreExecutor;
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)] // this struct is borrowed straight out of a `[u8]` pre-compute buffer
+struct FieldArithmeticPreCompute {
+    a: u32, // destination pointer; the result is written in the native address space
+    b_or_imm: u32, // transmuted field value of `b` if it is an immediate, else a pointer in `e`
+    c_or_imm: u32, // transmuted field value of `c` if it is an immediate, else a pointer in `f`
+    e: u32, // address space of operand `b`
+    f: u32, // address space of operand `c`
+}
+
+impl<A> FieldArithmeticCoreExecutor<A> {
+    #[inline(always)]
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        _pc: u32, // unused
+        inst: &Instruction<F>,
+        data: &mut FieldArithmeticPreCompute, // overwritten with the decoded operands
+    ) -> Result<(bool, bool, FieldArithmeticOpcode), StaticProgramError> {
+        let &Instruction {
+            opcode,
+            a,
+            b,
+            c,
+            e,
+            f,
+            .. // all other instruction fields are ignored
+        } = inst;
+
+        let local_opcode = FieldArithmeticOpcode::from_usize(
+            opcode.local_opcode_idx(FieldArithmeticOpcode::CLASS_OFFSET),
+        );
+
+        let a = a.as_canonical_u32();
+        let e = e.as_canonical_u32();
+        let f = f.as_canonical_u32();
+
+        let a_is_imm = e == RV32_IMM_AS; // despite the name: operand `b` is an immediate
+        let b_is_imm = f == RV32_IMM_AS; // despite the name: operand `c` is an immediate
+
+        let b_or_imm = if a_is_imm {
+            transmute_field_to_u32(&b) // immediate: undone by transmute_u32_to_field later
+        } else {
+            b.as_canonical_u32() // pointer into address space `e`
+        };
+        let c_or_imm = if b_is_imm {
+            transmute_field_to_u32(&c) // immediate: undone by transmute_u32_to_field later
+        } else {
+            c.as_canonical_u32() // pointer into address space `f`
+        };
+
+        *data = FieldArithmeticPreCompute {
+            a,
+            b_or_imm,
+            c_or_imm,
+            e,
+            f,
+        };
+
+        Ok((a_is_imm, b_is_imm, local_opcode)) // callers use these to pick the handler
+    }
+}
+
+impl<F, A> Executor<F> for FieldArithmeticCoreExecutor<A>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize {
+        size_of::<FieldArithmeticPreCompute>() // byte size of the typed pre-compute record
+    }
+
+    #[inline(always)]
+    fn pre_compute<Ctx: ExecutionCtxTrait>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8], // reinterpreted below as FieldArithmeticPreCompute
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let pre_compute: &mut FieldArithmeticPreCompute = data.borrow_mut();
+
+        let (a_is_imm, b_is_imm, local_opcode) = self.pre_compute_impl(pc, inst, pre_compute)?;
+
+        let fn_ptr = match (local_opcode, a_is_imm, b_is_imm) { // 4 opcodes x 2 x 2 imm flags
+            (FieldArithmeticOpcode::ADD, true, true) => {
+                execute_e1_impl::<_, _, true, true, { FieldArithmeticOpcode::ADD as u8 }>
+            }
+            (FieldArithmeticOpcode::ADD, true, false) => {
+                execute_e1_impl::<_, _, true, false, { FieldArithmeticOpcode::ADD as u8 }>
+            }
+            (FieldArithmeticOpcode::ADD, false, true) => {
+                execute_e1_impl::<_, _, false, true, { FieldArithmeticOpcode::ADD as u8 }>
+            }
+            (FieldArithmeticOpcode::ADD, false, false) => {
+                execute_e1_impl::<_, _, false, false, { FieldArithmeticOpcode::ADD as u8 }>
+            }
+            (FieldArithmeticOpcode::SUB, true, true) => {
+                execute_e1_impl::<_, _, true, true, { FieldArithmeticOpcode::SUB as u8 }>
+            }
+            (FieldArithmeticOpcode::SUB, true, false) => {
+                execute_e1_impl::<_, _, true, false, { FieldArithmeticOpcode::SUB as u8 }>
+            }
+            (FieldArithmeticOpcode::SUB, false, true) => {
+                execute_e1_impl::<_, _, false, true, { FieldArithmeticOpcode::SUB as u8 }>
+            }
+            (FieldArithmeticOpcode::SUB, false, false) => {
+                execute_e1_impl::<_, _, false, false, { FieldArithmeticOpcode::SUB as u8 }>
+            }
+            (FieldArithmeticOpcode::MUL, true, true) => {
+                execute_e1_impl::<_, _, true, true, { FieldArithmeticOpcode::MUL as u8 }>
+            }
+            (FieldArithmeticOpcode::MUL, true, false) => {
+                execute_e1_impl::<_, _, true, false, { FieldArithmeticOpcode::MUL as u8 }>
+            }
+            (FieldArithmeticOpcode::MUL, false, true) => {
+                execute_e1_impl::<_, _, false, true, { FieldArithmeticOpcode::MUL as u8 }>
+            }
+            (FieldArithmeticOpcode::MUL, false, false) => {
+                execute_e1_impl::<_, _, false, false, { FieldArithmeticOpcode::MUL as u8 }>
+            }
+            (FieldArithmeticOpcode::DIV, true, true) => {
+                execute_e1_impl::<_, _, true, true, { FieldArithmeticOpcode::DIV as u8 }>
+            }
+            (FieldArithmeticOpcode::DIV, true, false) => {
+                execute_e1_impl::<_, _, true, false, { FieldArithmeticOpcode::DIV as u8 }>
+            }
+            (FieldArithmeticOpcode::DIV, false, true) => {
+                execute_e1_impl::<_, _, false, true, { FieldArithmeticOpcode::DIV as u8 }>
+            }
+            (FieldArithmeticOpcode::DIV, false, false) => {
+                execute_e1_impl::<_, _, false, false, { FieldArithmeticOpcode::DIV as u8 }>
+            }
+        };
+
+        Ok(fn_ptr) // the handler reads `data` back as FieldArithmeticPreCompute
+    }
+}
+
+impl<F, A> MeteredExecutor<F> for FieldArithmeticCoreExecutor<A>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn metered_pre_compute_size(&self) -> usize {
+        size_of::<E2PreCompute<FieldArithmeticPreCompute>>() // record wrapped with chip_idx
+    }
+
+    #[inline(always)]
+    fn metered_pre_compute<Ctx: MeteredExecutionCtxTrait>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8], // reinterpreted below as E2PreCompute<FieldArithmeticPreCompute>
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let pre_compute: &mut E2PreCompute<FieldArithmeticPreCompute> = data.borrow_mut();
+        pre_compute.chip_idx = chip_idx as u32; // read back by the handler for on_height_change
+
+        let (a_is_imm, b_is_imm, local_opcode) =
+            self.pre_compute_impl(pc, inst, &mut pre_compute.data)?;
+
+        let fn_ptr = match (local_opcode, a_is_imm, b_is_imm) { // 4 opcodes x 2 x 2 imm flags
+            (FieldArithmeticOpcode::ADD, true, true) => {
+                execute_e2_impl::<_, _, true, true, { FieldArithmeticOpcode::ADD as u8 }>
+            }
+            (FieldArithmeticOpcode::ADD, true, false) => {
+                execute_e2_impl::<_, _, true, false, { FieldArithmeticOpcode::ADD as u8 }>
+            }
+            (FieldArithmeticOpcode::ADD, false, true) => {
+                execute_e2_impl::<_, _, false, true, { FieldArithmeticOpcode::ADD as u8 }>
+            }
+            (FieldArithmeticOpcode::ADD, false, false) => {
+                execute_e2_impl::<_, _, false, false, { FieldArithmeticOpcode::ADD as u8 }>
+            }
+            (FieldArithmeticOpcode::SUB, true, true) => {
+                execute_e2_impl::<_, _, true, true, { FieldArithmeticOpcode::SUB as u8 }>
+            }
+            (FieldArithmeticOpcode::SUB, true, false) => {
+                execute_e2_impl::<_, _, true, false, { FieldArithmeticOpcode::SUB as u8 }>
+            }
+            (FieldArithmeticOpcode::SUB, false, true) => {
+                execute_e2_impl::<_, _, false, true, { FieldArithmeticOpcode::SUB as u8 }>
+            }
+            (FieldArithmeticOpcode::SUB, false, false) => {
+                execute_e2_impl::<_, _, false, false, { FieldArithmeticOpcode::SUB as u8 }>
+            }
+            (FieldArithmeticOpcode::MUL, true, true) => {
+                execute_e2_impl::<_, _, true, true, { FieldArithmeticOpcode::MUL as u8 }>
+            }
+            (FieldArithmeticOpcode::MUL, true, false) => {
+                execute_e2_impl::<_, _, true, false, { FieldArithmeticOpcode::MUL as u8 }>
+            }
+            (FieldArithmeticOpcode::MUL, false, true) => {
+                execute_e2_impl::<_, _, false, true, { FieldArithmeticOpcode::MUL as u8 }>
+            }
+            (FieldArithmeticOpcode::MUL, false, false) => {
+                execute_e2_impl::<_, _, false, false, { FieldArithmeticOpcode::MUL as u8 }>
+            }
+            (FieldArithmeticOpcode::DIV, true, true) => {
+                execute_e2_impl::<_, _, true, true, { FieldArithmeticOpcode::DIV as u8 }>
+            }
+            (FieldArithmeticOpcode::DIV, true, false) => {
+                execute_e2_impl::<_, _, true, false, { FieldArithmeticOpcode::DIV as u8 }>
+            }
+            (FieldArithmeticOpcode::DIV, false, true) => {
+                execute_e2_impl::<_, _, false, true, { FieldArithmeticOpcode::DIV as u8 }>
+            }
+            (FieldArithmeticOpcode::DIV, false, false) => {
+                execute_e2_impl::<_, _, false, false, { FieldArithmeticOpcode::DIV as u8 }>
+            }
+        };
+
+        Ok(fn_ptr) // the handler reads `data` back as E2PreCompute<FieldArithmeticPreCompute>
+    }
+}
+
+#[inline(always)]
+unsafe fn execute_e12_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const A_IS_IMM: bool,
+    const B_IS_IMM: bool,
+    const OPCODE: u8,
+>(
+    pre_compute: &FieldArithmeticPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    // Shared E1/E2 body: A_IS_IMM / B_IS_IMM mark operand b / c as an immediate.
+    let b_val = if A_IS_IMM {
+        transmute_u32_to_field(&pre_compute.b_or_imm)
+    } else {
+        vm_state.vm_read::<F, 1>(pre_compute.e, pre_compute.b_or_imm)[0]
+    };
+    let c_val = if B_IS_IMM {
+        transmute_u32_to_field(&pre_compute.c_or_imm)
+    } else {
+        vm_state.vm_read::<F, 1>(pre_compute.f, pre_compute.c_or_imm)[0]
+    };
+
+    let a_val = match OPCODE {
+        op if op == FieldArithmeticOpcode::ADD as u8 => b_val + c_val,
+        op if op == FieldArithmeticOpcode::SUB as u8 => b_val - c_val,
+        op if op == FieldArithmeticOpcode::MUL as u8 => b_val * c_val,
+        op if op == FieldArithmeticOpcode::DIV as u8 => {
+            // On a zero divisor, record the failure in `exit_code` and return early.
+            if c_val.is_zero() {
+                vm_state.exit_code = Err(ExecutionError::Fail {
+                    pc: vm_state.pc,
+                    msg: "DivF divide by zero",
+                });
+                return;
+            }
+            b_val * c_val.inverse()
+        }
+        _ => panic!("Invalid field arithmetic opcode: {OPCODE}"),
+    };
+
+    vm_state.vm_write::<F, 1>(AS::Native as u32, pre_compute.a, &[a_val]);
+
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+}
+
+unsafe fn execute_e1_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const A_IS_IMM: bool,
+    const B_IS_IMM: bool,
+    const OPCODE: u8,
+>(
+    pre_compute: &[u8], // bytes filled in by `pre_compute`
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &FieldArithmeticPreCompute = pre_compute.borrow(); // typed view of bytes
+    execute_e12_impl::<F, CTX, A_IS_IMM, B_IS_IMM, OPCODE>(pre_compute, vm_state);
+}
+
+unsafe fn execute_e2_impl<
+    F: PrimeField32,
+    CTX: MeteredExecutionCtxTrait,
+    const A_IS_IMM: bool,
+    const B_IS_IMM: bool,
+    const OPCODE: u8,
+>(
+    pre_compute: &[u8], // bytes filled in by `metered_pre_compute`
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &E2PreCompute<FieldArithmeticPreCompute> = pre_compute.borrow();
+    vm_state
+        .ctx
+        .on_height_change(pre_compute.chip_idx as usize, 1); // NOTE(review): presumably +1 row
+    execute_e12_impl::<F, CTX, A_IS_IMM, B_IS_IMM, OPCODE>(&pre_compute.data, vm_state);
+}
diff --git a/extensions/native/circuit/src/field_arithmetic/mod.rs b/extensions/native/circuit/src/field_arithmetic/mod.rs
index 865434cb37..04f0300649 100644
--- a/extensions/native/circuit/src/field_arithmetic/mod.rs
+++ b/extensions/native/circuit/src/field_arithmetic/mod.rs
@@ -1,13 +1,15 @@
 use openvm_circuit::arch::{VmAirWrapper, VmChipWrapper};
 
-use crate::adapters::alu_native_adapter::{AluNativeAdapterAir, AluNativeAdapterChip};
-
-#[cfg(test)]
-mod tests;
+use crate::adapters::{AluNativeAdapterAir, AluNativeAdapterExecutor, AluNativeAdapterFiller};
 
 mod core;
+mod execution;
 pub use core::*;
 
+#[cfg(test)]
+mod tests;
+
 pub type FieldArithmeticAir = VmAirWrapper<AluNativeAdapterAir, FieldArithmeticCoreAir>;
+pub type FieldArithmeticExecutor = FieldArithmeticCoreExecutor<AluNativeAdapterExecutor>;
 pub type FieldArithmeticChip<F> =
-    VmChipWrapper<F, AluNativeAdapterChip<F>, FieldArithmeticCoreChip>;
+    VmChipWrapper<F, FieldArithmeticCoreFiller<AluNativeAdapterFiller>>;
diff --git a/extensions/native/circuit/src/field_arithmetic/tests.rs b/extensions/native/circuit/src/field_arithmetic/tests.rs
index 8e69f8c44b..06e0837d14 100644
--- a/extensions/native/circuit/src/field_arithmetic/tests.rs
+++ b/extensions/native/circuit/src/field_arithmetic/tests.rs
@@ -1,184 +1,254 @@
 use std::borrow::BorrowMut;
 
-use openvm_circuit::arch::testing::{memory::gen_pointer, VmChipTestBuilder};
+use openvm_circuit::arch::testing::{memory::gen_pointer, TestChipHarness, VmChipTestBuilder};
 use openvm_instructions::{instruction::Instruction, LocalOpcode};
-use openvm_native_compiler::FieldArithmeticOpcode;
+use openvm_native_compiler::{conversion::AS, FieldArithmeticOpcode};
 use openvm_stark_backend::{
+    p3_air::BaseAir,
     p3_field::{Field, FieldAlgebra, PrimeField32},
+    p3_matrix::{
+        dense::{DenseMatrix, RowMajorMatrix},
+        Matrix,
+    },
     utils::disable_debug_builder,
     verifier::VerificationError,
-    Chip,
 };
-use openvm_stark_sdk::{
-    config::baby_bear_poseidon2::BabyBearPoseidon2Engine, engine::StarkFriEngine,
-    p3_baby_bear::BabyBear, utils::create_seeded_rng,
-};
-use rand::Rng;
-use strum::EnumCount;
+use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
+use rand::{rngs::StdRng, Rng};
+use test_case::test_case;
 
 use super::{
-    core::FieldArithmeticCoreChip, FieldArithmetic, FieldArithmeticChip, FieldArithmeticCoreCols,
+    FieldArithmeticChip, FieldArithmeticCoreAir, FieldArithmeticCoreCols, FieldArithmeticExecutor,
+};
+use crate::{
+    adapters::{AluNativeAdapterAir, AluNativeAdapterExecutor, AluNativeAdapterFiller},
+    field_arithmetic::{run_field_arithmetic, FieldArithmeticAir},
+    test_utils::write_native_or_imm,
+    FieldArithmeticCoreFiller,
 };
-use crate::adapters::alu_native_adapter::{AluNativeAdapterChip, AluNativeAdapterCols};
 
-#[test]
-fn new_field_arithmetic_air_test() {
-    let num_ops = 3; // non-power-of-2 to also test padding
-    let elem_range = || 1..=100;
-    let xy_address_space_range = || 0usize..=1;
-
-    let mut tester = VmChipTestBuilder::default();
-    let mut chip = FieldArithmeticChip::new(
-        AluNativeAdapterChip::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
+const MAX_INS_CAPACITY: usize = 128;
+type F = BabyBear;
+type Harness =
+    TestChipHarness<F, FieldArithmeticExecutor, FieldArithmeticAir, FieldArithmeticChip<F>>;
+
+fn create_test_chip(tester: &VmChipTestBuilder<F>) -> Harness {
+    let air = FieldArithmeticAir::new(
+        AluNativeAdapterAir::new(tester.execution_bridge(), tester.memory_bridge()),
+        FieldArithmeticCoreAir::new(),
+    ); // AIR = native ALU adapter AIR + field-arithmetic core AIR
+    let executor = FieldArithmeticExecutor::new(AluNativeAdapterExecutor::new());
+    let chip = FieldArithmeticChip::<F>::new(
+        FieldArithmeticCoreFiller::new(AluNativeAdapterFiller),
+        tester.memory_helper(),
+    );
+
+    Harness::with_capacity(executor, air, chip, MAX_INS_CAPACITY) // capacity is 128
+}
+
+#[allow(clippy::too_many_arguments)]
+fn set_and_execute(
+    tester: &mut VmChipTestBuilder<F>,
+    harness: &mut Harness,
+    rng: &mut StdRng,
+    opcode: FieldArithmeticOpcode,
+    b: Option<F>, // first operand; random when None
+    c: Option<F>, // second operand; random (non-zero for DIV) when None
+) {
+    let b_val = b.unwrap_or(rng.gen()); // note: draws from `rng` even when `b` is Some
+    let c_val = c.unwrap_or(if opcode == FieldArithmeticOpcode::DIV {
+        // If division, make sure c is not zero
+        F::from_canonical_u32(rng.gen_range(0..F::NEG_ONE.as_canonical_u32()) + 1)
+    } else {
+        rng.gen()
+    });
+    assert!(opcode != FieldArithmeticOpcode::DIV || !c_val.is_zero(), "Division by zero");
+    let (b, b_as) = write_native_or_imm(tester, rng, b_val, None);
+    let (c, c_as) = write_native_or_imm(tester, rng, c_val, None);
+    let a = gen_pointer(rng, 1);
+
+    tester.execute(
+        harness,
+        &Instruction::new(
+            opcode.global_opcode(),
+            F::from_canonical_usize(a),
+            b,
+            c,
+            F::from_canonical_usize(AS::Native as usize),
+            F::from_canonical_usize(b_as),
+            F::from_canonical_usize(c_as),
+            F::ZERO,
         ),
-        FieldArithmeticCoreChip::new(),
-        tester.offline_memory_mutex_arc(),
     );
 
+    let expected = run_field_arithmetic(opcode, b_val, c_val);
+    let result = tester.read::<1>(AS::Native as usize, a)[0]; // value written at pointer `a`
+    assert_eq!(result, expected);
+}
+
+//////////////////////////////////////////////////////////////////////////////////////
+// POSITIVE TESTS
+//
+// Randomly generate computations and execute, ensuring that the generated trace
+// passes all constraints.
+//////////////////////////////////////////////////////////////////////////////////////
+#[test_case(FieldArithmeticOpcode::ADD, 100)]
+#[test_case(FieldArithmeticOpcode::SUB, 100)]
+#[test_case(FieldArithmeticOpcode::MUL, 100)]
+#[test_case(FieldArithmeticOpcode::DIV, 100)]
+fn new_field_arithmetic_air_test(opcode: FieldArithmeticOpcode, num_ops: usize) {
     let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default_native();
+    let mut harness = create_test_chip(&tester);
 
     for _ in 0..num_ops {
-        let opcode =
-            FieldArithmeticOpcode::from_usize(rng.gen_range(0..FieldArithmeticOpcode::COUNT));
+        set_and_execute(&mut tester, &mut harness, &mut rng, opcode, None, None);
+    }
 
-        let operand1 = BabyBear::from_canonical_u32(rng.gen_range(elem_range()));
-        let operand2 = BabyBear::from_canonical_u32(rng.gen_range(elem_range()));
+    set_and_execute(
+        &mut tester,
+        &mut harness,
+        &mut rng,
+        opcode,
+        Some(F::ZERO), // one extra op with a zero first operand
+        None,
+    );
 
-        if opcode == FieldArithmeticOpcode::DIV && operand2.is_zero() {
-            continue;
-        }
+    let tester = tester.build().load(harness).finalize();
+    tester.simple_test().expect("Verification failed"); // num_ops + 1 ops were executed
+}
 
-        let result_as = 4usize;
-        let as1 = rng.gen_range(xy_address_space_range()) * 4;
-        let as2 = rng.gen_range(xy_address_space_range()) * 4;
-        let address1 = if as1 == 0 {
-            operand1.as_canonical_u32() as usize
-        } else {
-            gen_pointer(&mut rng, 1)
-        };
-        let address2 = if as2 == 0 {
-            operand2.as_canonical_u32() as usize
-        } else {
-            gen_pointer(&mut rng, 1)
-        };
-        assert_ne!(address1, address2);
-        let result_address = gen_pointer(&mut rng, 1);
-
-        let result = FieldArithmetic::run_field_arithmetic(opcode, operand1, operand2).unwrap();
-        tracing::debug!(
-            "{opcode:?} d = {}, e = {}, f = {}, result_addr = {}, addr1 = {}, addr2 = {}, z = {}, x = {}, y = {}",
-            result_as, as1, as2, result_address, address1, address2, result, operand1, operand2,
-        );
-
-        if as1 != 0 {
-            tester.write_cell(as1, address1, operand1);
-        }
-        if as2 != 0 {
-            tester.write_cell(as2, address2, operand2);
-        }
-        tester.execute(
-            &mut chip,
-            &Instruction::from_usize(
-                opcode.global_opcode(),
-                [result_address, address1, address2, result_as, as1, as2],
-            ),
-        );
-        assert_eq!(result, tester.read_cell(result_as, result_address));
-    }
+//////////////////////////////////////////////////////////////////////////////////////
+// NEGATIVE TESTS
+//
+// Given a fake trace of a single operation, set up a chip and run the test. We replace
+// part of the trace and check that the chip throws the expected error.
+//////////////////////////////////////////////////////////////////////////////////////
 
-    let mut tester = tester.build().load(chip).finalize();
-    tester.simple_test().expect("Verification failed");
+#[derive(Default)]
+struct FieldExpressionPrankVals {
+    a: Option<F>, // Some(v) overwrites core column `a`; None leaves it untouched
+    b: Option<F>, // same, for column `b`
+    c: Option<F>, // same, for column `c`
+    opcode_flags: Option<[bool; 4]>, // order: is_add, is_sub, is_mul, is_div
+    divisor_inv: Option<F>, // same, for column `divisor_inv`
+}
+#[allow(clippy::too_many_arguments)]
+fn run_negative_field_arithmetic_test(
+    opcode: FieldArithmeticOpcode,
+    b: F, // honest first operand that is actually executed
+    c: F, // honest second operand that is actually executed
+    prank_vals: FieldExpressionPrankVals, // columns to overwrite after execution
+    error: VerificationError, // verification error the pranked trace must produce
+) {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default_native();
+    let mut harness = create_test_chip(&tester);
 
-    disable_debug_builder();
-    // negative test pranking each IO value
-    for height in 0..num_ops {
-        // TODO: better way to modify existing traces in tester
-        let arith_trace = tester.air_proof_inputs[2]
-            .1
-            .raw
-            .common_main
-            .as_mut()
-            .unwrap();
-        let old_trace = arith_trace.clone();
-        for width in 0..FieldArithmeticCoreCols::<BabyBear>::width() {
-            let prank_value = BabyBear::from_canonical_u32(rng.gen_range(1..=100));
-            arith_trace.row_mut(height)[width] = prank_value;
-        }
+    set_and_execute(
+        &mut tester,
+        &mut harness,
+        &mut rng,
+        opcode,
+        Some(b),
+        Some(c),
+    );
 
-        // Run a test after pranking each row
-        assert_eq!(
-            tester.simple_test().err(),
-            Some(VerificationError::OodEvaluationMismatch),
-            "Expected constraint to fail"
-        );
+    let adapter_width = BaseAir::<F>::width(&harness.air.adapter); // core cols start here
+    let modify_trace = |trace: &mut DenseMatrix<F>| {
+        let mut values = trace.row_slice(0).to_vec(); // copy of row 0 only
+        let cols: &mut FieldArithmeticCoreCols<F> =
+            values.split_at_mut(adapter_width).1.borrow_mut();
+        if let Some(a) = prank_vals.a {
+            cols.a = a;
+        }
+        if let Some(b) = prank_vals.b {
+            cols.b = b;
+        }
+        if let Some(c) = prank_vals.c {
+            cols.c = c;
+        }
+        if let Some(opcode_flags) = prank_vals.opcode_flags {
+            [cols.is_add, cols.is_sub, cols.is_mul, cols.is_div] = opcode_flags.map(F::from_bool);
+        }
+        if let Some(divisor_inv) = prank_vals.divisor_inv {
+            cols.divisor_inv = divisor_inv;
+        }
+        *trace = RowMajorMatrix::new(values, trace.width()); // trace becomes the pranked row
+    };
 
-        tester.air_proof_inputs[2].1.raw.common_main = Some(old_trace);
-    }
+    disable_debug_builder();
+    let tester = tester
+        .build()
+        .load_and_prank_trace(harness, modify_trace)
+        .finalize();
+    tester.simple_test_with_expected_error(error);
 }
 
 #[test]
-fn new_field_arithmetic_air_zero_div_zero() {
-    let mut tester = VmChipTestBuilder::default();
-    let mut chip = FieldArithmeticChip::new(
-        AluNativeAdapterChip::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-        ),
-        FieldArithmeticCoreChip::new(),
-        tester.offline_memory_mutex_arc(),
+fn field_arithmetic_negative_zero_div_test() {
+    run_negative_field_arithmetic_test(
+        FieldArithmeticOpcode::DIV,
+        F::from_canonical_u32(111),
+        F::from_canonical_u32(222),
+        FieldExpressionPrankVals {
+            b: Some(F::ZERO), // case 1: only column `b` is forced to zero
+            ..Default::default()
+        },
+        VerificationError::OodEvaluationMismatch,
     );
-    tester.write_cell(4, 6, BabyBear::from_canonical_u32(111));
-    tester.write_cell(4, 7, BabyBear::from_canonical_u32(222));
 
-    tester.execute(
-        &mut chip,
-        &Instruction::from_usize(
-            FieldArithmeticOpcode::DIV.global_opcode(),
-            [5, 6, 7, 4, 4, 4],
-        ),
+    run_negative_field_arithmetic_test(
+        FieldArithmeticOpcode::DIV,
+        F::ZERO,
+        F::TWO,
+        FieldExpressionPrankVals {
+            c: Some(F::ZERO), // case 2: divisor column forced to zero (0 / 0 claimed)
+            ..Default::default()
+        },
+        VerificationError::OodEvaluationMismatch,
     );
-    tester.build();
-
-    let chip_air = chip.air();
-    let mut chip_input = chip.generate_air_proof_input();
-    // set the value of [c]_f to zero, necessary to bypass trace gen checks
-    let row = chip_input.raw.common_main.as_mut().unwrap().row_mut(0);
-    let cols: &mut FieldArithmeticCoreCols<BabyBear> = row
-        .split_at_mut(AluNativeAdapterCols::<BabyBear>::width())
-        .1
-        .borrow_mut();
-    cols.b = BabyBear::ZERO;
 
-    disable_debug_builder();
+    run_negative_field_arithmetic_test(
+        FieldArithmeticOpcode::DIV,
+        F::ZERO,
+        F::TWO,
+        FieldExpressionPrankVals {
+            c: Some(F::ZERO), // case 3: zero divisor and the row relabelled as MUL
+            opcode_flags: Some([false, false, true, false]), // order: add, sub, mul, div
+            ..Default::default()
+        },
+        VerificationError::ChallengePhaseError,
+    );
+}
 
-    assert_eq!(
-        BabyBearPoseidon2Engine::run_test_fast(vec![chip_air], vec![chip_input]).err(),
-        Some(VerificationError::OodEvaluationMismatch),
-        "Expected constraint to fail"
+#[test]
+fn field_arithmetic_negative_rand() {
+    let mut rng = create_seeded_rng(); // source of the random prank values below
+    run_negative_field_arithmetic_test(
+        FieldArithmeticOpcode::DIV,
+        F::from_canonical_u32(111),
+        F::from_canonical_u32(222),
+        FieldExpressionPrankVals {
+            a: Some(rng.gen()), // every core column is replaced with a random value
+            b: Some(rng.gen()),
+            c: Some(rng.gen()),
+            opcode_flags: Some([rng.gen(), rng.gen(), rng.gen(), rng.gen()]),
+            divisor_inv: Some(rng.gen()),
+        },
+        VerificationError::OodEvaluationMismatch,
     );
 }
 
 #[should_panic]
 #[test]
 fn new_field_arithmetic_air_test_panic() {
-    let mut tester = VmChipTestBuilder::default();
-    let mut chip = FieldArithmeticChip::new(
-        AluNativeAdapterChip::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-        ),
-        FieldArithmeticCoreChip::new(),
-        tester.offline_memory_mutex_arc(),
-    );
-    tester.write_cell(4, 0, BabyBear::ZERO);
+    let mut tester = VmChipTestBuilder::default_native();
+    let mut harness = create_test_chip(&tester);
+    tester.write(4, 0, [BabyBear::ZERO]);
     // should panic
     tester.execute(
-        &mut chip,
+        &mut harness,
         &Instruction::from_usize(
             FieldArithmeticOpcode::DIV.global_opcode(),
             [0, 0, 0, 4, 4, 4],
diff --git a/extensions/native/circuit/src/field_extension/core.rs b/extensions/native/circuit/src/field_extension/core.rs
index d8c83fabdd..692b8b35be 100644
--- a/extensions/native/circuit/src/field_extension/core.rs
+++ b/extensions/native/circuit/src/field_extension/core.rs
@@ -5,12 +5,13 @@ use std::{
 };
 
 use itertools::izip;
-use openvm_circuit::arch::{
-    AdapterAirContext, AdapterRuntimeContext, MinimalInstruction, Result, VmAdapterInterface,
-    VmCoreAir, VmCoreChip,
+use openvm_circuit::{
+    arch::*,
+    system::memory::{online::TracingMemory, MemoryAuxColsFactory},
 };
+use openvm_circuit_primitives::AlignedBytesBorrow;
 use openvm_circuit_primitives_derive::AlignedBorrow;
-use openvm_instructions::{instruction::Instruction, LocalOpcode};
+use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP, LocalOpcode};
 use openvm_native_compiler::FieldExtensionOpcode::{self, *};
 use openvm_stark_backend::{
     interaction::InteractionBuilder,
@@ -18,7 +19,6 @@ use openvm_stark_backend::{
     p3_field::{Field, FieldAlgebra, PrimeField32},
     rap::BaseAirWithPublicValues,
 };
-use serde::{Deserialize, Serialize};
 
 pub const BETA: usize = 11;
 pub const EXT_DEG: usize = 4;
@@ -34,11 +34,11 @@ pub struct FieldExtensionCoreCols<T> {
     pub is_sub: T,
     pub is_mul: T,
     pub is_div: T,
-    /// `divisor_inv` is y.inverse() when opcode is FDIV and zero otherwise.
+    /// `divisor_inv` is z.inverse() when opcode is FDIV and zero otherwise.
     pub divisor_inv: [T; EXT_DEG],
 }
 
-#[derive(Copy, Clone, Debug)]
+#[derive(derive_new::new, Copy, Clone, Debug)]
 pub struct FieldExtensionCoreAir {}
 
 impl<F: Field> BaseAir<F> for FieldExtensionCoreAir {
@@ -78,8 +78,8 @@ where
         // - Each flag in `flags` is a boolean.
         // - Exactly one flag in `flags` is true.
         // - The inner product of the `flags` and `opcodes` equals `io.opcode`.
-        // - The inner product of the `flags` and `results[:,j]` equals `io.z[j]` for each `j`.
-        // - If `is_div` is true, then `aux.divisor_inv` correctly represents the inverse of `io.y`.
+        // - The inner product of the `flags` and `results[:,j]` equals `io.x[j]` for each `j`.
+        // - If `is_div` is true, then `aux.divisor_inv` correctly represents the inverse of `io.z`.
 
         let mut is_valid = AB::Expr::ZERO;
         let mut expected_opcode = AB::Expr::ZERO;
@@ -133,116 +133,122 @@ where
 }
 
 #[repr(C)]
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(AlignedBytesBorrow, Debug)]
 pub struct FieldExtensionRecord<F> {
-    pub opcode: FieldExtensionOpcode,
-    pub x: [F; EXT_DEG],
     pub y: [F; EXT_DEG],
     pub z: [F; EXT_DEG],
+    pub local_opcode: u8,
 }
 
-pub struct FieldExtensionCoreChip {
-    pub air: FieldExtensionCoreAir,
+#[derive(derive_new::new, Clone, Copy)]
+pub struct FieldExtensionCoreExecutor<A> {
+    adapter: A,
 }
 
-impl FieldExtensionCoreChip {
-    pub fn new() -> Self {
-        Self {
-            air: FieldExtensionCoreAir {},
-        }
-    }
-}
-
-impl Default for FieldExtensionCoreChip {
-    fn default() -> Self {
-        Self::new()
-    }
+#[derive(derive_new::new)]
+pub struct FieldExtensionCoreFiller<A> {
+    adapter: A,
 }
 
-impl<F: PrimeField32, I: VmAdapterInterface<F>> VmCoreChip<F, I> for FieldExtensionCoreChip
+impl<F, A, RA> PreflightExecutor<F, RA> for FieldExtensionCoreExecutor<A>
 where
-    I::Reads: Into<[[F; EXT_DEG]; 2]>,
-    I::Writes: From<[[F; EXT_DEG]; 1]>,
+    F: PrimeField32,
+    A: 'static + AdapterTraceExecutor<F, ReadData = [[F; EXT_DEG]; 2], WriteData = [F; EXT_DEG]>,
+    for<'buf> RA: RecordArena<
+        'buf,
+        EmptyAdapterCoreLayout<F, A>,
+        (A::RecordMut<'buf>, &'buf mut FieldExtensionRecord<F>),
+    >,
 {
-    type Record = FieldExtensionRecord<F>;
-    type Air = FieldExtensionCoreAir;
+    fn get_opcode_name(&self, opcode: usize) -> String {
+        format!(
+            "{:?}",
+            FieldExtensionOpcode::from_usize(opcode - FieldExtensionOpcode::CLASS_OFFSET)
+        )
+    }
 
-    #[allow(clippy::type_complexity)]
-    fn execute_instruction(
+    fn execute(
         &self,
+        state: VmStateMut<F, TracingMemory, RA>,
         instruction: &Instruction<F>,
-        _from_pc: u32,
-        reads: I::Reads,
-    ) -> Result<(AdapterRuntimeContext<F, I>, Self::Record)> {
-        let Instruction { opcode, .. } = instruction;
-        let local_opcode_idx = opcode.local_opcode_idx(FieldExtensionOpcode::CLASS_OFFSET);
+    ) -> Result<(), ExecutionError> {
+        let &Instruction { opcode, .. } = instruction;
 
-        let data: [[F; EXT_DEG]; 2] = reads.into();
-        let y: [F; EXT_DEG] = data[0];
-        let z: [F; EXT_DEG] = data[1];
+        let (mut adapter_record, core_record) = state.ctx.alloc(EmptyAdapterCoreLayout::new());
 
-        let x = FieldExtension::solve(FieldExtensionOpcode::from_usize(local_opcode_idx), y, z)
-            .unwrap();
+        A::start(*state.pc, state.memory, &mut adapter_record);
 
-        let output = AdapterRuntimeContext {
-            to_pc: None,
-            writes: [x].into(),
-        };
+        core_record.local_opcode =
+            opcode.local_opcode_idx(FieldExtensionOpcode::CLASS_OFFSET) as u8;
 
-        let record = Self::Record {
-            opcode: FieldExtensionOpcode::from_usize(local_opcode_idx),
-            x,
-            y,
-            z,
-        };
+        [core_record.y, core_record.z] =
+            self.adapter
+                .read(state.memory, instruction, &mut adapter_record);
 
-        Ok((output, record))
-    }
+        let x = run_field_extension(
+            FieldExtensionOpcode::from_usize(core_record.local_opcode as usize),
+            core_record.y,
+            core_record.z,
+        );
 
-    fn get_opcode_name(&self, opcode: usize) -> String {
-        format!(
-            "{:?}",
-            FieldExtensionOpcode::from_usize(opcode - FieldExtensionOpcode::CLASS_OFFSET)
-        )
+        self.adapter
+            .write(state.memory, instruction, x, &mut adapter_record);
+
+        *state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP);
+
+        Ok(())
     }
+}
+
+impl<F, A> TraceFiller<F> for FieldExtensionCoreFiller<A>
+where
+    F: PrimeField32,
+    A: 'static + AdapterTraceFiller<F>,
+{
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, row_slice: &mut [F]) {
+        let (adapter_row, mut core_row) = unsafe { row_slice.split_at_mut_unchecked(A::WIDTH) };
+        self.adapter.fill_trace_row(mem_helper, adapter_row);
+
+        let record: &FieldExtensionRecord<F> = unsafe { get_record_from_slice(&mut core_row, ()) };
+        let core_row: &mut FieldExtensionCoreCols<_> = core_row.borrow_mut();
 
-    fn generate_trace_row(&self, row_slice: &mut [F], record: Self::Record) {
-        let FieldExtensionRecord { opcode, x, y, z } = record;
-        let cols: &mut FieldExtensionCoreCols<_> = row_slice.borrow_mut();
-        cols.x = x;
-        cols.y = y;
-        cols.z = z;
-        cols.is_add = F::from_bool(opcode == FieldExtensionOpcode::FE4ADD);
-        cols.is_sub = F::from_bool(opcode == FieldExtensionOpcode::FE4SUB);
-        cols.is_mul = F::from_bool(opcode == FieldExtensionOpcode::BBE4MUL);
-        cols.is_div = F::from_bool(opcode == FieldExtensionOpcode::BBE4DIV);
-        cols.divisor_inv = if opcode == FieldExtensionOpcode::BBE4DIV {
-            FieldExtension::invert(z)
+        // Fill columns back-to-front: `record` shares its buffer with `core_row`.
+        let opcode = FieldExtensionOpcode::from_usize(record.local_opcode as usize);
+        if opcode == FieldExtensionOpcode::BBE4DIV {
+            core_row.divisor_inv = FieldExtension::invert(record.z);
         } else {
-            [F::ZERO; EXT_DEG]
-        };
-    }
+            core_row.divisor_inv = [F::ZERO; EXT_DEG];
+        }
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        core_row.is_div = F::from_bool(opcode == FieldExtensionOpcode::BBE4DIV);
+        core_row.is_mul = F::from_bool(opcode == FieldExtensionOpcode::BBE4MUL);
+        core_row.is_sub = F::from_bool(opcode == FieldExtensionOpcode::FE4SUB);
+        core_row.is_add = F::from_bool(opcode == FieldExtensionOpcode::FE4ADD);
+
+        core_row.z = record.z;
+        core_row.y = record.y;
+        core_row.x = run_field_extension(opcode, core_row.y, core_row.z);
     }
 }
 
-pub struct FieldExtension;
-impl FieldExtension {
-    pub(super) fn solve<F: Field>(
-        opcode: FieldExtensionOpcode,
-        x: [F; EXT_DEG],
-        y: [F; EXT_DEG],
-    ) -> Option<[F; EXT_DEG]> {
-        match opcode {
-            FieldExtensionOpcode::FE4ADD => Some(Self::add(x, y)),
-            FieldExtensionOpcode::FE4SUB => Some(Self::subtract(x, y)),
-            FieldExtensionOpcode::BBE4MUL => Some(Self::multiply(x, y)),
-            FieldExtensionOpcode::BBE4DIV => Some(Self::divide(x, y)),
-        }
+/// Returns the result of applying the field extension operation `opcode` to `y` and `z`.
+/// Panics on division by zero.
+pub(super) fn run_field_extension<F: Field>(
+    opcode: FieldExtensionOpcode,
+    y: [F; EXT_DEG],
+    z: [F; EXT_DEG],
+) -> [F; EXT_DEG] {
+    match opcode {
+        FieldExtensionOpcode::FE4ADD => FieldExtension::add(y, z),
+        FieldExtensionOpcode::FE4SUB => FieldExtension::subtract(y, z),
+        FieldExtensionOpcode::BBE4MUL => FieldExtension::multiply(y, z),
+        FieldExtensionOpcode::BBE4DIV => FieldExtension::divide(y, z),
     }
+}
 
+pub(crate) struct FieldExtension;
+
+impl FieldExtension {
     pub(crate) fn add<V, E>(x: [V; EXT_DEG], y: [V; EXT_DEG]) -> [E; EXT_DEG]
     where
         V: Copy,
diff --git a/extensions/native/circuit/src/field_extension/execution.rs b/extensions/native/circuit/src/field_extension/execution.rs
new file mode 100644
index 0000000000..7b4802987e
--- /dev/null
+++ b/extensions/native/circuit/src/field_extension/execution.rs
@@ -0,0 +1,165 @@
+use std::borrow::{Borrow, BorrowMut};
+
+use openvm_circuit::{arch::*, system::memory::online::GuestMemory};
+use openvm_circuit_primitives::AlignedBytesBorrow;
+use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP, LocalOpcode};
+use openvm_native_compiler::{conversion::AS, FieldExtensionOpcode};
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::core::{FieldExtension, FieldExtensionCoreExecutor, EXT_DEG};
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct FieldExtensionPreCompute { // decoded instruction operands cached by `pre_compute_impl`
+    a: u32, // native-address-space pointer the result `x` is written to
+    b: u32, // native-address-space pointer the first operand `y` is read from
+    c: u32, // native-address-space pointer the second operand `z` is read from
+}
+
+impl<A> FieldExtensionCoreExecutor<A> { // decoding shared by `pre_compute` and `metered_pre_compute`
+    #[inline(always)]
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut FieldExtensionPreCompute,
+    ) -> Result<u8, StaticProgramError> {
+        let &Instruction {
+            opcode,
+            a,
+            b,
+            c,
+            d,
+            e,
+            ..
+        } = inst;
+        // Map the global opcode to this extension's local opcode enum.
+        let local_opcode = FieldExtensionOpcode::from_usize(
+            opcode.local_opcode_idx(FieldExtensionOpcode::CLASS_OFFSET),
+        );
+        // Operands are field elements; take their canonical u32 representatives.
+        let a = a.as_canonical_u32();
+        let b = b.as_canonical_u32();
+        let c = c.as_canonical_u32();
+        let d = d.as_canonical_u32();
+        let e = e.as_canonical_u32();
+        // `d` and `e` must both equal `AS::Native`; `pc` is carried in the error.
+        if d != AS::Native as u32 {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+        if e != AS::Native as u32 {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+        // Only the three pointers are cached; `d` and `e` were validated above.
+        *data = FieldExtensionPreCompute { a, b, c };
+        // Returned as `u8` so the callers can select a handler by opcode index.
+        Ok(local_opcode as u8)
+    }
+}
+
+impl<F, A> Executor<F> for FieldExtensionCoreExecutor<A>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize { // byte size of `FieldExtensionPreCompute`
+        size_of::<FieldExtensionPreCompute>()
+    }
+    /// Decodes `inst` into `data` and returns the handler specialized for its opcode.
+    #[inline(always)]
+    fn pre_compute<Ctx: ExecutionCtxTrait>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let pre_compute: &mut FieldExtensionPreCompute = data.borrow_mut();
+        // Fill the buffer in place; `?` propagates `InvalidInstruction` from the decoder.
+        let opcode = self.pre_compute_impl(pc, inst, pre_compute)?;
+        // The opcode index becomes the const generic `OPCODE` of the returned handler.
+        let fn_ptr = match opcode {
+            0 => execute_e1_impl::<_, _, 0>, // FE4ADD
+            1 => execute_e1_impl::<_, _, 1>, // FE4SUB
+            2 => execute_e1_impl::<_, _, 2>, // BBE4MUL
+            3 => execute_e1_impl::<_, _, 3>, // BBE4DIV
+            _ => panic!("Invalid field extension opcode: {opcode}"),
+        };
+
+        Ok(fn_ptr)
+    }
+}
+
+impl<F, A> MeteredExecutor<F> for FieldExtensionCoreExecutor<A>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn metered_pre_compute_size(&self) -> usize { // byte size of the `E2PreCompute` wrapper
+        size_of::<E2PreCompute<FieldExtensionPreCompute>>()
+    }
+    /// Like `pre_compute`, but also records `chip_idx` and returns the metered handler.
+    #[inline(always)]
+    fn metered_pre_compute<Ctx: MeteredExecutionCtxTrait>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let pre_compute: &mut E2PreCompute<FieldExtensionPreCompute> = data.borrow_mut();
+        pre_compute.chip_idx = chip_idx as u32; // NOTE: `as` truncates values above `u32::MAX`
+        // Decode the operands into the wrapped payload; `?` propagates decoder errors.
+        let opcode = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?;
+        // The opcode index becomes the const generic `OPCODE` of the returned handler.
+        let fn_ptr = match opcode {
+            0 => execute_e2_impl::<_, _, 0>, // FE4ADD
+            1 => execute_e2_impl::<_, _, 1>, // FE4SUB
+            2 => execute_e2_impl::<_, _, 2>, // BBE4MUL
+            3 => execute_e2_impl::<_, _, 3>, // BBE4DIV
+            _ => panic!("Invalid field extension opcode: {opcode}"),
+        };
+
+        Ok(fn_ptr)
+    }
+}
+
+#[inline(always)]
+unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait, const OPCODE: u8>(
+    pre_compute: &FieldExtensionPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) { // shared body of the e1/e2 handlers: x = y <op> z, all in the native address space
+    let y: [F; EXT_DEG] = vm_state.vm_read::<F, EXT_DEG>(AS::Native as u32, pre_compute.b);
+    let z: [F; EXT_DEG] = vm_state.vm_read::<F, EXT_DEG>(AS::Native as u32, pre_compute.c);
+    // `OPCODE` is a const generic, so each handler instantiation has one fixed operation.
+    let x = match OPCODE {
+        0 => FieldExtension::add(y, z),      // FE4ADD
+        1 => FieldExtension::subtract(y, z), // FE4SUB
+        2 => FieldExtension::multiply(y, z), // BBE4MUL
+        3 => FieldExtension::divide(y, z),   // BBE4DIV
+        _ => panic!("Invalid field extension opcode: {OPCODE}"),
+    };
+    // Store the result at pointer `a`.
+    vm_state.vm_write(AS::Native as u32, pre_compute.a, &x);
+    // Step the pc by `DEFAULT_PC_STEP` (wrapping) and bump the `instret` counter.
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+}
+
+unsafe fn execute_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait, const OPCODE: u8>(
+    pre_compute: &[u8], // presumably the buffer filled by `Executor::pre_compute` — confirm at call site
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &FieldExtensionPreCompute = pre_compute.borrow(); // view bytes as the struct
+    execute_e12_impl::<F, CTX, OPCODE>(pre_compute, vm_state); // unmetered: just run the operation
+}
+
+unsafe fn execute_e2_impl<F: PrimeField32, CTX: MeteredExecutionCtxTrait, const OPCODE: u8>(
+    pre_compute: &[u8], // bytes viewed as `E2PreCompute<FieldExtensionPreCompute>`
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &E2PreCompute<FieldExtensionPreCompute> = pre_compute.borrow();
+    vm_state
+        .ctx
+        .on_height_change(pre_compute.chip_idx as usize, 1); // height change of 1 per instruction
+    execute_e12_impl::<F, CTX, OPCODE>(&pre_compute.data, vm_state); // then run the operation
+}
diff --git a/extensions/native/circuit/src/field_extension/mod.rs b/extensions/native/circuit/src/field_extension/mod.rs
index d109deb528..cb15ef75f7 100644
--- a/extensions/native/circuit/src/field_extension/mod.rs
+++ b/extensions/native/circuit/src/field_extension/mod.rs
@@ -1,16 +1,19 @@
 use openvm_circuit::arch::{VmAirWrapper, VmChipWrapper};
 
-use super::adapters::native_vectorized_adapter::{
-    NativeVectorizedAdapterAir, NativeVectorizedAdapterChip,
+use crate::adapters::{
+    NativeVectorizedAdapterAir, NativeVectorizedAdapterExecutor, NativeVectorizedAdapterFiller,
 };
 
-#[cfg(test)]
-mod tests;
-
 mod core;
+mod execution;
 pub use core::*;
 
+#[cfg(test)]
+mod tests;
+
 pub type FieldExtensionAir =
     VmAirWrapper<NativeVectorizedAdapterAir<EXT_DEG>, FieldExtensionCoreAir>;
+pub type FieldExtensionExecutor =
+    FieldExtensionCoreExecutor<NativeVectorizedAdapterExecutor<EXT_DEG>>;
 pub type FieldExtensionChip<F> =
-    VmChipWrapper<F, NativeVectorizedAdapterChip<F, EXT_DEG>, FieldExtensionCoreChip>;
+    VmChipWrapper<F, FieldExtensionCoreFiller<NativeVectorizedAdapterFiller<EXT_DEG>>>;
diff --git a/extensions/native/circuit/src/field_extension/tests.rs b/extensions/native/circuit/src/field_extension/tests.rs
index 66d6c94004..afe6b649ba 100644
--- a/extensions/native/circuit/src/field_extension/tests.rs
+++ b/extensions/native/circuit/src/field_extension/tests.rs
@@ -1,102 +1,228 @@
 use std::{
     array,
+    borrow::BorrowMut,
     ops::{Add, Div, Mul, Sub},
 };
 
-use openvm_circuit::arch::testing::{memory::gen_pointer, VmChipTestBuilder};
+use openvm_circuit::arch::testing::{memory::gen_pointer, TestChipHarness, VmChipTestBuilder};
 use openvm_instructions::{instruction::Instruction, LocalOpcode};
-use openvm_native_compiler::FieldExtensionOpcode;
+use openvm_native_compiler::{conversion::AS, FieldExtensionOpcode};
 use openvm_stark_backend::{
+    p3_air::BaseAir,
     p3_field::{extension::BinomialExtensionField, FieldAlgebra, FieldExtensionAlgebra},
+    p3_matrix::{
+        dense::{DenseMatrix, RowMajorMatrix},
+        Matrix,
+    },
     utils::disable_debug_builder,
     verifier::VerificationError,
-    ChipUsageGetter,
 };
 use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
-use rand::Rng;
-use strum::EnumCount;
+use rand::{rngs::StdRng, Rng};
+use test_case::test_case;
 
-use super::{
-    super::adapters::native_vectorized_adapter::NativeVectorizedAdapterChip, FieldExtension,
-    FieldExtensionChip, FieldExtensionCoreChip,
+use crate::{
+    adapters::{
+        NativeVectorizedAdapterAir, NativeVectorizedAdapterExecutor, NativeVectorizedAdapterFiller,
+    },
+    field_extension::run_field_extension,
+    test_utils::write_native_array,
+    FieldExtension, FieldExtensionAir, FieldExtensionChip, FieldExtensionCoreAir,
+    FieldExtensionCoreCols, FieldExtensionCoreFiller, FieldExtensionExecutor, EXT_DEG,
 };
 
-#[test]
-fn new_field_extension_air_test() {
-    type F = BabyBear;
+const MAX_INS_CAPACITY: usize = 128;
+type F = BabyBear;
+type Harness = TestChipHarness<F, FieldExtensionExecutor, FieldExtensionAir, FieldExtensionChip<F>>;
+
+fn create_test_chip(tester: &VmChipTestBuilder<F>) -> Harness {
+    let air = FieldExtensionAir::new(
+        NativeVectorizedAdapterAir::new(tester.execution_bridge(), tester.memory_bridge()),
+        FieldExtensionCoreAir::new(),
+    );
+    let executor = FieldExtensionExecutor::new(NativeVectorizedAdapterExecutor::new());
+    let chip = FieldExtensionChip::<F>::new(
+        FieldExtensionCoreFiller::new(NativeVectorizedAdapterFiller),
+        tester.memory_helper(),
+    );
 
-    let mut tester = VmChipTestBuilder::default();
-    let mut chip = FieldExtensionChip::new(
-        NativeVectorizedAdapterChip::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
+    Harness::with_capacity(executor, air, chip, MAX_INS_CAPACITY)
+}
+
+fn set_and_execute(
+    tester: &mut VmChipTestBuilder<F>,
+    harness: &mut Harness,
+    rng: &mut StdRng,
+    opcode: FieldExtensionOpcode,
+    y: Option<[F; EXT_DEG]>,
+    z: Option<[F; EXT_DEG]>,
+) {
+    let (y_val, y_ptr) = write_native_array(tester, rng, y);
+    let (z_val, z_ptr) = write_native_array(tester, rng, z);
+
+    let x_ptr = gen_pointer(rng, EXT_DEG);
+
+    tester.execute(
+        harness,
+        &Instruction::from_usize(
+            opcode.global_opcode(),
+            [
+                x_ptr,
+                y_ptr,
+                z_ptr,
+                AS::Native as usize,
+                AS::Native as usize,
+            ],
         ),
-        FieldExtensionCoreChip::new(),
-        tester.offline_memory_mutex_arc(),
     );
-    let trace_width = chip.trace_width();
 
+    let result = tester.read::<EXT_DEG>(AS::Native as usize, x_ptr);
+    let expected = run_field_extension(opcode, y_val, z_val);
+    assert_eq!(result, expected);
+}
+
+///////////////////////////////////////////////////////////////////////////////////////
+// POSITIVE TESTS
+//
+// Randomly generate computations and execute, ensuring that the generated trace
+// passes all constraints.
+///////////////////////////////////////////////////////////////////////////////////////
+
+#[test_case(FieldExtensionOpcode::FE4ADD, 100)]
+#[test_case(FieldExtensionOpcode::FE4SUB, 100)]
+#[test_case(FieldExtensionOpcode::BBE4MUL, 100)]
+#[test_case(FieldExtensionOpcode::BBE4DIV, 100)]
+fn rand_field_extension_test(opcode: FieldExtensionOpcode, num_ops: usize) {
     let mut rng = create_seeded_rng();
-    let num_ops: usize = 7; // test padding with dummy row
+    let mut tester = VmChipTestBuilder::default_native();
+    let mut harness = create_test_chip(&tester);
 
     for _ in 0..num_ops {
-        let opcode =
-            FieldExtensionOpcode::from_usize(rng.gen_range(0..FieldExtensionOpcode::COUNT));
+        set_and_execute(&mut tester, &mut harness, &mut rng, opcode, None, None);
+    }
 
-        let as_d = 4usize;
-        let as_e = 4usize;
-        let address1 = gen_pointer(&mut rng, 4);
-        let address2 = gen_pointer(&mut rng, 4);
-        let result_address = gen_pointer(&mut rng, 4);
+    let tester = tester.build().load(harness).finalize();
+    tester.simple_test().expect("Verification failed");
+}
 
-        let operand1 = array::from_fn(|_| rng.gen::<F>());
-        let operand2 = array::from_fn(|_| rng.gen::<F>());
+//////////////////////////////////////////////////////////////////////////////////////
+// NEGATIVE TESTS
+//
+// Given a fake trace of a single operation, set up a chip and run the test. We replace
+// part of the trace and check that the chip throws the expected error.
+//////////////////////////////////////////////////////////////////////////////////////
 
-        assert!(address1.abs_diff(address2) >= 4);
+#[derive(Clone, Copy, Default)]
+struct FieldExtensionPrankValues {
+    pub x: Option<[F; EXT_DEG]>,
+    pub y: Option<[F; EXT_DEG]>,
+    pub z: Option<[F; EXT_DEG]>,
+    pub opcode_flags: Option<[bool; 4]>,
+    pub divisor_inv: Option<[F; EXT_DEG]>,
+}
 
-        tester.write(as_d, address1, operand1);
-        tester.write(as_e, address2, operand2);
+fn run_negative_field_extension_test(
+    opcode: FieldExtensionOpcode,
+    y: Option<[F; EXT_DEG]>,
+    z: Option<[F; EXT_DEG]>,
+    prank_vals: FieldExtensionPrankValues,
+    error: VerificationError,
+) {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default_native();
+    let mut harness = create_test_chip(&tester);
+    set_and_execute(&mut tester, &mut harness, &mut rng, opcode, y, z);
 
-        let result = FieldExtension::solve(opcode, operand1, operand2).unwrap();
+    let adapter_width = BaseAir::<F>::width(&harness.air.adapter);
+    let modify_trace = |trace: &mut DenseMatrix<F>| {
+        let mut values = trace.row_slice(0).to_vec();
+        let core_cols: &mut FieldExtensionCoreCols<F> =
+            values.split_at_mut(adapter_width).1.borrow_mut();
 
-        tester.execute(
-            &mut chip,
-            &Instruction::from_usize(
-                opcode.global_opcode(),
-                [result_address, address1, address2, as_d, as_e],
-            ),
-        );
-        assert_eq!(result, tester.read(as_d, result_address));
-    }
+        if let Some(x) = prank_vals.x {
+            core_cols.x = x;
+        }
+        if let Some(y) = prank_vals.y {
+            core_cols.y = y;
+        }
+        if let Some(z) = prank_vals.z {
+            core_cols.z = z;
+        }
+        if let Some(opcode_flags) = prank_vals.opcode_flags {
+            [
+                core_cols.is_add,
+                core_cols.is_sub,
+                core_cols.is_mul,
+                core_cols.is_div,
+            ] = opcode_flags.map(F::from_bool);
+        }
+        if let Some(divisor_inv) = prank_vals.divisor_inv {
+            core_cols.divisor_inv = divisor_inv;
+        }
 
-    // positive test
-    let mut tester = tester.build().load(chip).finalize();
-    tester.simple_test().expect("Verification failed");
+        *trace = RowMajorMatrix::new(values, trace.width());
+    };
 
     disable_debug_builder();
-    // negative test pranking each IO value
-    for height in [0, num_ops - 1] {
-        // TODO: better way to modify existing traces in tester
-        let extension_trace = tester.air_proof_inputs[2]
-            .1
-            .raw
-            .common_main
-            .as_mut()
-            .unwrap();
-        let original_trace = extension_trace.clone();
-        for width in 0..trace_width {
-            let prank_value = BabyBear::from_canonical_u32(rng.gen_range(1..=100));
-            extension_trace.row_mut(height)[width] = prank_value;
-        }
+    let tester = tester
+        .build()
+        .load_and_prank_trace(harness, modify_trace)
+        .finalize();
+    tester.simple_test_with_expected_error(error);
+}
 
-        assert_eq!(
-            tester.simple_test().err(),
-            Some(VerificationError::OodEvaluationMismatch),
-            "Expected constraint to fail"
-        );
-        tester.air_proof_inputs[2].1.raw.common_main = Some(original_trace);
-    }
+#[test]
+fn rand_negative_field_extension_test() {
+    let mut rng = create_seeded_rng();
+    run_negative_field_extension_test(
+        FieldExtensionOpcode::FE4ADD,
+        None,
+        None,
+        FieldExtensionPrankValues {
+            x: Some(array::from_fn(|_| rng.gen::<F>())),
+            y: Some(array::from_fn(|_| rng.gen::<F>())),
+            z: Some(array::from_fn(|_| rng.gen::<F>())),
+            opcode_flags: Some(array::from_fn(|_| rng.gen_bool(0.5))),
+            divisor_inv: Some(array::from_fn(|_| rng.gen::<F>())),
+        },
+        VerificationError::OodEvaluationMismatch,
+    );
+}
+
+#[test]
+fn field_extension_negative_tests() {
+    run_negative_field_extension_test(
+        FieldExtensionOpcode::BBE4DIV,
+        None,
+        None,
+        FieldExtensionPrankValues {
+            z: Some([F::ZERO; EXT_DEG]),
+            ..Default::default()
+        },
+        VerificationError::OodEvaluationMismatch,
+    );
+
+    run_negative_field_extension_test(
+        FieldExtensionOpcode::BBE4DIV,
+        None,
+        None,
+        FieldExtensionPrankValues {
+            divisor_inv: Some([F::ZERO; EXT_DEG]),
+            ..Default::default()
+        },
+        VerificationError::OodEvaluationMismatch,
+    );
+
+    run_negative_field_extension_test(
+        FieldExtensionOpcode::BBE4MUL,
+        Some([F::ZERO; EXT_DEG]),
+        None,
+        FieldExtensionPrankValues {
+            z: Some([F::ZERO; EXT_DEG]),
+            ..Default::default()
+        },
+        VerificationError::ChallengePhaseError,
+    );
 }
 
 #[test]
diff --git a/extensions/native/circuit/src/fri/execution.rs b/extensions/native/circuit/src/fri/execution.rs
new file mode 100644
index 0000000000..7af4034ed9
--- /dev/null
+++ b/extensions/native/circuit/src/fri/execution.rs
@@ -0,0 +1,193 @@
+use std::borrow::{Borrow, BorrowMut};
+
+use openvm_circuit::{arch::*, system::memory::online::GuestMemory};
+use openvm_circuit_primitives::AlignedBytesBorrow;
+use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP};
+use openvm_native_compiler::conversion::AS;
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::{elem_to_ext, FriReducedOpeningExecutor};
+use crate::field_extension::{FieldExtension, EXT_DEG};
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct FriReducedOpeningPreCompute { // instruction operands a..g as canonical u32 values
+    a_ptr_ptr: u32, // native address of the cell holding the base pointer of the `a` values
+    b_ptr_ptr: u32, // native address of the cell holding the base pointer of the `b` values
+    length_ptr: u32, // native address of the cell holding the number of (a, b) pairs
+    alpha_ptr: u32, // native address `alpha` is read from
+    result_ptr: u32, // native address the accumulated result is written to
+    hint_id_ptr: u32, // native address of the cell holding the index into `streams.hint_space`
+    is_init_ptr: u32, // native address of the flag; 0 means `a` is taken from the hint stream
+}
+
+impl FriReducedOpeningExecutor { // decoding shared by `pre_compute` and `metered_pre_compute`
+    #[inline(always)]
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        _pc: u32, // unused: no operand validation happens here
+        inst: &Instruction<F>,
+        data: &mut FriReducedOpeningPreCompute,
+    ) -> Result<(), StaticProgramError> {
+        let &Instruction {
+            a,
+            b,
+            c,
+            d,
+            e,
+            f,
+            g,
+            ..
+        } = inst;
+        // Each operand is a field element; keep its canonical u32 representative.
+        let a_ptr_ptr = a.as_canonical_u32();
+        let b_ptr_ptr = b.as_canonical_u32();
+        let length_ptr = c.as_canonical_u32();
+        let alpha_ptr = d.as_canonical_u32();
+        let result_ptr = e.as_canonical_u32();
+        let hint_id_ptr = f.as_canonical_u32();
+        let is_init_ptr = g.as_canonical_u32();
+        // Cache all seven operands for the execute handlers.
+        *data = FriReducedOpeningPreCompute {
+            a_ptr_ptr,
+            b_ptr_ptr,
+            length_ptr,
+            alpha_ptr,
+            result_ptr,
+            hint_id_ptr,
+            is_init_ptr,
+        };
+        // This function has no failing path; the `Result` return type is kept anyway.
+        Ok(())
+    }
+}
+
+impl<F> Executor<F> for FriReducedOpeningExecutor
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize { // byte size of `FriReducedOpeningPreCompute`
+        size_of::<FriReducedOpeningPreCompute>()
+    }
+    /// Decodes `inst` into `data` and returns the unmetered handler.
+    #[inline(always)]
+    fn pre_compute<Ctx: ExecutionCtxTrait>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let pre_compute: &mut FriReducedOpeningPreCompute = data.borrow_mut();
+        // Fill the buffer in place with the decoded operands.
+        self.pre_compute_impl(pc, inst, pre_compute)?;
+        // There is a single opcode, so one handler serves every instruction.
+        let fn_ptr = execute_e1_impl;
+        Ok(fn_ptr)
+    }
+}
+
+impl<F> MeteredExecutor<F> for FriReducedOpeningExecutor
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn metered_pre_compute_size(&self) -> usize { // byte size of the `E2PreCompute` wrapper
+        size_of::<E2PreCompute<FriReducedOpeningPreCompute>>()
+    }
+    /// Like `pre_compute`, but also records `chip_idx` and returns the metered handler.
+    #[inline(always)]
+    fn metered_pre_compute<Ctx: MeteredExecutionCtxTrait>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let pre_compute: &mut E2PreCompute<FriReducedOpeningPreCompute> = data.borrow_mut();
+        pre_compute.chip_idx = chip_idx as u32; // NOTE: `as` truncates values above `u32::MAX`
+        // Decode the operands into the wrapped payload.
+        self.pre_compute_impl(pc, inst, &mut pre_compute.data)?;
+        // There is a single opcode, so one handler serves every instruction.
+        let fn_ptr = execute_e2_impl;
+        Ok(fn_ptr)
+    }
+}
+
+unsafe fn execute_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait>(
+    pre_compute: &[u8], // presumably the buffer filled by `Executor::pre_compute` — confirm at call site
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &FriReducedOpeningPreCompute = pre_compute.borrow(); // view bytes as the struct
+    execute_e12_impl(pre_compute, vm_state); // unmetered: the returned height is discarded
+}
+
+unsafe fn execute_e2_impl<F: PrimeField32, CTX: MeteredExecutionCtxTrait>(
+    pre_compute: &[u8], // bytes viewed as `E2PreCompute<FriReducedOpeningPreCompute>`
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &E2PreCompute<FriReducedOpeningPreCompute> = pre_compute.borrow();
+    let height = execute_e12_impl(&pre_compute.data, vm_state); // `length + 2`, from memory reads
+    vm_state
+        .ctx
+        .on_height_change(pre_compute.chip_idx as usize, height); // reported after execution
+}
+
+/// Shared body of both FRI reduced-opening handlers; returns `length + 2` (the metered height).
+#[inline(always)]
+unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait>(
+    pre_compute: &FriReducedOpeningPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) -> u32 {
+    let alpha = vm_state.vm_read(AS::Native as u32, pre_compute.alpha_ptr);
+
+    let [length_f]: [F; 1] = vm_state.vm_read(AS::Native as u32, pre_compute.length_ptr);
+    let length = length_f.as_canonical_u32() as usize;
+
+    let [a_ptr]: [F; 1] = vm_state.vm_read(AS::Native as u32, pre_compute.a_ptr_ptr);
+    let [b_ptr]: [F; 1] = vm_state.vm_read(AS::Native as u32, pre_compute.b_ptr_ptr);
+
+    let [is_init_f]: [F; 1] = vm_state.vm_read(AS::Native as u32, pre_compute.is_init_ptr);
+    let is_init = is_init_f.as_canonical_u32();
+
+    let [hint_id_f]: [F; 1] = vm_state.host_read(AS::Native as u32, pre_compute.hint_id_ptr);
+    let hint_id = hint_id_f.as_canonical_u32() as usize;
+
+    // When `is_init == 0` the `a` values come from the hint stream; otherwise from memory.
+    let hinted_a = if is_init == 0 {
+        vm_state.streams.hint_space[hint_id].drain(0..length).collect()
+    } else {
+        vec![]
+    };
+
+    let mut pairs = Vec::with_capacity(length);
+    #[allow(clippy::needless_range_loop)]
+    for idx in 0..length {
+        let a_addr = (a_ptr + F::from_canonical_usize(idx)).as_canonical_u32();
+        let [a]: [F; 1] = if is_init == 0 {
+            vm_state.vm_write(AS::Native as u32, a_addr, &[hinted_a[idx]]);
+            [hinted_a[idx]]
+        } else {
+            vm_state.vm_read(AS::Native as u32, a_addr)
+        };
+        let b_addr = (b_ptr + F::from_canonical_usize(EXT_DEG * idx)).as_canonical_u32();
+        let b = vm_state.vm_read(AS::Native as u32, b_addr);
+        pairs.push((a, b));
+    }
+
+    let mut acc = [F::ZERO; EXT_DEG];
+    for (a, b) in pairs.into_iter().rev() {
+        // Last pair first: acc = acc * alpha + (b - a)
+        acc = FieldExtension::add(
+            FieldExtension::multiply(acc, alpha),
+            FieldExtension::subtract(b, elem_to_ext(a)),
+        );
+    }
+
+    vm_state.vm_write(AS::Native as u32, pre_compute.result_ptr, &acc);
+
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+
+    length as u32 + 2
+}
diff --git a/extensions/native/circuit/src/fri/mod.rs b/extensions/native/circuit/src/fri/mod.rs
index 7dbc3fd851..e7ce39def9 100644
--- a/extensions/native/circuit/src/fri/mod.rs
+++ b/extensions/native/circuit/src/fri/mod.rs
@@ -2,38 +2,35 @@ use core::ops::Deref;
 use std::{
     borrow::{Borrow, BorrowMut},
     mem::offset_of,
-    sync::{Arc, Mutex},
 };
 
-use itertools::{zip_eq, Itertools};
+use itertools::zip_eq;
 use openvm_circuit::{
-    arch::{
-        ExecutionBridge, ExecutionBus, ExecutionError, ExecutionState, InstructionExecutor, Streams,
-    },
+    arch::*,
     system::{
         memory::{
-            offline_checker::{MemoryBridge, MemoryReadAuxCols, MemoryWriteAuxCols},
-            MemoryAddress, MemoryAuxColsFactory, MemoryController, OfflineMemory, RecordId,
+            offline_checker::{
+                MemoryBridge, MemoryReadAuxCols, MemoryReadAuxRecord, MemoryWriteAuxCols,
+                MemoryWriteAuxRecord,
+            },
+            online::TracingMemory,
+            MemoryAddress, MemoryAuxColsFactory,
         },
-        program::ProgramBus,
+        native_adapter::util::{memory_read_native, tracing_read_native, tracing_write_native},
     },
 };
-use openvm_circuit_primitives::utils::next_power_of_two_or_zero;
+use openvm_circuit_primitives::AlignedBytesBorrow;
 use openvm_circuit_primitives_derive::AlignedBorrow;
 use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP, LocalOpcode};
 use openvm_native_compiler::{conversion::AS, FriOpcode::FRI_REDUCED_OPENING};
 use openvm_stark_backend::{
-    config::{StarkGenericConfig, Val},
     interaction::InteractionBuilder,
     p3_air::{Air, AirBuilder, BaseAir},
     p3_field::{Field, FieldAlgebra, PrimeField32},
     p3_matrix::{dense::RowMajorMatrix, Matrix},
     p3_maybe_rayon::prelude::*,
-    prover::types::AirProofInput,
     rap::{BaseAirWithPublicValues, PartitionedBaseAir},
-    AirRef, Chip, ChipUsageGetter,
 };
-use serde::{Deserialize, Serialize};
 use static_assertions::const_assert_eq;
 
 use crate::{
@@ -41,6 +38,8 @@ use crate::{
     utils::const_max,
 };
 
+mod execution;
+
 #[cfg(test)]
 mod tests;
 
@@ -219,8 +218,8 @@ const INSTRUCTION_READS: usize = 5;
 /// it starts with a Workload row (T1) and ends with either a Disabled or Instruction2 row (T7).
 /// The other transition constraints then ensure the proper state transitions from Workload to
 /// Instruction2.
-#[derive(Copy, Clone, Debug)]
-struct FriReducedOpeningAir {
+#[derive(Copy, Clone, Debug, derive_new::new)]
+pub struct FriReducedOpeningAir {
     execution_bridge: ExecutionBridge,
     memory_bridge: MemoryBridge,
 }
@@ -544,355 +543,546 @@ fn elem_to_ext<F: Field>(elem: F) -> [F; EXT_DEG] {
     ret
 }
 
-#[derive(Serialize, Deserialize)]
-#[serde(bound = "F: Field")]
-pub struct FriReducedOpeningRecord<F: Field> {
-    pub pc: F,
-    pub start_timestamp: F,
-    pub instruction: Instruction<F>,
-    pub alpha_read: RecordId,
-    pub length_read: RecordId,
-    pub a_ptr_read: RecordId,
-    pub is_init_read: RecordId,
-    pub b_ptr_read: RecordId,
-    pub a_rws: Vec<RecordId>,
-    pub b_reads: Vec<RecordId>,
-    pub result_write: RecordId,
+#[derive(Copy, Clone, Debug)]
+pub struct FriReducedOpeningMetadata {
+    length: usize,
+    is_init: bool,
 }
 
-impl<F: Field> FriReducedOpeningRecord<F> {
-    pub fn get_height(&self) -> usize {
-        // 2 for instruction rows
-        self.a_rws.len() + 2
+impl MultiRowMetadata for FriReducedOpeningMetadata {
+    #[inline(always)]
+    fn get_num_rows(&self) -> usize {
+        // Allocates `length` workload rows + 1 Instruction1 row + 1 Instruction2 row
+        self.length + 2
     }
 }
 
-pub struct FriReducedOpeningChip<F: Field> {
-    air: FriReducedOpeningAir,
-    pub records: Vec<FriReducedOpeningRecord<F>>,
-    pub height: usize,
-    offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-    streams: Arc<Mutex<Streams<F>>>,
+type FriReducedOpeningLayout = MultiRowLayout<FriReducedOpeningMetadata>;
+
+// Leading part of each instruction's record; `extract_layout` rebuilds the layout from it.
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct FriReducedOpeningHeaderRecord {
+    pub length: u32,
+    pub is_init: bool,
+}
+
+// Per-instruction data shared by every trace row; stored last in the record buffer.
+// NOTE: Do not reorder fields; `fill_trace` appears to fill rows in place over this record.
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct FriReducedOpeningCommonRecord<F> {
+    pub timestamp: u32,
+
+    pub a_ptr: u32,
+
+    pub b_ptr: u32,
+
+    pub alpha: [F; EXT_DEG],
+
+    pub from_pc: u32,
+
+    pub a_ptr_ptr: F,
+    pub a_ptr_aux: MemoryReadAuxRecord,
+
+    pub b_ptr_ptr: F,
+    pub b_ptr_aux: MemoryReadAuxRecord,
+
+    pub length_ptr: F,
+    pub length_aux: MemoryReadAuxRecord,
+
+    pub alpha_ptr: F,
+    pub alpha_aux: MemoryReadAuxRecord,
+
+    pub result_ptr: F,
+    pub result_aux: MemoryWriteAuxRecord<F, EXT_DEG>,
+
+    pub hint_id_ptr: F,
+
+    pub is_init_ptr: F,
+    pub is_init_aux: MemoryReadAuxRecord,
+}
+
+// One entry per workload row; stores `a` and the partial `result` after that row.
+// NOTE: Do not reorder fields; `fill_trace` appears to fill rows in place over this record.
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct FriReducedOpeningWorkloadRowRecord<F> {
+    pub a: F,
+    pub a_aux: MemoryReadAuxRecord,
+    // Partial result after this row. `b` is not stored: it is recomputed from `a`, `alpha`,
+    // this `result` and the previous row's result as
+    // b = result + a - prev_result * alpha
+    pub result: [F; EXT_DEG],
+    pub b_aux: MemoryReadAuxRecord,
+}
+
+// Mutable view of one record. NOTE: fields follow the buffer order used by `custom_borrow`.
+#[derive(Debug)]
+pub struct FriReducedOpeningRecordMut<'a, F> {
+    pub header: &'a mut FriReducedOpeningHeaderRecord,
+    pub workload: &'a mut [FriReducedOpeningWorkloadRowRecord<F>],
+    // Empty when `is_init` is set; otherwise one entry per write of `a`, holding that write's
+    // previous data
+    pub a_write_prev_data: &'a mut [F],
+    pub common: &'a mut FriReducedOpeningCommonRecord<F>,
 }
-impl<F: PrimeField32> FriReducedOpeningChip<F> {
-    pub fn new(
-        execution_bus: ExecutionBus,
-        program_bus: ProgramBus,
-        memory_bridge: MemoryBridge,
-        offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-        streams: Arc<Mutex<Streams<F>>>,
-    ) -> Self {
-        let air = FriReducedOpeningAir {
-            execution_bridge: ExecutionBridge::new(execution_bus, program_bus),
-            memory_bridge,
+
+impl<'a, F> CustomBorrow<'a, FriReducedOpeningRecordMut<'a, F>, FriReducedOpeningLayout>
+    for [u8]
+{
+    fn custom_borrow(
+        &'a mut self,
+        layout: FriReducedOpeningLayout,
+    ) -> FriReducedOpeningRecordMut<'a, F> {
+        let (header_buf, rest) =
+            unsafe { self.split_at_mut_unchecked(size_of::<FriReducedOpeningHeaderRecord>()) };
+        let header: &mut FriReducedOpeningHeaderRecord = header_buf.borrow_mut();
+
+        let workload_size =
+            layout.metadata.length * size_of::<FriReducedOpeningWorkloadRowRecord<F>>();
+
+        let (workload_buf, rest) = unsafe { rest.split_at_mut_unchecked(workload_size) };
+        let a_prev_size = if layout.metadata.is_init {
+            0
+        } else {
+            layout.metadata.length * size_of::<F>()
         };
-        Self {
-            records: vec![],
-            air,
-            height: 0,
-            offline_memory,
-            streams,
+
+        let (a_prev_buf, common_buf) = unsafe { rest.split_at_mut_unchecked(a_prev_size) };
+
+        let (_, a_prev_records, _) = unsafe { a_prev_buf.align_to_mut::<F>() };
+        let (_, workload_records, _) =
+            unsafe { workload_buf.align_to_mut::<FriReducedOpeningWorkloadRowRecord<F>>() };
+
+        let common: &mut FriReducedOpeningCommonRecord<F> = common_buf.borrow_mut();
+
+        FriReducedOpeningRecordMut {
+            header,
+            workload: &mut workload_records[..layout.metadata.length],
+            a_write_prev_data: &mut a_prev_records[..],
+            common,
+        }
+    }
+
+    unsafe fn extract_layout(&self) -> FriReducedOpeningLayout {
+        let header: &FriReducedOpeningHeaderRecord = self.borrow();
+        FriReducedOpeningLayout::new(FriReducedOpeningMetadata {
+            length: header.length as usize,
+            is_init: header.is_init,
+        })
+    }
+}
+
+impl<F> SizedRecord<FriReducedOpeningLayout> for FriReducedOpeningRecordMut<'_, F> {
+    fn size(layout: &FriReducedOpeningLayout) -> usize {
+        let mut total_len = size_of::<FriReducedOpeningHeaderRecord>();
+        total_len += layout.metadata.length * size_of::<FriReducedOpeningWorkloadRowRecord<F>>();
+        if !layout.metadata.is_init {
+            total_len += layout.metadata.length * size_of::<F>();
         }
+        total_len += size_of::<FriReducedOpeningCommonRecord<F>>();
+        total_len
+    }
+
+    fn alignment(_layout: &FriReducedOpeningLayout) -> usize {
+        align_of::<FriReducedOpeningHeaderRecord>()
+    }
+}
+
+#[derive(derive_new::new, Copy, Clone)]
+pub struct FriReducedOpeningExecutor;
+
+#[derive(derive_new::new)]
+pub struct FriReducedOpeningFiller;
+
+pub type FriReducedOpeningChip<F> = VmChipWrapper<F, FriReducedOpeningFiller>;
+
+impl Default for FriReducedOpeningExecutor {
+    fn default() -> Self {
+        Self::new()
     }
 }
-impl<F: PrimeField32> InstructionExecutor<F> for FriReducedOpeningChip<F> {
+
+impl<F, RA> PreflightExecutor<F, RA> for FriReducedOpeningExecutor
+where
+    F: PrimeField32,
+    for<'buf> RA: RecordArena<'buf, FriReducedOpeningLayout, FriReducedOpeningRecordMut<'buf, F>>,
+{
+    fn get_opcode_name(&self, opcode: usize) -> String {
+        assert_eq!(opcode, FRI_REDUCED_OPENING.global_opcode().as_usize());
+        String::from("FRI_REDUCED_OPENING")
+    }
+
     fn execute(
-        &mut self,
-        memory: &mut MemoryController<F>,
+        &self,
+        state: VmStateMut<F, TracingMemory, RA>,
         instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-    ) -> Result<ExecutionState<u32>, ExecutionError> {
+    ) -> Result<(), ExecutionError> {
         let &Instruction {
-            a: a_ptr_ptr,
-            b: b_ptr_ptr,
-            c: length_ptr,
-            d: alpha_ptr,
-            e: result_ptr,
-            f: hint_id_ptr,
-            g: is_init_ptr,
+            a,
+            b,
+            c,
+            d,
+            e,
+            f,
+            g,
             ..
         } = instruction;
 
-        let addr_space = F::from_canonical_u32(AS::Native as u32);
-        let alpha_read = memory.read(addr_space, alpha_ptr);
-        let length_read = memory.read_cell(addr_space, length_ptr);
-        let a_ptr_read = memory.read_cell(addr_space, a_ptr_ptr);
-        let b_ptr_read = memory.read_cell(addr_space, b_ptr_ptr);
-        let is_init_read = memory.read_cell(addr_space, is_init_ptr);
-        let is_init = is_init_read.1.as_canonical_u32();
+        let timestamp_start = state.memory.timestamp;
 
-        let hint_id_f = memory.unsafe_read_cell(addr_space, hint_id_ptr);
-        let hint_id = hint_id_f.as_canonical_u32() as usize;
+        // Read length from memory to allocate record
+        let length_ptr = c.as_canonical_u32();
+        let [length]: [F; 1] = memory_read_native(&state.memory.data, length_ptr);
+        let length = length.as_canonical_u32();
+        let is_init_ptr = g.as_canonical_u32();
+        let [is_init]: [F; 1] = memory_read_native(&state.memory.data, is_init_ptr);
+        let is_init = is_init != F::ZERO;
 
-        let alpha = alpha_read.1;
-        let length = length_read.1.as_canonical_u32() as usize;
-        let a_ptr = a_ptr_read.1;
-        let b_ptr = b_ptr_read.1;
+        let metadata = FriReducedOpeningMetadata {
+            length: length as usize,
+            is_init,
+        };
+        let record = state.ctx.alloc(MultiRowLayout::new(metadata));
 
-        let mut a_rws = Vec::with_capacity(length);
-        let mut b_reads = Vec::with_capacity(length);
-        let mut result = [F::ZERO; EXT_DEG];
+        record.common.from_pc = *state.pc;
+        record.common.timestamp = timestamp_start;
+
+        let alpha_ptr = d.as_canonical_u32();
+        let alpha = tracing_read_native(
+            state.memory,
+            alpha_ptr,
+            &mut record.common.alpha_aux.prev_timestamp,
+        );
+        record.common.alpha_ptr = d;
+        record.common.alpha = alpha;
+
+        tracing_read_native::<F, 1>(
+            state.memory,
+            length_ptr,
+            &mut record.common.length_aux.prev_timestamp,
+        );
+        record.common.length_ptr = c;
+        record.header.length = length;
+
+        let a_ptr_ptr = a.as_canonical_u32();
+        let [a_ptr]: [F; 1] = tracing_read_native(
+            state.memory,
+            a_ptr_ptr,
+            &mut record.common.a_ptr_aux.prev_timestamp,
+        );
+        record.common.a_ptr_ptr = a;
+        record.common.a_ptr = a_ptr.as_canonical_u32();
+
+        let b_ptr_ptr = b.as_canonical_u32();
+        let [b_ptr]: [F; 1] = tracing_read_native(
+            state.memory,
+            b_ptr_ptr,
+            &mut record.common.b_ptr_aux.prev_timestamp,
+        );
+        record.common.b_ptr_ptr = b;
+        record.common.b_ptr = b_ptr.as_canonical_u32();
+
+        tracing_read_native::<F, 1>(
+            state.memory,
+            is_init_ptr,
+            &mut record.common.is_init_aux.prev_timestamp,
+        );
+        record.common.is_init_ptr = g;
+        record.header.is_init = is_init;
+
+        let hint_id_ptr = f.as_canonical_u32();
+        let [hint_id]: [F; 1] = memory_read_native(state.memory.data(), hint_id_ptr);
+        let hint_id = hint_id.as_canonical_u32() as usize;
+        record.common.hint_id_ptr = f;
 
-        let data = if is_init == 0 {
-            let mut streams = self.streams.lock().unwrap();
-            let hint_steam = &mut streams.hint_space[hint_id];
+        let length = length as usize;
+
+        let data = if !is_init {
+            let hint_steam = &mut state.streams.hint_space[hint_id];
             hint_steam.drain(0..length).collect()
         } else {
             vec![]
         };
+
+        let mut as_and_bs = Vec::with_capacity(length);
         #[allow(clippy::needless_range_loop)]
         for i in 0..length {
-            let a_rw = if is_init == 0 {
-                let (record_id, _) =
-                    memory.write_cell(addr_space, a_ptr + F::from_canonical_usize(i), data[i]);
-                (record_id, data[i])
+            let workload_row = &mut record.workload[length - i - 1];
+
+            let a_ptr_i = record.common.a_ptr + i as u32;
+            let [a]: [F; 1] = if !is_init {
+                let mut prev = [F::ZERO; 1];
+                tracing_write_native(
+                    state.memory,
+                    a_ptr_i,
+                    [data[i]],
+                    &mut workload_row.a_aux.prev_timestamp,
+                    &mut prev,
+                );
+                record.a_write_prev_data[length - i - 1] = prev[0];
+                [data[i]]
             } else {
-                memory.read_cell(addr_space, a_ptr + F::from_canonical_usize(i))
+                tracing_read_native(
+                    state.memory,
+                    a_ptr_i,
+                    &mut workload_row.a_aux.prev_timestamp,
+                )
             };
-            let b_read =
-                memory.read::<EXT_DEG>(addr_space, b_ptr + F::from_canonical_usize(EXT_DEG * i));
-            a_rws.push(a_rw);
-            b_reads.push(b_read);
+            let b_ptr_i = record.common.b_ptr + (EXT_DEG * i) as u32;
+            let b = tracing_read_native::<F, EXT_DEG>(
+                state.memory,
+                b_ptr_i,
+                &mut workload_row.b_aux.prev_timestamp,
+            );
+
+            as_and_bs.push((a, b));
         }
 
-        for (a_rw, b_read) in a_rws.iter().rev().zip_eq(b_reads.iter().rev()) {
-            let a = a_rw.1;
-            let b = b_read.1;
+        let mut result = [F::ZERO; EXT_DEG];
+        for (i, (a, b)) in as_and_bs.into_iter().rev().enumerate() {
+            let workload_row = &mut record.workload[i];
+
             // result = result * alpha + (b - a)
             result = FieldExtension::add(
                 FieldExtension::multiply(result, alpha),
                 FieldExtension::subtract(b, elem_to_ext(a)),
             );
+            workload_row.a = a;
+            workload_row.result = result;
         }
 
-        let (result_write, _) = memory.write(addr_space, result_ptr, result);
-
-        let record = FriReducedOpeningRecord {
-            pc: F::from_canonical_u32(from_state.pc),
-            start_timestamp: F::from_canonical_u32(from_state.timestamp),
-            instruction: instruction.clone(),
-            alpha_read: alpha_read.0,
-            length_read: length_read.0,
-            a_ptr_read: a_ptr_read.0,
-            is_init_read: is_init_read.0,
-            b_ptr_read: b_ptr_read.0,
-            a_rws: a_rws.into_iter().map(|r| r.0).collect(),
-            b_reads: b_reads.into_iter().map(|r| r.0).collect(),
-            result_write,
-        };
-        self.height += record.get_height();
-        self.records.push(record);
+        let result_ptr = e.as_canonical_u32();
+        tracing_write_native(
+            state.memory,
+            result_ptr,
+            result,
+            &mut record.common.result_aux.prev_timestamp,
+            &mut record.common.result_aux.prev_data,
+        );
+        record.common.result_ptr = e;
 
-        Ok(ExecutionState {
-            pc: from_state.pc + DEFAULT_PC_STEP,
-            timestamp: memory.timestamp(),
-        })
-    }
+        *state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP);
 
-    fn get_opcode_name(&self, opcode: usize) -> String {
-        assert_eq!(opcode, FRI_REDUCED_OPENING.global_opcode().as_usize());
-        String::from("FRI_REDUCED_OPENING")
+        Ok(())
     }
 }
 
-fn record_to_rows<F: PrimeField32>(
-    record: FriReducedOpeningRecord<F>,
-    aux_cols_factory: &MemoryAuxColsFactory<F>,
-    slice: &mut [F],
-    memory: &OfflineMemory<F>,
-) {
-    let Instruction {
-        a: a_ptr_ptr,
-        b: b_ptr_ptr,
-        c: length_ptr,
-        d: alpha_ptr,
-        e: result_ptr,
-        f: hint_id_ptr,
-        g: is_init_ptr,
-        ..
-    } = record.instruction;
-
-    let length_read = memory.record_by_id(record.length_read);
-    let alpha_read = memory.record_by_id(record.alpha_read);
-    let a_ptr_read = memory.record_by_id(record.a_ptr_read);
-    let b_ptr_read = memory.record_by_id(record.b_ptr_read);
-    let is_init_read = memory.record_by_id(record.is_init_read);
-    let is_init = is_init_read.data_at(0);
-    let write_a = F::ONE - is_init;
-
-    let length = length_read.data_at(0).as_canonical_u32() as usize;
-    let alpha: [F; EXT_DEG] = alpha_read.data_slice().try_into().unwrap();
-    let a_ptr = a_ptr_read.data_at(0);
-    let b_ptr = b_ptr_read.data_at(0);
-
-    let mut result = [F::ZERO; EXT_DEG];
-
-    let alpha_aux = aux_cols_factory.make_read_aux_cols(alpha_read);
-    let length_aux = aux_cols_factory.make_read_aux_cols(length_read);
-    let a_ptr_aux = aux_cols_factory.make_read_aux_cols(a_ptr_read);
-    let b_ptr_aux = aux_cols_factory.make_read_aux_cols(b_ptr_read);
-    let is_init_aux = aux_cols_factory.make_read_aux_cols(is_init_read);
-
-    let result_aux = aux_cols_factory.make_write_aux_cols(memory.record_by_id(record.result_write));
-
-    // WorkloadCols
-    for (i, (&a_record_id, &b_record_id)) in record
-        .a_rws
-        .iter()
-        .rev()
-        .zip_eq(record.b_reads.iter().rev())
-        .enumerate()
-    {
-        let a_rw = memory.record_by_id(a_record_id);
-        let b_read = memory.record_by_id(b_record_id);
-        let a = a_rw.data_at(0);
-        let b: [F; EXT_DEG] = b_read.data_slice().try_into().unwrap();
-
-        let start = i * OVERALL_WIDTH;
-        let cols: &mut WorkloadCols<F> = slice[start..start + WL_WIDTH].borrow_mut();
-        *cols = WorkloadCols {
-            prefix: PrefixCols {
-                general: GeneralCols {
-                    is_workload_row: F::ONE,
-                    is_ins_row: F::ZERO,
-                    timestamp: record.start_timestamp + F::from_canonical_usize((length - i) * 2),
-                },
-                a_or_is_first: a,
-                data: DataCols {
-                    a_ptr: a_ptr + F::from_canonical_usize(length - i),
-                    write_a,
-                    b_ptr: b_ptr + F::from_canonical_usize((length - i) * EXT_DEG),
-                    idx: F::from_canonical_usize(i),
-                    result,
-                    alpha,
-                },
-            },
-            // Generate write aux columns no matter `a` is read or written. When `a` is written,
-            // `prev_data` is not constrained.
-            a_aux: if a_rw.prev_data_slice().is_some() {
-                aux_cols_factory.make_write_aux_cols(a_rw)
+impl<F: PrimeField32> TraceFiller<F> for FriReducedOpeningFiller {
+    fn fill_trace(
+        &self,
+        mem_helper: &MemoryAuxColsFactory<F>,
+        trace: &mut RowMajorMatrix<F>,
+        rows_used: usize,
+    ) {
+        if rows_used == 0 {
+            return;
+        }
+        debug_assert_eq!(trace.width, OVERALL_WIDTH);
+
+        let mut remaining_trace = &mut trace.values[..OVERALL_WIDTH * rows_used];
+        let mut chunks = Vec::with_capacity(rows_used);
+        while !remaining_trace.is_empty() {
+            let header: &FriReducedOpeningHeaderRecord =
+                unsafe { get_record_from_slice(&mut remaining_trace, ()) };
+            let num_rows = header.length as usize + 2;
+            let chunk_size = OVERALL_WIDTH * num_rows;
+            let (chunk, rest) = remaining_trace.split_at_mut(chunk_size);
+            chunks.push((chunk, header.is_init));
+            remaining_trace = rest;
+        }
+
+        chunks.into_par_iter().for_each(|(mut chunk, is_init)| {
+            let num_rows = chunk.len() / OVERALL_WIDTH;
+            let metadata = FriReducedOpeningMetadata {
+                length: num_rows - 2,
+                is_init,
+            };
+            let record: FriReducedOpeningRecordMut<F> =
+                unsafe { get_record_from_slice(&mut chunk, MultiRowLayout::new(metadata)) };
+
+            let timestamp = record.common.timestamp;
+            let length = record.header.length as usize;
+            let alpha = record.common.alpha;
+            let is_init = record.header.is_init;
+            let write_a = F::from_bool(!is_init);
+
+            let a_ptr = record.common.a_ptr;
+            let b_ptr = record.common.b_ptr;
+
+            let (workload_chunk, rest) = chunk.split_at_mut(length * OVERALL_WIDTH);
+            let (ins1_chunk, ins2_chunk) = rest.split_at_mut(OVERALL_WIDTH);
+
+            {
+                // ins2 row
+                let cols: &mut Instruction2Cols<F> = ins2_chunk[..INS_2_WIDTH].borrow_mut();
+
+                cols.write_a_x_is_first = F::ZERO;
+
+                mem_helper.fill(
+                    record.common.is_init_aux.prev_timestamp,
+                    timestamp + 4,
+                    cols.is_init_aux.as_mut(),
+                );
+                cols.is_init_ptr = record.common.is_init_ptr;
+
+                cols.hint_id_ptr = record.common.hint_id_ptr;
+
+                cols.result_aux
+                    .set_prev_data(record.common.result_aux.prev_data);
+                mem_helper.fill(
+                    record.common.result_aux.prev_timestamp,
+                    timestamp + 5 + 2 * length as u32,
+                    cols.result_aux.as_mut(),
+                );
+                cols.result_ptr = record.common.result_ptr;
+
+                mem_helper.fill(
+                    record.common.alpha_aux.prev_timestamp,
+                    timestamp,
+                    cols.alpha_aux.as_mut(),
+                );
+                cols.alpha_ptr = record.common.alpha_ptr;
+
+                mem_helper.fill(
+                    record.common.length_aux.prev_timestamp,
+                    timestamp + 1,
+                    cols.length_aux.as_mut(),
+                );
+                cols.length_ptr = record.common.length_ptr;
+
+                cols.is_first = F::ZERO;
+
+                cols.general.timestamp = F::from_canonical_u32(timestamp);
+                cols.general.is_ins_row = F::ONE;
+                cols.general.is_workload_row = F::ZERO;
+
+                ins2_chunk[INS_2_WIDTH..OVERALL_WIDTH].fill(F::ZERO);
+            }
+
+            {
+                // ins 1 row
+                let cols: &mut Instruction1Cols<F> = ins1_chunk[..INS_1_WIDTH].borrow_mut();
+
+                cols.write_a_x_is_first = write_a;
+
+                mem_helper.fill(
+                    record.common.b_ptr_aux.prev_timestamp,
+                    timestamp + 3,
+                    cols.b_ptr_aux.as_mut(),
+                );
+                cols.b_ptr_ptr = record.common.b_ptr_ptr;
+
+                mem_helper.fill(
+                    record.common.a_ptr_aux.prev_timestamp,
+                    timestamp + 2,
+                    cols.a_ptr_aux.as_mut(),
+                );
+                cols.a_ptr_ptr = record.common.a_ptr_ptr;
+
+                cols.pc = F::from_canonical_u32(record.common.from_pc);
+
+                cols.prefix.data.alpha = alpha;
+                cols.prefix.data.result = record.workload.last().unwrap().result;
+                cols.prefix.data.idx = F::from_canonical_usize(length);
+                cols.prefix.data.b_ptr = F::from_canonical_u32(b_ptr);
+                cols.prefix.data.write_a = write_a;
+                cols.prefix.data.a_ptr = F::from_canonical_u32(a_ptr);
+
+                cols.prefix.a_or_is_first = F::ONE;
+
+                cols.prefix.general.timestamp = F::from_canonical_u32(timestamp);
+                cols.prefix.general.is_ins_row = F::ONE;
+                cols.prefix.general.is_workload_row = F::ZERO;
+                ins1_chunk[INS_1_WIDTH..OVERALL_WIDTH].fill(F::ZERO);
+            }
+
+            // To fill the WorkloadRows we do 2 passes:
+            // - First, a serial pass to fill some of the records into the trace
+            // - Then, a parallel pass to fill the rest of the records into the trace
+            // Note, the first pass is done to avoid overwriting the records
+
+            // Copy of `a_write_prev_data` to avoid overwriting it and to use it in the parallel
+            // pass
+            let a_prev_data = if !is_init {
+                let mut tmp = Vec::with_capacity(length);
+                tmp.extend_from_slice(record.a_write_prev_data);
+                tmp
             } else {
-                let read_aux = aux_cols_factory.make_read_aux_cols(a_rw);
-                MemoryWriteAuxCols::from_base(read_aux.get_base(), [F::ZERO])
-            },
-            b,
-            b_aux: aux_cols_factory.make_read_aux_cols(b_read),
-        };
-        // result = result * alpha + (b - a)
-        result = FieldExtension::add(
-            FieldExtension::multiply(result, alpha),
-            FieldExtension::subtract(b, elem_to_ext(a)),
-        );
-    }
-    // Instruction1Cols
-    {
-        let start = length * OVERALL_WIDTH;
-        let cols: &mut Instruction1Cols<F> = slice[start..start + INS_1_WIDTH].borrow_mut();
-        *cols = Instruction1Cols {
-            prefix: PrefixCols {
-                general: GeneralCols {
-                    is_workload_row: F::ZERO,
-                    is_ins_row: F::ONE,
-                    timestamp: record.start_timestamp,
-                },
-                a_or_is_first: F::ONE,
-                data: DataCols {
-                    a_ptr,
-                    write_a,
-                    b_ptr,
-                    idx: F::from_canonical_usize(length),
-                    result,
-                    alpha,
-                },
-            },
-            pc: record.pc,
-            a_ptr_ptr,
-            a_ptr_aux,
-            b_ptr_ptr,
-            b_ptr_aux,
-            write_a_x_is_first: write_a,
-        };
-    }
-    // Instruction2Cols
-    {
-        let start = (length + 1) * OVERALL_WIDTH;
-        let cols: &mut Instruction2Cols<F> = slice[start..start + INS_2_WIDTH].borrow_mut();
-        *cols = Instruction2Cols {
-            general: GeneralCols {
-                is_workload_row: F::ZERO,
-                is_ins_row: F::ONE,
-                timestamp: record.start_timestamp,
-            },
-            is_first: F::ZERO,
-            length_ptr,
-            length_aux,
-            alpha_ptr,
-            alpha_aux,
-            result_ptr,
-            result_aux,
-            hint_id_ptr,
-            is_init_ptr,
-            is_init_aux,
-            write_a_x_is_first: F::ZERO,
-        };
-    }
-}
+                vec![]
+            };
 
-impl<F: Field> ChipUsageGetter for FriReducedOpeningChip<F> {
-    fn air_name(&self) -> String {
-        "FriReducedOpeningAir".to_string()
-    }
+            for (i, (workload_row, row_chunk)) in record
+                .workload
+                .iter()
+                .zip(workload_chunk.chunks_exact_mut(OVERALL_WIDTH))
+                .enumerate()
+                .rev()
+            {
+                let cols: &mut WorkloadCols<F> = row_chunk[..WL_WIDTH].borrow_mut();
 
-    fn current_trace_height(&self) -> usize {
-        self.height
-    }
+                let timestamp = timestamp + ((length - i) * 2) as u32;
 
-    fn trace_width(&self) -> usize {
-        OVERALL_WIDTH
-    }
-}
+                // fill in reverse order
+                mem_helper.fill(
+                    workload_row.b_aux.prev_timestamp,
+                    timestamp + 4,
+                    cols.b_aux.as_mut(),
+                );
 
-impl<SC: StarkGenericConfig> Chip<SC> for FriReducedOpeningChip<Val<SC>>
-where
-    Val<SC>: PrimeField32,
-{
-    fn air(&self) -> AirRef<SC> {
-        Arc::new(self.air)
-    }
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
-        let height = next_power_of_two_or_zero(self.height);
-        let mut flat_trace = Val::<SC>::zero_vec(OVERALL_WIDTH * height);
-        let chunked_trace = {
-            let sizes: Vec<_> = self
-                .records
-                .par_iter()
-                .map(|record| OVERALL_WIDTH * record.get_height())
-                .collect();
-            variable_chunks_mut(&mut flat_trace, &sizes)
-        };
+                // We temporarily store the result here;
+                // the correct value of b is computed during the parallel pass below
+                cols.b = record.workload[i].result;
 
-        let memory = self.offline_memory.lock().unwrap();
-        let aux_cols_factory = memory.aux_cols_factory();
+                mem_helper.fill(
+                    workload_row.a_aux.prev_timestamp,
+                    timestamp + 3,
+                    cols.a_aux.as_mut(),
+                );
+                cols.prefix.a_or_is_first = workload_row.a;
 
-        self.records
-            .into_par_iter()
-            .zip_eq(chunked_trace.into_par_iter())
-            .for_each(|(record, slice)| {
-                record_to_rows(record, &aux_cols_factory, slice, &memory);
-            });
+                if i > 0 {
+                    cols.prefix.data.result = record.workload[i - 1].result;
+                }
+            }
 
-        let matrix = RowMajorMatrix::new(flat_trace, OVERALL_WIDTH);
-        AirProofInput::simple_no_pis(matrix)
-    }
-}
+            workload_chunk
+                .par_chunks_exact_mut(OVERALL_WIDTH)
+                .enumerate()
+                .for_each(|(i, row_chunk)| {
+                    let cols: &mut WorkloadCols<F> = row_chunk[..WL_WIDTH].borrow_mut();
+                    let timestamp = timestamp + ((length - i) * 2) as u32;
+                    if is_init {
+                        cols.a_aux.set_prev_data([F::ZERO; 1]);
+                    } else {
+                        cols.a_aux.set_prev_data([a_prev_data[i]]);
+                    }
 
-fn variable_chunks_mut<'a, T>(mut slice: &'a mut [T], sizes: &[usize]) -> Vec<&'a mut [T]> {
-    let mut result = Vec::with_capacity(sizes.len());
-    for &size in sizes {
-        // split_at_mut guarantees disjoint slices
-        let (left, right) = slice.split_at_mut(size);
-        result.push(left);
-        slice = right; // move forward for the next chunk
+                    // DataCols
+                    cols.prefix.data.a_ptr = F::from_canonical_u32(a_ptr + (length - i) as u32);
+                    cols.prefix.data.write_a = write_a;
+                    cols.prefix.data.b_ptr =
+                        F::from_canonical_u32(b_ptr + ((length - i) * EXT_DEG) as u32);
+                    cols.prefix.data.idx = F::from_canonical_usize(i);
+                    if i == 0 {
+                        cols.prefix.data.result = [F::ZERO; EXT_DEG];
+                    }
+                    cols.prefix.data.alpha = alpha;
+
+                    // GeneralCols
+                    cols.prefix.general.is_workload_row = F::ONE;
+                    cols.prefix.general.is_ins_row = F::ZERO;
+
+                    // WorkloadCols
+                    cols.prefix.general.timestamp = F::from_canonical_u32(timestamp);
+
+                    cols.b = FieldExtension::subtract(
+                        FieldExtension::add(cols.b, elem_to_ext(cols.prefix.a_or_is_first)),
+                        FieldExtension::multiply(cols.prefix.data.result, alpha),
+                    );
+                    row_chunk[WL_WIDTH..OVERALL_WIDTH].fill(F::ZERO);
+                });
+        });
     }
-    result
 }
diff --git a/extensions/native/circuit/src/fri/tests.rs b/extensions/native/circuit/src/fri/tests.rs
index 97dcdbc532..5910f69e93 100644
--- a/extensions/native/circuit/src/fri/tests.rs
+++ b/extensions/native/circuit/src/fri/tests.rs
@@ -1,22 +1,42 @@
-use std::sync::{Arc, Mutex};
+use std::borrow::BorrowMut;
 
 use itertools::Itertools;
-use openvm_circuit::arch::{
-    testing::{memory::gen_pointer, VmChipTestBuilder},
-    Streams,
-};
+use openvm_circuit::arch::testing::{memory::gen_pointer, TestChipHarness, VmChipTestBuilder};
 use openvm_instructions::{instruction::Instruction, LocalOpcode};
-use openvm_native_compiler::FriOpcode::FRI_REDUCED_OPENING;
+use openvm_native_compiler::{conversion::AS, FriOpcode::FRI_REDUCED_OPENING};
 use openvm_stark_backend::{
     p3_field::{Field, FieldAlgebra},
+    p3_matrix::{
+        dense::{DenseMatrix, RowMajorMatrix},
+        Matrix,
+    },
     utils::disable_debug_builder,
     verifier::VerificationError,
 };
 use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
-use rand::Rng;
+use rand::{rngs::StdRng, Rng};
+
+use super::{
+    super::field_extension::FieldExtension, elem_to_ext, FriReducedOpeningAir,
+    FriReducedOpeningChip, FriReducedOpeningExecutor, EXT_DEG,
+};
+use crate::{
+    fri::{WorkloadCols, OVERALL_WIDTH, WL_WIDTH},
+    write_native_array, FriReducedOpeningFiller,
+};
+
+const MAX_INS_CAPACITY: usize = 1024;
+type F = BabyBear;
+type Harness =
+    TestChipHarness<F, FriReducedOpeningExecutor, FriReducedOpeningAir, FriReducedOpeningChip<F>>;
 
-use super::{super::field_extension::FieldExtension, elem_to_ext, FriReducedOpeningChip, EXT_DEG};
-use crate::OVERALL_WIDTH;
+fn create_test_chip(tester: &VmChipTestBuilder<F>) -> Harness {
+    let air = FriReducedOpeningAir::new(tester.execution_bridge(), tester.memory_bridge());
+    let step = FriReducedOpeningExecutor::new(); // needs no handles from the tester
+    let chip = FriReducedOpeningChip::new(FriReducedOpeningFiller, tester.memory_helper());
+    // Bundle the executor, AIR and chip into the single harness shared by the tests below.
+    Harness::with_capacity(step, air, chip, MAX_INS_CAPACITY)
+}
 
 fn compute_fri_mat_opening<F: Field>(
     alpha: [F; EXT_DEG],
@@ -35,146 +55,111 @@ fn compute_fri_mat_opening<F: Field>(
     result
 }
 
-#[test]
-fn fri_mat_opening_air_test() {
-    let num_ops = 14; // non-power-of-2 to also test padding
-    let elem_range = || 1..=100;
-    let length_range = || 1..=49;
-
-    let mut tester = VmChipTestBuilder::default();
-
-    let streams = Arc::new(Mutex::new(Streams::default()));
-    let mut chip = FriReducedOpeningChip::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-        tester.offline_memory_mutex_arc(),
-        streams.clone(),
+fn set_and_execute(tester: &mut VmChipTestBuilder<F>, harness: &mut Harness, rng: &mut StdRng) {
+    let len = rng.gen_range(1..=28); // number of (a, b) entries in this opening
+    let a_ptr = gen_pointer(rng, len);
+    let b_ptr = gen_pointer(rng, len);
+    let a_ptr_ptr =
+        write_native_array::<F, 1>(tester, rng, Some([F::from_canonical_usize(a_ptr)])).1;
+    let b_ptr_ptr =
+        write_native_array::<F, 1>(tester, rng, Some([F::from_canonical_usize(b_ptr)])).1;
+    // Like the two array pointers above, `len` is handed to the instruction via a memory cell.
+    let len_ptr = write_native_array::<F, 1>(tester, rng, Some([F::from_canonical_usize(len)])).1;
+    let (alpha, alpha_ptr) = write_native_array::<F, EXT_DEG>(tester, rng, None);
+    let out_ptr = gen_pointer(rng, EXT_DEG);
+    let is_init = true; // fixed, so the hint-stream branch in the loop below is never taken
+    let is_init_ptr = write_native_array::<F, 1>(tester, rng, Some([F::from_bool(is_init)])).1;
+    // Random data: each `a` is one field element, each `b` is EXT_DEG of them.
+    let mut vec_a = Vec::with_capacity(len);
+    let mut vec_b = Vec::with_capacity(len);
+    for i in 0..len {
+        let a = rng.gen();
+        let b: [F; EXT_DEG] = std::array::from_fn(|_| rng.gen());
+        vec_a.push(a);
+        vec_b.push(b);
+        if !is_init {
+            tester.streams.hint_space[0].push(a);
+        } else {
+            tester.write(AS::Native as usize, a_ptr + i, [a]);
+        }
+        tester.write(AS::Native as usize, b_ptr + (EXT_DEG * i), b); // b[i] spans EXT_DEG cells
+    }
+    // Run the instruction, then compare memory against the reference computation.
+    tester.execute(
+        harness,
+        &Instruction::from_usize(
+            FRI_REDUCED_OPENING.global_opcode(),
+            [
+                a_ptr_ptr,
+                b_ptr_ptr,
+                len_ptr,
+                alpha_ptr,
+                out_ptr,
+                0, // hint id, will just use 0 for testing
+                is_init_ptr,
+            ],
+        ),
     );
 
-    let mut rng = create_seeded_rng();
+    let expected_result = compute_fri_mat_opening(alpha, &vec_a, &vec_b);
+    assert_eq!(expected_result, tester.read(AS::Native as usize, out_ptr));
 
-    macro_rules! gen_ext {
-        () => {
-            std::array::from_fn::<_, EXT_DEG, _>(|_| {
-                BabyBear::from_canonical_u32(rng.gen_range(elem_range()))
-            })
-        };
+    for (i, ai) in vec_a.iter().enumerate() {
+        let [found] = tester.read(AS::Native as usize, a_ptr + i);
+        assert_eq!(*ai, found); // a[i] must be readable at a_ptr + i after execution
     }
+}
 
-    streams.lock().unwrap().hint_space = vec![vec![]];
-
-    for _ in 0..num_ops {
-        let alpha = gen_ext!();
-        let length = rng.gen_range(length_range());
-        let a = (0..length)
-            .map(|_| BabyBear::from_canonical_u32(rng.gen_range(elem_range())))
-            .collect_vec();
-        let b = (0..length).map(|_| gen_ext!()).collect_vec();
-
-        let result = compute_fri_mat_opening(alpha, &a, &b);
-
-        let alpha_pointer = gen_pointer(&mut rng, 4);
-        let length_pointer = gen_pointer(&mut rng, 1);
-        let a_pointer_pointer = gen_pointer(&mut rng, 1);
-        let b_pointer_pointer = gen_pointer(&mut rng, 1);
-        let result_pointer = gen_pointer(&mut rng, 4);
-        let a_pointer = gen_pointer(&mut rng, 1);
-        let b_pointer = gen_pointer(&mut rng, 4);
-        let is_init_ptr = gen_pointer(&mut rng, 1);
-
-        let address_space = 4usize;
-
-        /*tracing::debug!(
-            "{opcode:?} d = {}, e = {}, f = {}, result_addr = {}, addr1 = {}, addr2 = {}, z = {}, x = {}, y = {}",
-            result_as, as1, as2, result_pointer, address1, address2, result, operand1, operand2,
-        );*/
-
-        tester.write(address_space, alpha_pointer, alpha);
-        tester.write_cell(
-            address_space,
-            length_pointer,
-            BabyBear::from_canonical_usize(length),
-        );
-        tester.write_cell(
-            address_space,
-            a_pointer_pointer,
-            BabyBear::from_canonical_usize(a_pointer),
-        );
-        tester.write_cell(
-            address_space,
-            b_pointer_pointer,
-            BabyBear::from_canonical_usize(b_pointer),
-        );
-        let is_init = rng.gen_range(0..2);
-        tester.write_cell(
-            address_space,
-            is_init_ptr,
-            BabyBear::from_canonical_u32(is_init),
-        );
+///////////////////////////////////////////////////////////////////////////////////////
+// POSITIVE TESTS
+//
+// Randomly generate computations and execute, ensuring that the generated trace
+// passes all constraints.
+///////////////////////////////////////////////////////////////////////////////////////
 
-        if is_init == 0 {
-            streams.lock().unwrap().hint_space[0].extend_from_slice(&a);
-        } else {
-            for (i, ai) in a.iter().enumerate() {
-                tester.write_cell(address_space, a_pointer + i, *ai);
-            }
-        }
-        for (i, bi) in b.iter().enumerate() {
-            tester.write(address_space, b_pointer + (4 * i), *bi);
-        }
+#[test]
+fn fri_mat_opening_air_test() {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default_native();
+    let mut harness = create_test_chip(&tester); // executor + AIR + chip bundle
 
-        tester.execute(
-            &mut chip,
-            &Instruction::from_usize(
-                FRI_REDUCED_OPENING.global_opcode(),
-                [
-                    a_pointer_pointer,
-                    b_pointer_pointer,
-                    length_pointer,
-                    alpha_pointer,
-                    result_pointer,
-                    0, // hint id
-                    is_init_ptr,
-                ],
-            ),
-        );
-        assert_eq!(result, tester.read(address_space, result_pointer));
-        // Check that `a` was populated.
-        for (i, ai) in a.iter().enumerate() {
-            let found = tester.read_cell(address_space, a_pointer + i);
-            assert_eq!(*ai, found);
-        }
+    let num_ops = 28; // non-power-of-2 to also test padding
+    for _ in 0..num_ops {
+        set_and_execute(&mut tester, &mut harness, &mut rng); // one random opening per call
     }
 
-    let mut tester = tester.build().load(chip).finalize();
+    let tester = tester.build().load(harness).finalize();
     tester.simple_test().expect("Verification failed");
+}
 
-    disable_debug_builder();
-    // negative test pranking each value
-    for height in 0..num_ops {
-        // TODO: better way to modify existing traces in tester
-        let trace = tester.air_proof_inputs[2]
-            .1
-            .raw
-            .common_main
-            .as_mut()
-            .unwrap();
-        let old_trace = trace.clone();
-        for width in 0..OVERALL_WIDTH
-        /* num operands */
-        {
-            let prank_value = BabyBear::from_canonical_u32(rng.gen_range(1..=100));
-            trace.row_mut(height)[width] = prank_value;
-        }
+//////////////////////////////////////////////////////////////////////////////////////
+// NEGATIVE TESTS
+//
+// Given a fake trace of a single operation, set up a chip and run the test. We replace
+// part of the trace and check that the chip throws the expected error.
+//////////////////////////////////////////////////////////////////////////////////////
 
-        // Run a test after pranking each row
-        assert_eq!(
-            tester.simple_test().err(),
-            Some(VerificationError::OodEvaluationMismatch),
-            "Expected constraint to fail"
-        );
+#[test]
+fn run_negative_fri_mat_opening_test() {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default_native();
+    let mut harness = create_test_chip(&tester);
 
-        tester.air_proof_inputs[2].1.raw.common_main = Some(old_trace);
-    }
+    set_and_execute(&mut tester, &mut harness, &mut rng);
+    // Prank: copy row 0, overwrite one workload column, then swap the trace for that copy.
+    let modify_trace = |trace: &mut DenseMatrix<F>| {
+        let mut values = trace.row_slice(0).to_vec(); // copy of row 0 only
+        let cols: &mut WorkloadCols<F> = values[..WL_WIDTH].borrow_mut();
+
+        cols.prefix.a_or_is_first = F::from_canonical_u32(42);
+        // `values` is one row, so (assuming width == OVERALL_WIDTH) the new trace has height 1.
+        *trace = RowMajorMatrix::new(values, OVERALL_WIDTH);
+    };
+    // The pranked trace must be rejected with `OodEvaluationMismatch`.
+    disable_debug_builder();
+    let tester = tester
+        .build()
+        .load_and_prank_trace(harness, modify_trace)
+        .finalize();
+    tester.simple_test_with_expected_error(VerificationError::OodEvaluationMismatch);
 }
diff --git a/extensions/native/circuit/src/jal/mod.rs b/extensions/native/circuit/src/jal/mod.rs
deleted file mode 100644
index 28322834a2..0000000000
--- a/extensions/native/circuit/src/jal/mod.rs
+++ /dev/null
@@ -1,342 +0,0 @@
-use std::{
-    borrow::{Borrow, BorrowMut},
-    ops::Deref,
-    sync::{Arc, Mutex},
-};
-
-use openvm_circuit::{
-    arch::{ExecutionBridge, ExecutionError, ExecutionState, InstructionExecutor, PcIncOrSet},
-    system::memory::{
-        offline_checker::{MemoryBridge, MemoryWriteAuxCols},
-        MemoryAddress, MemoryAuxColsFactory, MemoryController, OfflineMemory, RecordId,
-    },
-};
-use openvm_circuit_primitives::{
-    utils::next_power_of_two_or_zero,
-    var_range::{
-        SharedVariableRangeCheckerChip, VariableRangeCheckerBus, VariableRangeCheckerChip,
-    },
-};
-use openvm_circuit_primitives_derive::AlignedBorrow;
-use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP, LocalOpcode};
-use openvm_native_compiler::{conversion::AS, NativeJalOpcode, NativeRangeCheckOpcode};
-use openvm_stark_backend::{
-    config::{StarkGenericConfig, Val},
-    interaction::InteractionBuilder,
-    p3_air::{Air, AirBuilder, BaseAir},
-    p3_field::{Field, FieldAlgebra, PrimeField32},
-    p3_matrix::{dense::RowMajorMatrix, Matrix},
-    p3_maybe_rayon::prelude::*,
-    prover::types::AirProofInput,
-    rap::{BaseAirWithPublicValues, PartitionedBaseAir},
-    AirRef, Chip, ChipUsageGetter,
-};
-use serde::{Deserialize, Serialize};
-use static_assertions::const_assert_eq;
-use AS::Native;
-
-#[cfg(test)]
-mod tests;
-
-#[repr(C)]
-#[derive(AlignedBorrow)]
-struct JalRangeCheckCols<T> {
-    is_jal: T,
-    is_range_check: T,
-    a_pointer: T,
-    state: ExecutionState<T>,
-    // Write when is_jal, read when is_range_check.
-    writes_aux: MemoryWriteAuxCols<T, 1>,
-    b: T,
-    // Only used by range check.
-    c: T,
-    // Only used by range check.
-    y: T,
-}
-
-const OVERALL_WIDTH: usize = JalRangeCheckCols::<u8>::width();
-const_assert_eq!(OVERALL_WIDTH, 12);
-
-#[derive(Copy, Clone, Debug)]
-pub struct JalRangeCheckAir {
-    execution_bridge: ExecutionBridge,
-    memory_bridge: MemoryBridge,
-    range_bus: VariableRangeCheckerBus,
-}
-
-impl<F: Field> BaseAir<F> for JalRangeCheckAir {
-    fn width(&self) -> usize {
-        OVERALL_WIDTH
-    }
-}
-
-impl<F: Field> BaseAirWithPublicValues<F> for JalRangeCheckAir {}
-impl<F: Field> PartitionedBaseAir<F> for JalRangeCheckAir {}
-impl<AB: InteractionBuilder> Air<AB> for JalRangeCheckAir
-where
-    AB::F: PrimeField32,
-{
-    fn eval(&self, builder: &mut AB) {
-        let main = builder.main();
-        let local = main.row_slice(0);
-        let local_slice = local.deref();
-        let local: &JalRangeCheckCols<AB::Var> = local_slice.borrow();
-        builder.assert_bool(local.is_jal);
-        builder.assert_bool(local.is_range_check);
-        let is_valid = local.is_jal + local.is_range_check;
-        builder.assert_bool(is_valid.clone());
-
-        let d = AB::Expr::from_canonical_u32(Native as u32);
-        let a_val = local.writes_aux.prev_data()[0];
-        // if is_jal, write pc + DEFAULT_PC_STEP, else if is_range_check, read a_val.
-        let write_val = local.is_jal
-            * (local.state.pc + AB::Expr::from_canonical_u32(DEFAULT_PC_STEP))
-            + local.is_range_check * a_val;
-        self.memory_bridge
-            .write(
-                MemoryAddress::new(d.clone(), local.a_pointer),
-                [write_val],
-                local.state.timestamp,
-                &local.writes_aux,
-            )
-            .eval(builder, is_valid.clone());
-
-        let opcode = local.is_jal
-            * AB::F::from_canonical_usize(NativeJalOpcode::JAL.global_opcode().as_usize())
-            + local.is_range_check
-                * AB::F::from_canonical_usize(
-                    NativeRangeCheckOpcode::RANGE_CHECK
-                        .global_opcode()
-                        .as_usize(),
-                );
-        // Increment pc by b if is_jal, else by DEFAULT_PC_STEP if is_range_check.
-        let pc_inc = local.is_jal * local.b
-            + local.is_range_check * AB::F::from_canonical_u32(DEFAULT_PC_STEP);
-        builder.when(local.is_jal).assert_zero(local.c);
-        self.execution_bridge
-            .execute_and_increment_or_set_pc(
-                opcode,
-                [local.a_pointer.into(), local.b.into(), local.c.into(), d],
-                local.state,
-                AB::F::ONE,
-                PcIncOrSet::Inc(pc_inc),
-            )
-            .eval(builder, is_valid);
-
-        // Range check specific:
-        // a_val = x + y * (1 << 16)
-        let x = a_val - local.y * AB::Expr::from_canonical_u32(1 << 16);
-        self.range_bus
-            .send(x.clone(), local.b)
-            .eval(builder, local.is_range_check);
-        // Assert y < (1 << c), where c <= 14.
-        self.range_bus
-            .send(local.y, local.c)
-            .eval(builder, local.is_range_check);
-    }
-}
-
-impl JalRangeCheckAir {
-    fn new(
-        execution_bridge: ExecutionBridge,
-        memory_bridge: MemoryBridge,
-        range_bus: VariableRangeCheckerBus,
-    ) -> Self {
-        Self {
-            execution_bridge,
-            memory_bridge,
-            range_bus,
-        }
-    }
-}
-
-#[repr(C)]
-#[derive(Serialize, Deserialize)]
-pub struct JalRangeCheckRecord {
-    pub state: ExecutionState<u32>,
-    pub a_rw: RecordId,
-    pub b: u32,
-    pub c: u8,
-    pub is_jal: bool,
-}
-
-/// Chip for JAL and RANGE_CHECK. These opcodes are logically irrelevant. Putting these opcodes into
-/// the same chip is just to save columns.
-pub struct JalRangeCheckChip<F> {
-    air: JalRangeCheckAir,
-    pub records: Vec<JalRangeCheckRecord>,
-    offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-    range_checker_chip: SharedVariableRangeCheckerChip,
-    /// If true, ignore execution errors.
-    debug: bool,
-}
-
-impl<F: PrimeField32> JalRangeCheckChip<F> {
-    pub fn new(
-        execution_bridge: ExecutionBridge,
-        offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-        range_checker_chip: SharedVariableRangeCheckerChip,
-    ) -> Self {
-        let memory_bridge = offline_memory.lock().unwrap().memory_bridge();
-        let air = JalRangeCheckAir::new(execution_bridge, memory_bridge, range_checker_chip.bus());
-        Self {
-            air,
-            records: vec![],
-            offline_memory,
-            range_checker_chip,
-            debug: false,
-        }
-    }
-    pub fn with_debug(mut self) -> Self {
-        self.debug = true;
-        self
-    }
-}
-
-impl<F: PrimeField32> InstructionExecutor<F> for JalRangeCheckChip<F> {
-    fn execute(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-    ) -> Result<ExecutionState<u32>, ExecutionError> {
-        if instruction.opcode == NativeJalOpcode::JAL.global_opcode() {
-            let (record_id, _) = memory.write(
-                F::from_canonical_u32(AS::Native as u32),
-                instruction.a,
-                [F::from_canonical_u32(from_state.pc + DEFAULT_PC_STEP)],
-            );
-            let b = instruction.b.as_canonical_u32();
-            self.records.push(JalRangeCheckRecord {
-                state: from_state,
-                a_rw: record_id,
-                b,
-                c: 0,
-                is_jal: true,
-            });
-            return Ok(ExecutionState {
-                pc: (F::from_canonical_u32(from_state.pc) + instruction.b).as_canonical_u32(),
-                timestamp: memory.timestamp(),
-            });
-        } else if instruction.opcode == NativeRangeCheckOpcode::RANGE_CHECK.global_opcode() {
-            let d = F::from_canonical_u32(AS::Native as u32);
-            // This is a read, but we make the record have prev_data
-            let a_val = memory.unsafe_read_cell(d, instruction.a);
-            let (record_id, _) = memory.write(d, instruction.a, [a_val]);
-            let a_val = a_val.as_canonical_u32();
-            let b = instruction.b.as_canonical_u32();
-            let c = instruction.c.as_canonical_u32();
-            debug_assert!(!self.debug || b <= 16);
-            debug_assert!(!self.debug || c <= 14);
-            let x = a_val & ((1 << 16) - 1);
-            if !self.debug && x >= 1 << b {
-                return Err(ExecutionError::Fail { pc: from_state.pc });
-            }
-            let y = a_val >> 16;
-            if !self.debug && y >= 1 << c {
-                return Err(ExecutionError::Fail { pc: from_state.pc });
-            }
-            self.records.push(JalRangeCheckRecord {
-                state: from_state,
-                a_rw: record_id,
-                b,
-                c: c as u8,
-                is_jal: false,
-            });
-            return Ok(ExecutionState {
-                pc: from_state.pc + DEFAULT_PC_STEP,
-                timestamp: memory.timestamp(),
-            });
-        }
-        panic!("Unknown opcode {}", instruction.opcode);
-    }
-
-    fn get_opcode_name(&self, opcode: usize) -> String {
-        let jal_opcode = NativeJalOpcode::JAL.global_opcode().as_usize();
-        let range_check_opcode = NativeRangeCheckOpcode::RANGE_CHECK
-            .global_opcode()
-            .as_usize();
-        if opcode == jal_opcode {
-            return String::from("JAL");
-        }
-        if opcode == range_check_opcode {
-            return String::from("RANGE_CHECK");
-        }
-        panic!("Unknown opcode {}", opcode);
-    }
-}
-
-impl<F: Field> ChipUsageGetter for JalRangeCheckChip<F> {
-    fn air_name(&self) -> String {
-        "JalRangeCheck".to_string()
-    }
-
-    fn current_trace_height(&self) -> usize {
-        self.records.len()
-    }
-
-    fn trace_width(&self) -> usize {
-        OVERALL_WIDTH
-    }
-}
-
-impl<SC: StarkGenericConfig> Chip<SC> for JalRangeCheckChip<Val<SC>>
-where
-    Val<SC>: PrimeField32,
-{
-    fn air(&self) -> AirRef<SC> {
-        Arc::new(self.air)
-    }
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
-        let height = next_power_of_two_or_zero(self.records.len());
-        let mut flat_trace = Val::<SC>::zero_vec(OVERALL_WIDTH * height);
-        let memory = self.offline_memory.lock().unwrap();
-        let aux_cols_factory = memory.aux_cols_factory();
-
-        self.records
-            .into_par_iter()
-            .zip(flat_trace.par_chunks_mut(OVERALL_WIDTH))
-            .for_each(|(record, slice)| {
-                record_to_row(
-                    record,
-                    &aux_cols_factory,
-                    self.range_checker_chip.as_ref(),
-                    slice,
-                    &memory,
-                );
-            });
-
-        let matrix = RowMajorMatrix::new(flat_trace, OVERALL_WIDTH);
-        AirProofInput::simple_no_pis(matrix)
-    }
-}
-
-fn record_to_row<F: PrimeField32>(
-    record: JalRangeCheckRecord,
-    aux_cols_factory: &MemoryAuxColsFactory<F>,
-    range_checker_chip: &VariableRangeCheckerChip,
-    slice: &mut [F],
-    memory: &OfflineMemory<F>,
-) {
-    let a_record = memory.record_by_id(record.a_rw);
-    let col: &mut JalRangeCheckCols<_> = slice.borrow_mut();
-    col.is_jal = F::from_bool(record.is_jal);
-    col.is_range_check = F::from_bool(!record.is_jal);
-    col.a_pointer = a_record.pointer;
-    col.state = ExecutionState {
-        pc: F::from_canonical_u32(record.state.pc),
-        timestamp: F::from_canonical_u32(record.state.timestamp),
-    };
-    aux_cols_factory.generate_write_aux(a_record, &mut col.writes_aux);
-    col.b = F::from_canonical_u32(record.b);
-    if !record.is_jal {
-        let a_val = a_record.data_at(0);
-        let a_val_u32 = a_val.as_canonical_u32();
-        let y = a_val_u32 >> 16;
-        let x = a_val_u32 & ((1 << 16) - 1);
-        range_checker_chip.add_count(x, record.b as usize);
-        range_checker_chip.add_count(y, record.c as usize);
-        col.c = F::from_canonical_u32(record.c as u32);
-        col.y = F::from_canonical_u32(y);
-    }
-}
diff --git a/extensions/native/circuit/src/jal/tests.rs b/extensions/native/circuit/src/jal/tests.rs
deleted file mode 100644
index dd56b73c8f..0000000000
--- a/extensions/native/circuit/src/jal/tests.rs
+++ /dev/null
@@ -1,198 +0,0 @@
-use std::borrow::BorrowMut;
-
-use openvm_circuit::arch::{testing::VmChipTestBuilder, ExecutionBridge};
-use openvm_instructions::{
-    instruction::Instruction,
-    program::{DEFAULT_PC_STEP, PC_BITS},
-    LocalOpcode,
-};
-use openvm_native_compiler::{NativeJalOpcode::*, NativeRangeCheckOpcode::RANGE_CHECK};
-use openvm_stark_backend::{
-    p3_field::{FieldAlgebra, PrimeField32},
-    utils::disable_debug_builder,
-    verifier::VerificationError,
-    Chip,
-};
-use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
-use rand::{rngs::StdRng, Rng};
-
-use crate::{jal::JalRangeCheckCols, JalRangeCheckChip};
-type F = BabyBear;
-
-fn set_and_execute(
-    tester: &mut VmChipTestBuilder<F>,
-    chip: &mut JalRangeCheckChip<F>,
-    rng: &mut StdRng,
-    initial_imm: Option<u32>,
-    initial_pc: Option<u32>,
-) {
-    let imm = initial_imm.unwrap_or(rng.gen_range(0..20));
-    let a = rng.gen_range(0..32) << 2;
-    let d = 4usize;
-
-    tester.execute_with_pc(
-        chip,
-        &Instruction::from_usize(JAL.global_opcode(), [a, imm as usize, 0, d, 0, 0, 0]),
-        initial_pc.unwrap_or(rng.gen_range(0..(1 << PC_BITS))),
-    );
-    let initial_pc = tester.execution.last_from_pc().as_canonical_u32();
-    let final_pc = tester.execution.last_to_pc().as_canonical_u32();
-
-    let next_pc = initial_pc + imm;
-    let rd_data = initial_pc + DEFAULT_PC_STEP;
-
-    assert_eq!(next_pc, final_pc);
-    assert_eq!(rd_data, tester.read::<1>(d, a)[0].as_canonical_u32());
-}
-
-struct RangeCheckTestCase {
-    val: u32,
-    x_bit: u32,
-    y_bit: u32,
-}
-
-fn set_and_execute_range_check(
-    tester: &mut VmChipTestBuilder<F>,
-    chip: &mut JalRangeCheckChip<F>,
-    rng: &mut StdRng,
-    test_cases: Vec<RangeCheckTestCase>,
-) {
-    let a = rng.gen_range(0..32) << 2;
-    for RangeCheckTestCase { val, x_bit, y_bit } in test_cases {
-        let d = 4usize;
-
-        tester.write_cell(d, a, F::from_canonical_u32(val));
-        tester.execute_with_pc(
-            chip,
-            &Instruction::from_usize(
-                RANGE_CHECK.global_opcode(),
-                [a, x_bit as usize, y_bit as usize, d, 0, 0, 0],
-            ),
-            rng.gen_range(0..(1 << PC_BITS)),
-        );
-    }
-}
-
-fn setup() -> (StdRng, VmChipTestBuilder<F>, JalRangeCheckChip<F>) {
-    let rng = create_seeded_rng();
-    let tester = VmChipTestBuilder::default();
-    let execution_bridge = ExecutionBridge::new(tester.execution_bus(), tester.program_bus());
-    let offline_memory = tester.offline_memory_mutex_arc();
-    let range_checker = tester.range_checker();
-    let chip = JalRangeCheckChip::<F>::new(execution_bridge, offline_memory, range_checker);
-    (rng, tester, chip)
-}
-
-#[test]
-fn rand_jal_test() {
-    let (mut rng, mut tester, mut chip) = setup();
-    let num_tests: usize = 100;
-    for _ in 0..num_tests {
-        set_and_execute(&mut tester, &mut chip, &mut rng, None, None);
-    }
-
-    let tester = tester.build().load(chip).finalize();
-    tester.simple_test().expect("Verification failed");
-}
-
-#[test]
-fn rand_range_check_test() {
-    let (mut rng, mut tester, mut chip) = setup();
-    let f = |x: u32, y: u32| RangeCheckTestCase {
-        val: x + y * (1 << 16),
-        x_bit: 32 - x.leading_zeros(),
-        y_bit: 32 - y.leading_zeros(),
-    };
-    let mut test_cases: Vec<_> = (0..10)
-        .map(|_| {
-            let x = 0;
-            let y = rng.gen_range(0..1 << 14);
-            f(x, y)
-        })
-        .collect();
-    test_cases.extend((0..10).map(|_| {
-        let x = rng.gen_range(0..1 << 16);
-        let y = 0;
-        f(x, y)
-    }));
-    test_cases.extend((0..10).map(|_| {
-        let x = rng.gen_range(0..1 << 16);
-        let y = rng.gen_range(0..1 << 14);
-        f(x, y)
-    }));
-    test_cases.push(f((1 << 16) - 1, (1 << 14) - 1));
-    set_and_execute_range_check(&mut tester, &mut chip, &mut rng, test_cases);
-    let tester = tester.build().load(chip).finalize();
-    tester.simple_test().expect("Verification failed");
-}
-
-#[test]
-fn negative_range_check_test() {
-    {
-        let (mut rng, mut tester, chip) = setup();
-        let mut chip = chip.with_debug();
-        set_and_execute_range_check(
-            &mut tester,
-            &mut chip,
-            &mut rng,
-            vec![RangeCheckTestCase {
-                x_bit: 1,
-                y_bit: 1,
-                val: 2,
-            }],
-        );
-        let tester = tester.build().load(chip).finalize();
-        disable_debug_builder();
-        let result = tester.simple_test();
-        assert!(result.is_err());
-    }
-    {
-        let (mut rng, mut tester, chip) = setup();
-        let mut chip = chip.with_debug();
-        set_and_execute_range_check(
-            &mut tester,
-            &mut chip,
-            &mut rng,
-            vec![RangeCheckTestCase {
-                x_bit: 1,
-                y_bit: 0,
-                val: 1 << 16,
-            }],
-        );
-        let tester = tester.build().load(chip).finalize();
-        disable_debug_builder();
-        let result = tester.simple_test();
-        assert!(result.is_err());
-    }
-}
-
-#[test]
-fn negative_jal_test() {
-    let (mut rng, mut tester, mut chip) = setup();
-    set_and_execute(&mut tester, &mut chip, &mut rng, None, None);
-
-    let tester = tester.build();
-
-    let chip_air = chip.air();
-    let mut chip_input = chip.generate_air_proof_input();
-    let jal_trace = chip_input.raw.common_main.as_mut().unwrap();
-    {
-        let col: &mut JalRangeCheckCols<_> = jal_trace.row_mut(0).borrow_mut();
-        col.b = F::from_canonical_u32(rng.gen_range(1 << 11..1 << 12));
-    }
-    disable_debug_builder();
-    let tester = tester
-        .load_air_proof_input((chip_air, chip_input))
-        .finalize();
-    let msg = format!(
-        "Expected verification to fail with {:?}, but it didn't",
-        VerificationError::ChallengePhaseError
-    );
-    let result = tester.simple_test();
-    assert_eq!(
-        result.err(),
-        Some(VerificationError::ChallengePhaseError),
-        "{}",
-        msg
-    );
-}
diff --git a/extensions/native/circuit/src/jal_rangecheck/execution.rs b/extensions/native/circuit/src/jal_rangecheck/execution.rs
new file mode 100644
index 0000000000..f9cf17d7af
--- /dev/null
+++ b/extensions/native/circuit/src/jal_rangecheck/execution.rs
@@ -0,0 +1,229 @@
+use std::borrow::{Borrow, BorrowMut};
+
+use openvm_circuit::{arch::*, system::memory::online::GuestMemory};
+use openvm_circuit_primitives::AlignedBytesBorrow;
+use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP, LocalOpcode};
+use openvm_native_compiler::{conversion::AS, NativeJalOpcode, NativeRangeCheckOpcode};
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::JalRangeCheckExecutor;
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct JalPreCompute<F> { // JAL operands, decoded once by `pre_compute_jal_impl`
+    a: u32, // native-address-space pointer that receives `return_pc`
+    b: F, // jump offset, added to the current pc as a field element
+    return_pc: F, // pc of the JAL plus `DEFAULT_PC_STEP`, as a field element
+}
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct RangeCheckPreCompute { // RANGE_CHECK operands, decoded by `pre_compute_range_check_impl`
+    a: u32, // native-address-space pointer of the value being checked
+    b: u8, // bit bound for the low 16 bits of the value (validated to be <= 16)
+    c: u8, // bit bound for the bits above the low 16 (validated to be <= 14)
+}
+
+impl JalRangeCheckExecutor {
+    /// Decodes a JAL instruction at `pc` into `jal_data`.
+    ///
+    /// Fails with `InvalidInstruction(pc)` when `inst` does not carry the JAL opcode.
+    #[inline(always)]
+    fn pre_compute_jal_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        jal_data: &mut JalPreCompute<F>,
+    ) -> Result<(), StaticProgramError> {
+        if inst.opcode != NativeJalOpcode::JAL.global_opcode() {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+        // The link value is the pc of the instruction that follows this JAL.
+        *jal_data = JalPreCompute {
+            a: inst.a.as_canonical_u32(),
+            b: inst.b,
+            return_pc: F::from_canonical_u32(pc.wrapping_add(DEFAULT_PC_STEP)),
+        };
+        Ok(())
+    }
+
+    /// Decodes a RANGE_CHECK instruction at `pc` into `range_check_data`.
+    ///
+    /// Fails with `InvalidInstruction(pc)` when the opcode is not RANGE_CHECK or when a
+    /// bit bound is out of range (`b > 16` or `c > 14`).
+    #[inline(always)]
+    fn pre_compute_range_check_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        range_check_data: &mut RangeCheckPreCompute,
+    ) -> Result<(), StaticProgramError> {
+        if inst.opcode != NativeRangeCheckOpcode::RANGE_CHECK.global_opcode() {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+        let low_bound = inst.b.as_canonical_u32();
+        let high_bound = inst.c.as_canonical_u32();
+        // Once the bounds pass this check they fit in a `u8`, so the casts are lossless.
+        if low_bound > 16 || high_bound > 14 {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+        *range_check_data = RangeCheckPreCompute {
+            a: inst.a.as_canonical_u32(),
+            b: low_bound as u8,
+            c: high_bound as u8,
+        };
+        Ok(())
+    }
+}
+
+impl<F> Executor<F> for JalRangeCheckExecutor
+where
+    F: PrimeField32,
+{
+    /// JAL and RANGE_CHECK share one pre-compute buffer, so it is sized for the larger
+    /// of the two layouts.
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize {
+        size_of::<JalPreCompute<F>>().max(size_of::<RangeCheckPreCompute>())
+    }
+
+    /// Decodes `inst` into `data` and returns the handler that executes it.
+    ///
+    /// Every opcode other than JAL is routed to the RANGE_CHECK decoder, which rejects
+    /// anything that is not RANGE_CHECK with `InvalidInstruction`.
+    #[inline(always)]
+    fn pre_compute<Ctx: ExecutionCtxTrait>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        if inst.opcode == NativeJalOpcode::JAL.global_opcode() {
+            let jal_data: &mut JalPreCompute<F> = data.borrow_mut();
+            self.pre_compute_jal_impl(pc, inst, jal_data)?;
+            return Ok(execute_jal_e1_impl);
+        }
+
+        // Not a JAL: decode as RANGE_CHECK.
+        let range_check_data: &mut RangeCheckPreCompute = data.borrow_mut();
+        self.pre_compute_range_check_impl(pc, inst, range_check_data)?;
+        Ok(execute_range_check_e1_impl)
+    }
+}
+
+impl<F> MeteredExecutor<F> for JalRangeCheckExecutor
+where
+    F: PrimeField32,
+{
+    /// The metered buffer is shared by both opcodes, so it is sized for the larger of the
+    /// two `E2PreCompute` layouts.
+    #[inline(always)]
+    fn metered_pre_compute_size(&self) -> usize {
+        let jal_size = size_of::<E2PreCompute<JalPreCompute<F>>>();
+        let range_check_size = size_of::<E2PreCompute<RangeCheckPreCompute>>();
+        jal_size.max(range_check_size)
+    }
+
+    /// Metered counterpart of `pre_compute`.
+    ///
+    /// Besides decoding the operands, it stores `chip_idx` in the `E2PreCompute` wrapper
+    /// so the metered handler can report the height change for the right chip.
+    #[inline(always)]
+    fn metered_pre_compute<Ctx: MeteredExecutionCtxTrait>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        if inst.opcode == NativeJalOpcode::JAL.global_opcode() {
+            let jal: &mut E2PreCompute<JalPreCompute<F>> = data.borrow_mut();
+            jal.chip_idx = chip_idx as u32;
+            self.pre_compute_jal_impl(pc, inst, &mut jal.data)?;
+            return Ok(execute_jal_e2_impl);
+        }
+
+        // Not a JAL: decode as RANGE_CHECK, which rejects any other opcode.
+        let range_check: &mut E2PreCompute<RangeCheckPreCompute> = data.borrow_mut();
+        range_check.chip_idx = chip_idx as u32;
+        self.pre_compute_range_check_impl(pc, inst, &mut range_check.data)?;
+        Ok(execute_range_check_e2_impl)
+    }
+}
+
+#[inline(always)]
+unsafe fn execute_jal_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait>(
+    pre_compute: &JalPreCompute<F>,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    vm_state.vm_write(AS::Native as u32, pre_compute.a, &[pre_compute.return_pc]); // link step
+    // TODO(ayush): better way to do this. The offset `b` is added to the pc as a field element.
+    vm_state.pc = (F::from_canonical_u32(vm_state.pc) + pre_compute.b).as_canonical_u32();
+    vm_state.instret += 1; // count this instruction
+}
+
+/// Shared RANGE_CHECK step for the E1 and E2 handlers.
+///
+/// Writes the value at `a` back unchanged, then checks that its low 16 bits are below
+/// `2^b` and its remaining high bits are below `2^c`. On failure `exit_code` is set to
+/// `ExecutionError::Fail` and neither `pc` nor `instret` is advanced.
+#[inline(always)]
+unsafe fn execute_range_check_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait>(
+    pre_compute: &RangeCheckPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let [value]: [F; 1] = vm_state.host_read(AS::Native as u32, pre_compute.a);
+    vm_state.vm_write(AS::Native as u32, pre_compute.a, &[value]);
+
+    let word = value.as_canonical_u32();
+    let (low, high) = (word & 0xffff, word >> 16);
+    // `pre_compute_range_check_impl` has already enforced `b <= 16` and `c <= 14`.
+    let in_range = low < (1 << pre_compute.b) && high < (1 << pre_compute.c);
+    if !in_range {
+        vm_state.exit_code = Err(ExecutionError::Fail {
+            pc: vm_state.pc,
+            msg: "NativeRangeCheck",
+        });
+        return;
+    }
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+}
+
+unsafe fn execute_jal_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &JalPreCompute<F> = pre_compute.borrow(); // view the raw buffer as JAL operands
+    execute_jal_e12_impl(pre_compute, vm_state); // E1 handler: just the shared JAL step
+}
+
+unsafe fn execute_jal_e2_impl<F: PrimeField32, CTX: MeteredExecutionCtxTrait>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &E2PreCompute<JalPreCompute<F>> = pre_compute.borrow(); // metered layout
+    vm_state
+        .ctx
+        .on_height_change(pre_compute.chip_idx as usize, 1); // E2 only: notify the metering ctx
+    execute_jal_e12_impl(&pre_compute.data, vm_state); // then run the shared JAL step
+}
+
+unsafe fn execute_range_check_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &RangeCheckPreCompute = pre_compute.borrow(); // view the raw buffer as operands
+    execute_range_check_e12_impl(pre_compute, vm_state); // E1 handler: just the shared step
+}
+
+unsafe fn execute_range_check_e2_impl<F: PrimeField32, CTX: MeteredExecutionCtxTrait>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &E2PreCompute<RangeCheckPreCompute> = pre_compute.borrow(); // metered layout
+    vm_state
+        .ctx
+        .on_height_change(pre_compute.chip_idx as usize, 1); // E2 only: notify the metering ctx
+    execute_range_check_e12_impl(&pre_compute.data, vm_state); // then run the shared step
+}
diff --git a/extensions/native/circuit/src/jal_rangecheck/mod.rs b/extensions/native/circuit/src/jal_rangecheck/mod.rs
new file mode 100644
index 0000000000..894b1d3fee
--- /dev/null
+++ b/extensions/native/circuit/src/jal_rangecheck/mod.rs
@@ -0,0 +1,290 @@
+use std::{
+    borrow::{Borrow, BorrowMut},
+    ops::Deref,
+};
+
+use openvm_circuit::{
+    arch::*,
+    system::{
+        memory::{
+            offline_checker::{MemoryBridge, MemoryWriteAuxCols, MemoryWriteAuxRecord},
+            online::TracingMemory,
+            MemoryAddress, MemoryAuxColsFactory,
+        },
+        native_adapter::util::{memory_read_native, tracing_write_native},
+    },
+};
+use openvm_circuit_primitives::{
+    var_range::{SharedVariableRangeCheckerChip, VariableRangeCheckerBus},
+    AlignedBytesBorrow,
+};
+use openvm_circuit_primitives_derive::AlignedBorrow;
+use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP, LocalOpcode};
+use openvm_native_compiler::{conversion::AS, NativeJalOpcode, NativeRangeCheckOpcode};
+use openvm_stark_backend::{
+    interaction::InteractionBuilder,
+    p3_air::{Air, AirBuilder, BaseAir},
+    p3_field::{Field, FieldAlgebra, PrimeField32},
+    p3_matrix::Matrix,
+    rap::{BaseAirWithPublicValues, PartitionedBaseAir},
+};
+use static_assertions::const_assert_eq;
+use AS::Native;
+
+mod execution;
+
+#[cfg(test)]
+mod tests;
+
+#[repr(C)]
+#[derive(AlignedBorrow)]
+pub struct JalRangeCheckCols<T> { // one trace row, shared by the JAL and RANGE_CHECK opcodes
+    is_jal: T, // boolean flag; at most one of `is_jal` / `is_range_check` is set
+    is_range_check: T,
+    a_pointer: T, // operand `a`: pointer into the native address space
+    state: ExecutionState<T>, // pc and timestamp the instruction starts from
+    // JAL writes `pc + DEFAULT_PC_STEP`; RANGE_CHECK rewrites the old value (acts as a read).
+    writes_aux: MemoryWriteAuxCols<T, 1>,
+    b: T, // operand `b`: pc offset for JAL, bit bound of the low limb for RANGE_CHECK
+    // Operand `c`: bit bound of the high limb for RANGE_CHECK; constrained to 0 for JAL.
+    c: T,
+    // High limb of the checked value (`a_val = x + y * 2^16`); only used by range check.
+    y: T,
+}
+
+const OVERALL_WIDTH: usize = JalRangeCheckCols::<u8>::width(); // cells per trace row
+const_assert_eq!(OVERALL_WIDTH, 12); // compile-time check that the column layout stays at 12 cells
+
+#[derive(Copy, Clone, Debug, derive_new::new)]
+pub struct JalRangeCheckAir {
+    execution_bridge: ExecutionBridge, // constrains opcode, operands and the pc update
+    memory_bridge: MemoryBridge, // constrains the single memory write at `a_pointer`
+    range_bus: VariableRangeCheckerBus, // receives the `(x, b)` and `(y, c)` range checks
+}
+
+impl<F: Field> BaseAir<F> for JalRangeCheckAir {
+    fn width(&self) -> usize {
+        OVERALL_WIDTH // number of cells in one `JalRangeCheckCols` row
+    }
+}
+
+impl<F: Field> BaseAirWithPublicValues<F> for JalRangeCheckAir {}
+impl<F: Field> PartitionedBaseAir<F> for JalRangeCheckAir {}
+impl<AB: InteractionBuilder> Air<AB> for JalRangeCheckAir
+where
+    AB::F: PrimeField32,
+{
+    fn eval(&self, builder: &mut AB) {
+        let main = builder.main();
+        let local = main.row_slice(0);
+        let local_slice = local.deref();
+        let local: &JalRangeCheckCols<AB::Var> = local_slice.borrow();
+        builder.assert_bool(local.is_jal);
+        builder.assert_bool(local.is_range_check);
+        let is_valid = local.is_jal + local.is_range_check;
+        builder.assert_bool(is_valid.clone()); // sum is boolean too: at most one flag is set
+
+        let d = AB::Expr::from_canonical_u32(Native as u32);
+        let a_val = local.writes_aux.prev_data()[0]; // value at `a_pointer` before this row's write
+        // JAL writes pc + DEFAULT_PC_STEP; RANGE_CHECK writes a_val back unchanged (i.e. a read).
+        let write_val = local.is_jal
+            * (local.state.pc + AB::Expr::from_canonical_u32(DEFAULT_PC_STEP))
+            + local.is_range_check * a_val;
+        self.memory_bridge
+            .write(
+                MemoryAddress::new(d.clone(), local.a_pointer),
+                [write_val],
+                local.state.timestamp,
+                &local.writes_aux,
+            )
+            .eval(builder, is_valid.clone());
+
+        let opcode = local.is_jal
+            * AB::F::from_canonical_usize(NativeJalOpcode::JAL.global_opcode().as_usize())
+            + local.is_range_check
+                * AB::F::from_canonical_usize(
+                    NativeRangeCheckOpcode::RANGE_CHECK
+                        .global_opcode()
+                        .as_usize(),
+                );
+        // Increment pc by b if is_jal, else by DEFAULT_PC_STEP if is_range_check.
+        let pc_inc = local.is_jal * local.b
+            + local.is_range_check * AB::F::from_canonical_u32(DEFAULT_PC_STEP);
+        builder.when(local.is_jal).assert_zero(local.c); // JAL rows must carry c = 0
+        self.execution_bridge
+            .execute_and_increment_or_set_pc(
+                opcode,
+                [local.a_pointer.into(), local.b.into(), local.c.into(), d],
+                local.state,
+                AB::F::ONE, // NOTE(review): presumably the timestamp delta (one memory access) — confirm
+                PcIncOrSet::Inc(pc_inc),
+            )
+            .eval(builder, is_valid);
+
+        // Range check specific (both sends are gated on is_range_check):
+        // a_val = x + y * (1 << 16)
+        let x = a_val - local.y * AB::Expr::from_canonical_u32(1 << 16);
+        self.range_bus
+            .send(x.clone(), local.b) // assert x < (1 << b)
+            .eval(builder, local.is_range_check);
+        // Assert y < (1 << c), where c <= 14.
+        self.range_bus
+            .send(local.y, local.c)
+            .eval(builder, local.is_range_check);
+    }
+}
+
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct JalRangeCheckRecord<F> { // execution record consumed by `fill_trace_row`
+    pub is_jal: bool, // true for JAL, false for RANGE_CHECK
+    pub a: F, // operand `a` (pointer into the native address space)
+    pub from_pc: u32, // pc before the instruction executed
+    pub from_timestamp: u32, // memory timestamp before the instruction executed
+    pub write: MemoryWriteAuxRecord<F, 1>, // previous timestamp and data of the cell at `a`
+    pub b: F, // operand `b`
+    pub c: F, // operand `c`; always zero for JAL
+}
+
+/// Executor for JAL and RANGE_CHECK. These opcodes are logically unrelated; they are handled
+/// by the same chip only to save trace columns.
+#[derive(derive_new::new, Clone, Copy)]
+pub struct JalRangeCheckExecutor;
+
+#[derive(derive_new::new)]
+pub struct JalRangeCheckFiller { // turns `JalRangeCheckRecord`s into trace rows
+    range_checker_chip: SharedVariableRangeCheckerChip, // receives the counts for RANGE_CHECK rows
+}
+pub type NativeJalRangeCheckChip<F> = VmChipWrapper<F, JalRangeCheckFiller>;
+
+impl<F, RA> PreflightExecutor<F, RA> for JalRangeCheckExecutor
+where
+    F: PrimeField32,
+    for<'buf> RA: RecordArena<'buf, EmptyMultiRowLayout, &'buf mut JalRangeCheckRecord<F>>,
+{
+    fn get_opcode_name(&self, opcode: usize) -> String {
+        let jal_opcode = NativeJalOpcode::JAL.global_opcode().as_usize();
+        let range_check_opcode = NativeRangeCheckOpcode::RANGE_CHECK
+            .global_opcode()
+            .as_usize();
+        if opcode == jal_opcode {
+            return String::from("JAL");
+        }
+        if opcode == range_check_opcode {
+            return String::from("RANGE_CHECK");
+        }
+        panic!("Unknown opcode {opcode}"); // only JAL and RANGE_CHECK are named by this executor
+    }
+
+    fn execute(
+        &self,
+        state: VmStateMut<F, TracingMemory, RA>,
+        instruction: &Instruction<F>,
+    ) -> Result<(), ExecutionError> {
+        let &Instruction {
+            opcode, a, b, c, ..
+        } = instruction;
+
+        debug_assert!(
+            opcode == NativeJalOpcode::JAL.global_opcode()
+                || opcode == NativeRangeCheckOpcode::RANGE_CHECK.global_opcode()
+        );
+
+        let record = state.ctx.alloc(EmptyMultiRowLayout::default()); // one record per instruction
+
+        record.from_pc = *state.pc; // execution state before the instruction runs
+        record.from_timestamp = state.memory.timestamp;
+
+        record.a = a;
+        record.b = b;
+
+        if opcode == NativeJalOpcode::JAL.global_opcode() {
+            record.is_jal = true;
+            record.c = F::ZERO; // JAL rows always record c = 0 (the AIR asserts it)
+
+            tracing_write_native( // link: store pc + DEFAULT_PC_STEP at `a`
+                state.memory,
+                a.as_canonical_u32(),
+                [F::from_canonical_u32(
+                    state.pc.wrapping_add(DEFAULT_PC_STEP),
+                )],
+                &mut record.write.prev_timestamp,
+                &mut record.write.prev_data,
+            );
+            *state.pc = (F::from_canonical_u32(*state.pc) + b).as_canonical_u32(); // jump by `b`
+        } else if opcode == NativeRangeCheckOpcode::RANGE_CHECK.global_opcode() {
+            record.is_jal = false;
+            record.c = c;
+
+            let a_ptr = a.as_canonical_u32();
+            let [a_val]: [F; 1] = memory_read_native(state.memory.data(), a_ptr); // current value
+            tracing_write_native( // write the same value back so the access is traced
+                state.memory,
+                a_ptr,
+                [a_val],
+                &mut record.write.prev_timestamp,
+                &mut record.write.prev_data,
+            );
+            // NOTE(review): the bounds are not checked here; `fill_trace_row` asserts them in debug builds only.
+            *state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP);
+        }
+
+        Ok(())
+    }
+}
+
+impl<F: PrimeField32> TraceFiller<F> for JalRangeCheckFiller {
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, mut row_slice: &mut [F]) {
+        let record: &mut JalRangeCheckRecord<F> =
+            unsafe { get_record_from_slice(&mut row_slice, ()) };
+        let cols: &mut JalRangeCheckCols<F> = row_slice.borrow_mut();
+
+        // `record` shares this row's buffer, so columns are written in reverse order to avoid clobbering it
+        if record.is_jal {
+            cols.y = F::ZERO; // y and c are unused by JAL rows
+            cols.c = F::ZERO;
+            cols.b = record.b;
+            cols.writes_aux.set_prev_data(record.write.prev_data);
+            mem_helper.fill(
+                record.write.prev_timestamp,
+                record.from_timestamp,
+                cols.writes_aux.as_mut(),
+            );
+            cols.state.timestamp = F::from_canonical_u32(record.from_timestamp);
+            cols.state.pc = F::from_canonical_u32(record.from_pc);
+            cols.a_pointer = record.a;
+            cols.is_range_check = F::ZERO;
+            cols.is_jal = F::ONE;
+        } else {
+            let a_val = record.write.prev_data[0].as_canonical_u32(); // the value being range checked
+            let b = record.b.as_canonical_u32();
+            let c = record.c.as_canonical_u32();
+            let x = a_val & 0xffff; // low 16 bits
+            let y = a_val >> 16; // remaining high bits
+            #[cfg(debug_assertions)]
+            {
+                assert!(b <= 16);
+                assert!(c <= 14);
+                assert!(x < (1 << b));
+                assert!(y < (1 << c));
+            }
+
+            self.range_checker_chip.add_count(x, b as usize); // matches the AIR's (x, b) send
+            self.range_checker_chip.add_count(y, c as usize); // matches the AIR's (y, c) send
+
+            cols.y = F::from_canonical_u32(y);
+            cols.c = record.c;
+            cols.b = record.b;
+            cols.writes_aux.set_prev_data(record.write.prev_data);
+            mem_helper.fill(
+                record.write.prev_timestamp,
+                record.from_timestamp,
+                cols.writes_aux.as_mut(),
+            );
+            cols.state.timestamp = F::from_canonical_u32(record.from_timestamp);
+            cols.state.pc = F::from_canonical_u32(record.from_pc);
+            cols.a_pointer = record.a;
+            cols.is_range_check = F::ONE;
+            cols.is_jal = F::ZERO;
+        }
+    }
+}
diff --git a/extensions/native/circuit/src/jal_rangecheck/tests.rs b/extensions/native/circuit/src/jal_rangecheck/tests.rs
new file mode 100644
index 0000000000..6109b9373d
--- /dev/null
+++ b/extensions/native/circuit/src/jal_rangecheck/tests.rs
@@ -0,0 +1,307 @@
+use std::borrow::BorrowMut;
+
+use openvm_circuit::arch::testing::{memory::gen_pointer, TestChipHarness, VmChipTestBuilder};
+use openvm_instructions::{
+    instruction::Instruction,
+    program::{DEFAULT_PC_STEP, PC_BITS},
+    LocalOpcode, VmOpcode,
+};
+use openvm_native_compiler::{
+    conversion::AS, NativeJalOpcode::*, NativeRangeCheckOpcode::RANGE_CHECK,
+};
+use openvm_stark_backend::{
+    p3_field::{FieldAlgebra, PrimeField32},
+    p3_matrix::{
+        dense::{DenseMatrix, RowMajorMatrix},
+        Matrix,
+    },
+    utils::disable_debug_builder,
+    verifier::VerificationError,
+};
+use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
+use rand::{rngs::StdRng, Rng};
+use test_case::test_case;
+
+use super::{JalRangeCheckAir, JalRangeCheckExecutor};
+use crate::{
+    jal_rangecheck::{JalRangeCheckCols, NativeJalRangeCheckChip},
+    test_utils::write_native_array,
+    JalRangeCheckFiller,
+};
+
+const MAX_INS_CAPACITY: usize = 128;
+type F = BabyBear;
+type Harness =
+    TestChipHarness<F, JalRangeCheckExecutor, JalRangeCheckAir, NativeJalRangeCheckChip<F>>;
+
+fn create_test_chip(tester: &VmChipTestBuilder<F>) -> Harness { // executor + AIR + chip for one test
+    let range_checker = tester.range_checker().clone(); // shared by the AIR bus and the filler
+    let air = JalRangeCheckAir::new(
+        tester.execution_bridge(),
+        tester.memory_bridge(),
+        range_checker.bus(),
+    );
+    let executor = JalRangeCheckExecutor::new();
+    let chip = NativeJalRangeCheckChip::<F>::new(
+        JalRangeCheckFiller::new(range_checker),
+        tester.memory_helper(),
+    );
+
+    Harness::with_capacity(executor, air, chip, MAX_INS_CAPACITY)
+}
+
+// Runs one JAL or RANGE_CHECK through the harness. `a_val`/`c` are ignored for JAL; `None` = random.
+fn set_and_execute(
+    tester: &mut VmChipTestBuilder<F>,
+    harness: &mut Harness,
+    rng: &mut StdRng,
+    opcode: VmOpcode,
+    a_val: Option<u32>,
+    b: Option<u32>,
+    c: Option<u32>,
+) {
+    if opcode == JAL.global_opcode() {
+        let initial_pc = rng.gen_range(0..(1 << PC_BITS));
+        let a = gen_pointer(rng, 1);
+        let final_pc = F::from_canonical_u32(rng.gen_range(0..(1 << PC_BITS)));
+        let b = b.unwrap_or((final_pc - F::from_canonical_u32(initial_pc)).as_canonical_u32());
+        tester.execute_with_pc(
+            harness,
+            &Instruction::from_usize(opcode, [a, b as usize, 0, AS::Native as usize, 0, 0, 0]),
+            initial_pc,
+        );
+
+        let final_pc = tester.execution.last_to_pc();
+        let expected_final_pc = F::from_canonical_u32(initial_pc) + F::from_canonical_u32(b);
+        assert_eq!(final_pc, expected_final_pc);
+        let result_a_val = tester.read::<1>(AS::Native as usize, a)[0].as_canonical_u32();
+        let expected_a_val = initial_pc + DEFAULT_PC_STEP;
+        assert_eq!(result_a_val, expected_a_val);
+    } else {
+        let a_val = a_val.unwrap_or_else(|| rng.gen_range(0..(1 << 30))); // lazy: no draw if given
+        let a = write_native_array(tester, rng, Some([F::from_canonical_u32(a_val)])).1;
+        let x = a_val & 0xffff;
+        let y = a_val >> 16;
+
+        let min_b = 32 - x.leading_zeros(); // smallest bit bound that still admits x
+        let min_c = 32 - y.leading_zeros(); // smallest bit bound that still admits y
+        let b = b.unwrap_or_else(|| rng.gen_range(min_b..=16)); // lazy: an empty range cannot panic
+        let c = c.unwrap_or_else(|| rng.gen_range(min_c..=14)); // when the bound is given explicitly
+        tester.execute(
+            harness,
+            &Instruction::from_usize(
+                opcode,
+                [a, b as usize, c as usize, AS::Native as usize, 0, 0, 0],
+            ),
+        );
+        // Nothing to assert for range check: it writes the value back unchanged
+    };
+}
+
+//////////////////////////////////////////////////////////////////////////////////////
+// POSITIVE TESTS
+//
+// Randomly generate computations and execute them, ensuring that the generated trace
+// passes all constraints.
+//////////////////////////////////////////////////////////////////////////////////////
+
+#[test_case(JAL.global_opcode(), 100)]
+#[test_case(RANGE_CHECK.global_opcode(), 100)]
+fn rand_jal_range_check_test(opcode: VmOpcode, num_ops: usize) { // `num_ops` random instructions
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default_native();
+    let mut harness = create_test_chip(&tester);
+
+    for _ in 0..num_ops {
+        set_and_execute(
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            opcode,
+            None, // all operands random
+            None,
+            None,
+        );
+    }
+    let tester = tester.build().load(harness).finalize();
+    tester.simple_test().expect("Verification failed"); // the honest trace must verify
+}
+
+#[test]
+fn range_check_edge_cases_test() {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default_native();
+    let mut harness = create_test_chip(&tester);
+
+    set_and_execute(
+        &mut tester,
+        &mut harness,
+        &mut rng,
+        RANGE_CHECK.global_opcode(),
+        Some(0), // smallest value: x = y = 0
+        None,
+        None,
+    );
+    set_and_execute(
+        &mut tester,
+        &mut harness,
+        &mut rng,
+        RANGE_CHECK.global_opcode(),
+        Some((1 << 30) - 1), // largest value: x = 2^16 - 1, y = 2^14 - 1
+        None,
+        None,
+    );
+
+    // x = 0: only the high limb is populated
+    let a = rng.gen_range(0..(1 << 14)) << 16;
+    set_and_execute(
+        &mut tester,
+        &mut harness,
+        &mut rng,
+        RANGE_CHECK.global_opcode(),
+        Some(a),
+        None,
+        None,
+    );
+
+    // y = 0: only the low limb is populated
+    let a = rng.gen_range(0..(1 << 16));
+    set_and_execute(
+        &mut tester,
+        &mut harness,
+        &mut rng,
+        RANGE_CHECK.global_opcode(),
+        Some(a),
+        None,
+        None,
+    );
+
+    let tester = tester.build().load(harness).finalize();
+    tester.simple_test().expect("Verification failed");
+}
+
+//////////////////////////////////////////////////////////////////////////////////////
+// NEGATIVE TESTS
+//
+// Given a fake trace of a single operation, set up a chip and run the test. We replace
+// part of the trace and check that the chip throws the expected error.
+//////////////////////////////////////////////////////////////////////////////////////
+
+#[derive(Clone, Copy, Default)]
+struct JalRangeCheckPrankValues { // trace-cell overrides; `None` leaves the cell untouched
+    pub flags: Option<[bool; 2]>, // [is_jal, is_range_check]
+    pub a_val: Option<u32>, // overrides the `prev_data` of the write aux columns
+    pub b: Option<u32>,
+    pub c: Option<u32>,
+    pub y: Option<u32>,
+}
+
+fn run_negative_jal_range_check_test( // executes one op, pranks its trace row, expects `error`
+    opcode: VmOpcode,
+    a_val: Option<u32>,
+    b: Option<u32>,
+    c: Option<u32>,
+    prank_vals: JalRangeCheckPrankValues,
+    error: VerificationError,
+) {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default_native();
+    let mut harness = create_test_chip(&tester);
+    set_and_execute(&mut tester, &mut harness, &mut rng, opcode, a_val, b, c); // honest execution
+
+    let modify_trace = |trace: &mut DenseMatrix<F>| {
+        let mut values = trace.row_slice(0).to_vec(); // copy of the first (only executed) row
+        let cols: &mut JalRangeCheckCols<F> = values[..].borrow_mut();
+
+        if let Some(flags) = prank_vals.flags {
+            cols.is_jal = F::from_bool(flags[0]);
+            cols.is_range_check = F::from_bool(flags[1]);
+        }
+        if let Some(a_val) = prank_vals.a_val {
+            cols.writes_aux
+                .set_prev_data([F::from_canonical_u32(a_val)]);
+        }
+
+        if let Some(b) = prank_vals.b {
+            cols.b = F::from_canonical_u32(b);
+        }
+        if let Some(c) = prank_vals.c {
+            cols.c = F::from_canonical_u32(c);
+        }
+        if let Some(y) = prank_vals.y {
+            cols.y = F::from_canonical_u32(y);
+        }
+
+        *trace = RowMajorMatrix::new(values, trace.width()); // trace becomes just the pranked row
+    };
+
+    disable_debug_builder(); // called before the invalid trace is loaded and verified
+    let tester = tester
+        .build()
+        .load_and_prank_trace(harness, modify_trace)
+        .finalize();
+    tester.simple_test_with_expected_error(error);
+}
+
+#[test]
+fn negative_range_check_test() {
+    run_negative_jal_range_check_test(
+        RANGE_CHECK.global_opcode(),
+        Some(2),
+        Some(2),
+        Some(1),
+        JalRangeCheckPrankValues {
+            b: Some(1), // x = 2 does not fit in 1 bit
+            ..Default::default()
+        },
+        VerificationError::ChallengePhaseError,
+    );
+    run_negative_jal_range_check_test(
+        RANGE_CHECK.global_opcode(),
+        Some(1 << 16),
+        None,
+        None,
+        JalRangeCheckPrankValues {
+            c: Some(0), // y = 1 does not fit in 0 bits
+            ..Default::default()
+        },
+        VerificationError::ChallengePhaseError,
+    );
+    run_negative_jal_range_check_test(
+        RANGE_CHECK.global_opcode(),
+        Some((1 << 30) - 1),
+        None,
+        None,
+        JalRangeCheckPrankValues {
+            a_val: Some(1 << 30), // with the recorded y, x becomes 2^16 and exceeds 16 bits
+            ..Default::default()
+        },
+        VerificationError::ChallengePhaseError,
+    );
+    run_negative_jal_range_check_test(
+        RANGE_CHECK.global_opcode(),
+        Some(1 << 17),
+        None,
+        None,
+        JalRangeCheckPrankValues {
+            y: Some(1), // true y is 2; y = 1 forces x = 2^16, which no b <= 16 admits
+            ..Default::default()
+        },
+        VerificationError::ChallengePhaseError,
+    );
+}
+
+#[test]
+fn negative_jal_test() {
+    run_negative_jal_range_check_test(
+        JAL.global_opcode(),
+        None,
+        None,
+        None,
+        JalRangeCheckPrankValues {
+            b: Some(0), // presumably differs from the random offset that was actually executed
+            ..Default::default()
+        },
+        VerificationError::ChallengePhaseError,
+    );
+}
diff --git a/extensions/native/circuit/src/lib.rs b/extensions/native/circuit/src/lib.rs
index 46c6bc890f..01c0d0ba5b 100644
--- a/extensions/native/circuit/src/lib.rs
+++ b/extensions/native/circuit/src/lib.rs
@@ -1,3 +1,22 @@
+use openvm_circuit::{
+    arch::{
+        AirInventory, ChipInventoryError, InitFileGenerator, MatrixRecordArena, MemoryConfig,
+        SystemConfig, VmBuilder, VmChipComplex, VmProverExtension,
+    },
+    system::{SystemChipInventory, SystemCpuBuilder, SystemExecutor},
+};
+use openvm_circuit_derive::VmConfig;
+use openvm_rv32im_circuit::{
+    Rv32I, Rv32IExecutor, Rv32ImCpuProverExt, Rv32Io, Rv32IoExecutor, Rv32M, Rv32MExecutor,
+};
+use openvm_stark_backend::{
+    config::{StarkGenericConfig, Val},
+    p3_field::PrimeField32,
+    prover::cpu::{CpuBackend, CpuDevice},
+};
+use openvm_stark_sdk::engine::StarkEngine;
+use serde::{Deserialize, Serialize};
+
 pub mod adapters;
 
 mod branch_eq;
@@ -5,7 +24,7 @@ mod castf;
 mod field_arithmetic;
 mod field_extension;
 mod fri;
-mod jal;
+mod jal_rangecheck;
 mod loadstore;
 mod poseidon2;
 
@@ -14,7 +33,7 @@ pub use castf::*;
 pub use field_arithmetic::*;
 pub use field_extension::*;
 pub use fri::*;
-pub use jal::*;
+pub use jal_rangecheck::*;
 pub use loadstore::*;
 pub use poseidon2::*;
 
@@ -22,4 +41,134 @@ mod extension;
 pub use extension::*;
 
 mod utils;
-pub use utils::*;
+#[cfg(any(test, feature = "test-utils"))]
+pub use utils::test_utils::*;
+pub(crate) use utils::*;
+
+#[derive(Clone, Debug, derive_new::new, VmConfig, Serialize, Deserialize)]
+pub struct NativeConfig { // VM configuration: system chips plus the `Native` extension
+    #[config(executor = "SystemExecutor<F>")]
+    pub system: SystemConfig,
+    #[extension(generics = true)]
+    pub native: Native,
+}
+
+impl NativeConfig {
+    pub fn aggregation(num_public_values: usize, max_constraint_degree: usize) -> Self { // preset
+        Self {
+            system: SystemConfig::new(
+                max_constraint_degree,
+                MemoryConfig::aggregation(),
+                num_public_values,
+            )
+            .without_continuations()
+            .with_max_segment_len((1 << 24) - 100), // NOTE(review): presumably headroom below 2^24 — confirm
+            native: Default::default(),
+        }
+    }
+}
+
+// Default implementation uses no init file
+impl InitFileGenerator for NativeConfig {}
+
+#[derive(Clone, Default)]
+pub struct NativeCpuBuilder; // CPU-backend `VmBuilder` for `NativeConfig`
+
+impl<E, SC> VmBuilder<E> for NativeCpuBuilder
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    Val<SC>: PrimeField32,
+{
+    type VmConfig = NativeConfig;
+    type SystemChipInventory = SystemChipInventory<SC>;
+    type RecordArena = MatrixRecordArena<Val<SC>>;
+
+    fn create_chip_complex(
+        &self,
+        config: &NativeConfig,
+        circuit: AirInventory<SC>,
+    ) -> Result<
+        VmChipComplex<SC, Self::RecordArena, E::PB, Self::SystemChipInventory>,
+        ChipInventoryError,
+    > {
+        let mut chip_complex = // start from the system chip complex...
+            VmBuilder::<E>::create_chip_complex(&SystemCpuBuilder, &config.system, circuit)?;
+        let inventory = &mut chip_complex.inventory;
+        VmProverExtension::<E, _, _>::extend_prover( // ...then extend it for the native extension
+            &NativeCpuProverExt,
+            &config.native,
+            inventory,
+        )?;
+        Ok(chip_complex)
+    }
+}
+
+#[derive(Clone, Debug, VmConfig, derive_new::new, Serialize, Deserialize)]
+pub struct Rv32WithKernelsConfig { // system + RV32 I/M/IO extensions + native + CastF
+    #[config(executor = "SystemExecutor<F>")]
+    pub system: SystemConfig,
+    #[extension]
+    pub rv32i: Rv32I,
+    #[extension]
+    pub rv32m: Rv32M,
+    #[extension]
+    pub io: Rv32Io,
+    #[extension(generics = true)]
+    pub native: Native,
+    #[extension]
+    pub castf: CastFExtension,
+}
+
+impl Default for Rv32WithKernelsConfig {
+    fn default() -> Self { // default system config with every extension enabled
+        Self {
+            system: SystemConfig::default(),
+            rv32i: Rv32I,
+            rv32m: Rv32M::default(),
+            io: Rv32Io,
+            native: Native,
+            castf: CastFExtension,
+        }
+    }
+}
+
+// Default implementation uses no init file
+impl InitFileGenerator for Rv32WithKernelsConfig {}
+
+#[derive(Clone, Default)] // `Default` added for parity with `NativeCpuBuilder`
+pub struct Rv32WithKernelsCpuBuilder; // CPU-backend `VmBuilder` for `Rv32WithKernelsConfig`
+
+impl<E, SC> VmBuilder<E> for Rv32WithKernelsCpuBuilder
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    Val<SC>: PrimeField32,
+{
+    type VmConfig = Rv32WithKernelsConfig;
+    type SystemChipInventory = SystemChipInventory<SC>;
+    type RecordArena = MatrixRecordArena<Val<SC>>;
+
+    fn create_chip_complex(
+        &self,
+        config: &Rv32WithKernelsConfig,
+        circuit: AirInventory<SC>,
+    ) -> Result<
+        VmChipComplex<SC, Self::RecordArena, E::PB, Self::SystemChipInventory>,
+        ChipInventoryError,
+    > {
+        let mut chip_complex = // start from the system chip complex...
+            VmBuilder::<E>::create_chip_complex(&SystemCpuBuilder, &config.system, circuit)?;
+        let inventory = &mut chip_complex.inventory;
+        // ...then extend it once per extension, in the field order of `Rv32WithKernelsConfig`.
+        VmProverExtension::<E, _, _>::extend_prover(&Rv32ImCpuProverExt, &config.rv32i, inventory)?;
+        VmProverExtension::<E, _, _>::extend_prover(&Rv32ImCpuProverExt, &config.rv32m, inventory)?;
+        VmProverExtension::<E, _, _>::extend_prover(&Rv32ImCpuProverExt, &config.io, inventory)?;
+        VmProverExtension::<E, _, _>::extend_prover(
+            &NativeCpuProverExt,
+            &config.native,
+            inventory,
+        )?;
+        VmProverExtension::<E, _, _>::extend_prover(&NativeCpuProverExt, &config.castf, inventory)?;
+        Ok(chip_complex)
+    }
+}
diff --git a/extensions/native/circuit/src/loadstore/core.rs b/extensions/native/circuit/src/loadstore/core.rs
index 094a57dccc..997a57c55d 100644
--- a/extensions/native/circuit/src/loadstore/core.rs
+++ b/extensions/native/circuit/src/loadstore/core.rs
@@ -1,15 +1,15 @@
 use std::{
     array,
     borrow::{Borrow, BorrowMut},
-    sync::{Arc, Mutex, OnceLock},
 };
 
-use openvm_circuit::arch::{
-    instructions::LocalOpcode, AdapterAirContext, AdapterRuntimeContext, ExecutionError, Result,
-    Streams, VmAdapterInterface, VmCoreAir, VmCoreChip,
+use openvm_circuit::{
+    arch::*,
+    system::memory::{online::TracingMemory, MemoryAuxColsFactory},
 };
+use openvm_circuit_primitives::AlignedBytesBorrow;
 use openvm_circuit_primitives_derive::AlignedBorrow;
-use openvm_instructions::instruction::Instruction;
+use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP, LocalOpcode};
 use openvm_native_compiler::NativeLoadStoreOpcode;
 use openvm_stark_backend::{
     interaction::InteractionBuilder,
@@ -17,11 +17,9 @@ use openvm_stark_backend::{
     p3_field::{Field, FieldAlgebra, PrimeField32},
     rap::BaseAirWithPublicValues,
 };
-use serde::{Deserialize, Serialize};
-use serde_big_array::BigArray;
 use strum::IntoEnumIterator;
 
-use super::super::adapters::loadstore_native_adapter::NativeLoadStoreInstruction;
+use crate::adapters::NativeLoadStoreInstruction;
 
 #[repr(C)]
 #[derive(AlignedBorrow)]
@@ -34,17 +32,7 @@ pub struct NativeLoadStoreCoreCols<T, const NUM_CELLS: usize> {
     pub data: [T; NUM_CELLS],
 }
 
-#[repr(C)]
-#[derive(Clone, Debug, Serialize, Deserialize)]
-pub struct NativeLoadStoreCoreRecord<F, const NUM_CELLS: usize> {
-    pub opcode: NativeLoadStoreOpcode,
-
-    pub pointer_read: F,
-    #[serde(with = "BigArray")]
-    pub data: [F; NUM_CELLS],
-}
-
-#[derive(Clone, Debug)]
+#[derive(Clone, Debug, derive_new::new)]
 pub struct NativeLoadStoreCoreAir<const NUM_CELLS: usize> {
     pub offset: usize,
 }
@@ -113,89 +101,106 @@ where
     }
 }
 
-pub struct NativeLoadStoreCoreChip<F: Field, const NUM_CELLS: usize> {
-    pub air: NativeLoadStoreCoreAir<NUM_CELLS>,
-    pub streams: OnceLock<Arc<Mutex<Streams<F>>>>,
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct NativeLoadStoreCoreRecord<F, const NUM_CELLS: usize> {
+    pub pointer_read: F, // first element of the pair returned by the adapter's `read`
+    pub data: [F; NUM_CELLS], // cells handed to the adapter's `write`
+    pub local_opcode: u8, // opcode index relative to the executor's `offset`
 }
 
-impl<F: Field, const NUM_CELLS: usize> NativeLoadStoreCoreChip<F, NUM_CELLS> {
-    pub fn new(offset: usize) -> Self {
-        Self {
-            air: NativeLoadStoreCoreAir::<NUM_CELLS> { offset },
-            streams: OnceLock::new(),
-        }
-    }
-    pub fn set_streams(&mut self, streams: Arc<Mutex<Streams<F>>>) {
-        self.streams
-            .set(streams)
-            .map_err(|_| "streams have already been set.")
-            .unwrap();
-    }
+#[derive(derive_new::new, Debug, Clone, Copy)]
+pub struct NativeLoadStoreCoreExecutor<A, const NUM_CELLS: usize> {
+    adapter: A, // performs the memory reads and writes during `execute`
+    pub(crate) offset: usize, // subtracted from a global opcode to get the local opcode index
 }
 
-impl<F: Field, const NUM_CELLS: usize> Default for NativeLoadStoreCoreChip<F, NUM_CELLS> {
-    fn default() -> Self {
-        Self::new(NativeLoadStoreOpcode::CLASS_OFFSET)
-    }
+#[derive(derive_new::new)]
+pub struct NativeLoadStoreCoreFiller<A, const NUM_CELLS: usize> {
+    adapter: A, // fills the adapter part of each row in `fill_trace_row`
 }
 
-impl<F: PrimeField32, I: VmAdapterInterface<F>, const NUM_CELLS: usize> VmCoreChip<F, I>
-    for NativeLoadStoreCoreChip<F, NUM_CELLS>
+impl<F, A, RA, const NUM_CELLS: usize> PreflightExecutor<F, RA>
+    for NativeLoadStoreCoreExecutor<A, NUM_CELLS>
 where
-    I::Reads: Into<(F, [F; NUM_CELLS])>,
-    I::Writes: From<[F; NUM_CELLS]>,
+    F: PrimeField32,
+    A: 'static
+        + AdapterTraceExecutor<F, ReadData = (F, [F; NUM_CELLS]), WriteData = [F; NUM_CELLS]>,
+    for<'buf> RA: RecordArena<
+        'buf,
+        EmptyAdapterCoreLayout<F, A>,
+        (
+            A::RecordMut<'buf>,
+            &'buf mut NativeLoadStoreCoreRecord<F, NUM_CELLS>,
+        ),
+    >,
 {
-    type Record = NativeLoadStoreCoreRecord<F, NUM_CELLS>;
-    type Air = NativeLoadStoreCoreAir<NUM_CELLS>;
+    fn get_opcode_name(&self, opcode: usize) -> String { // `opcode` is global; `offset` is removed
+        format!(
+            "{:?}",
+            NativeLoadStoreOpcode::from_usize(opcode - self.offset)
+        )
+    }
 
-    fn execute_instruction(
+    fn execute(
         &self,
+        state: VmStateMut<F, TracingMemory, RA>,
         instruction: &Instruction<F>,
-        from_pc: u32,
-        reads: I::Reads,
-    ) -> Result<(AdapterRuntimeContext<F, I>, Self::Record)> {
-        let Instruction { opcode, .. } = *instruction;
-        let local_opcode =
-            NativeLoadStoreOpcode::from_usize(opcode.local_opcode_idx(self.air.offset));
-        let (pointer_read, data_read) = reads.into();
-
-        let data = if local_opcode == NativeLoadStoreOpcode::HINT_STOREW {
-            let mut streams = self.streams.get().unwrap().lock().unwrap();
-            if streams.hint_stream.len() < NUM_CELLS {
-                return Err(ExecutionError::HintOutOfBounds { pc: from_pc });
+    ) -> Result<(), ExecutionError> {
+        let &Instruction { opcode, .. } = instruction;
+        // Reserve this instruction's adapter and core records in the arena before the adapter runs.
+        let (mut adapter_record, core_record) = state.ctx.alloc(EmptyAdapterCoreLayout::new());
+
+        A::start(*state.pc, state.memory, &mut adapter_record);
+
+        let (pointer_read, data_read) =
+            self.adapter
+                .read(state.memory, instruction, &mut adapter_record);
+        // The opcode is stored as an index relative to `self.offset`, narrowed to `u8`.
+        core_record.local_opcode = opcode.local_opcode_idx(self.offset) as u8;
+        let opcode = NativeLoadStoreOpcode::from_usize(core_record.local_opcode as usize);
+        // HINT_STOREW takes its cells from the hint stream; other opcodes forward what was read.
+        let data = if opcode == NativeLoadStoreOpcode::HINT_STOREW {
+            if state.streams.hint_stream.len() < NUM_CELLS {
+                return Err(ExecutionError::HintOutOfBounds { pc: *state.pc });
             }
-            array::from_fn(|_| streams.hint_stream.pop_front().unwrap())
+            array::from_fn(|_| state.streams.hint_stream.pop_front().unwrap())
         } else {
             data_read
         };
 
-        let output = AdapterRuntimeContext::without_pc(data);
-        let record = NativeLoadStoreCoreRecord {
-            opcode: NativeLoadStoreOpcode::from_usize(opcode.local_opcode_idx(self.air.offset)),
-            pointer_read,
-            data,
-        };
-        Ok((output, record))
-    }
+        self.adapter
+            .write(state.memory, instruction, data, &mut adapter_record);
 
-    fn get_opcode_name(&self, opcode: usize) -> String {
-        format!(
-            "{:?}",
-            NativeLoadStoreOpcode::from_usize(opcode - self.air.offset)
-        )
-    }
+        core_record.pointer_read = pointer_read;
+        core_record.data = data; // the cells passed to `write`, whichever branch produced them
 
-    fn generate_trace_row(&self, row_slice: &mut [F], record: Self::Record) {
-        let cols: &mut NativeLoadStoreCoreCols<_, NUM_CELLS> = row_slice.borrow_mut();
-        cols.is_loadw = F::from_bool(record.opcode == NativeLoadStoreOpcode::LOADW);
-        cols.is_storew = F::from_bool(record.opcode == NativeLoadStoreOpcode::STOREW);
-        cols.is_hint_storew = F::from_bool(record.opcode == NativeLoadStoreOpcode::HINT_STOREW);
+        *state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP);
 
-        cols.pointer_read = record.pointer_read;
-        cols.data = record.data;
+        Ok(())
     }
+}
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+impl<F, A, const NUM_CELLS: usize> TraceFiller<F> for NativeLoadStoreCoreFiller<A, NUM_CELLS>
+where
+    F: PrimeField32,
+    A: 'static + AdapterTraceFiller<F>,
+{
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, row_slice: &mut [F]) {
+        let (adapter_row, mut core_row) = unsafe { row_slice.split_at_mut_unchecked(A::WIDTH) };
+        self.adapter.fill_trace_row(mem_helper, adapter_row);
+        // The first `A::WIDTH` cells went to the adapter; the remaining cells are handled here.
+        let record: &NativeLoadStoreCoreRecord<F, NUM_CELLS> =
+            unsafe { get_record_from_slice(&mut core_row, ()) };
+        let core_row: &mut NativeLoadStoreCoreCols<F, NUM_CELLS> = core_row.borrow_mut();
+        // Decode the opcode from the record before any column is written.
+        let opcode = NativeLoadStoreOpcode::from_usize(record.local_opcode as usize);
+
+        // Writing in reverse order to avoid overwriting the `record`
+        core_row.data = record.data;
+        core_row.pointer_read = record.pointer_read;
+        core_row.is_hint_storew = F::from_bool(opcode == NativeLoadStoreOpcode::HINT_STOREW);
+        core_row.is_storew = F::from_bool(opcode == NativeLoadStoreOpcode::STOREW);
+        core_row.is_loadw = F::from_bool(opcode == NativeLoadStoreOpcode::LOADW);
     }
 }
diff --git a/extensions/native/circuit/src/loadstore/execution.rs b/extensions/native/circuit/src/loadstore/execution.rs
new file mode 100644
index 0000000000..a31efb831e
--- /dev/null
+++ b/extensions/native/circuit/src/loadstore/execution.rs
@@ -0,0 +1,246 @@
+use std::{
+    array,
+    borrow::{Borrow, BorrowMut},
+};
+
+use openvm_circuit::{arch::*, system::memory::online::GuestMemory};
+use openvm_circuit_primitives::AlignedBytesBorrow;
+use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP, LocalOpcode};
+use openvm_native_compiler::{conversion::AS, NativeLoadStoreOpcode};
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::core::NativeLoadStoreCoreExecutor;
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct NativeLoadStorePreCompute<F> { // operands cached per instruction by `pre_compute_impl`
+    a: u32, // instruction operand `a` as a canonical u32
+    b: F, // instruction operand `b`, kept as a field element (added to a cell read at `c`)
+    c: u32, // instruction operand `c` as a canonical u32
+}
+// Decode step shared by the `Executor` and `MeteredExecutor` impls of this type.
+impl<A, const NUM_CELLS: usize> NativeLoadStoreCoreExecutor<A, NUM_CELLS> {
+    #[inline(always)]
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut NativeLoadStorePreCompute<F>,
+    ) -> Result<NativeLoadStoreOpcode, StaticProgramError> {
+        let &Instruction {
+            opcode,
+            a,
+            b,
+            c,
+            d,
+            e,
+            ..
+        } = inst;
+        // Local opcode = global opcode taken relative to this executor's `offset`.
+        let local_opcode = NativeLoadStoreOpcode::from_usize(opcode.local_opcode_idx(self.offset));
+        // `b` stays a field element; the other operands are lowered to canonical `u32`s.
+        let a = a.as_canonical_u32();
+        let c = c.as_canonical_u32();
+        let d = d.as_canonical_u32();
+        let e = e.as_canonical_u32();
+        // Both address-space operands must be the native address space; `pc` tags the error.
+        if d != AS::Native as u32 || e != AS::Native as u32 {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+
+        *data = NativeLoadStorePreCompute { a, b, c };
+
+        Ok(local_opcode)
+    }
+}
+// Unmetered execution: caches the operands in `data` and returns a per-opcode handler.
+impl<F, A, const NUM_CELLS: usize> Executor<F> for NativeLoadStoreCoreExecutor<A, NUM_CELLS>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize {
+        size_of::<NativeLoadStorePreCompute<F>>()
+    }
+
+    #[inline(always)]
+    fn pre_compute<Ctx: ExecutionCtxTrait>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let pre_compute: &mut NativeLoadStorePreCompute<F> = data.borrow_mut();
+        // Fills the typed view of `data` in place; fails on non-native address spaces.
+        let local_opcode = self.pre_compute_impl(pc, inst, pre_compute)?;
+        // The opcode is matched here, so each returned handler serves exactly one opcode.
+        let fn_ptr = match local_opcode {
+            NativeLoadStoreOpcode::LOADW => execute_e1_loadw::<F, Ctx, NUM_CELLS>,
+            NativeLoadStoreOpcode::STOREW => execute_e1_storew::<F, Ctx, NUM_CELLS>,
+            NativeLoadStoreOpcode::HINT_STOREW => execute_e1_hint_storew::<F, Ctx, NUM_CELLS>,
+        };
+
+        Ok(fn_ptr)
+    }
+}
+// Metered execution: same decode as `Executor`, with `chip_idx` stored next to the operands.
+impl<F, A, const NUM_CELLS: usize> MeteredExecutor<F> for NativeLoadStoreCoreExecutor<A, NUM_CELLS>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn metered_pre_compute_size(&self) -> usize {
+        size_of::<E2PreCompute<NativeLoadStorePreCompute<F>>>()
+    }
+
+    #[inline(always)]
+    fn metered_pre_compute<Ctx: MeteredExecutionCtxTrait>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let pre_compute: &mut E2PreCompute<NativeLoadStorePreCompute<F>> = data.borrow_mut();
+        pre_compute.chip_idx = chip_idx as u32;
+        // The operands go into the nested `data` field of the metered record.
+        let local_opcode = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?;
+        // The opcode is matched here, so each returned handler serves exactly one opcode.
+        let fn_ptr = match local_opcode {
+            NativeLoadStoreOpcode::LOADW => execute_e2_loadw::<F, Ctx, NUM_CELLS>,
+            NativeLoadStoreOpcode::STOREW => execute_e2_storew::<F, Ctx, NUM_CELLS>,
+            NativeLoadStoreOpcode::HINT_STOREW => execute_e2_hint_storew::<F, Ctx, NUM_CELLS>,
+        };
+
+        Ok(fn_ptr)
+    }
+}
+
+/// Handler returned by `Executor::pre_compute` for `LOADW`; `pre_compute` is viewed as typed.
+unsafe fn execute_e1_loadw<F: PrimeField32, CTX: ExecutionCtxTrait, const NUM_CELLS: usize>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    execute_e12_loadw::<F, CTX, NUM_CELLS>(pre_compute.borrow(), vm_state);
+}
+
+/// Handler returned by `Executor::pre_compute` for `STOREW`; `pre_compute` is viewed as typed.
+unsafe fn execute_e1_storew<F: PrimeField32, CTX: ExecutionCtxTrait, const NUM_CELLS: usize>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    execute_e12_storew::<F, CTX, NUM_CELLS>(pre_compute.borrow(), vm_state);
+}
+
+/// Handler returned by `Executor::pre_compute` for `HINT_STOREW`.
+unsafe fn execute_e1_hint_storew<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const NUM_CELLS: usize,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    execute_e12_hint_storew::<F, CTX, NUM_CELLS>(pre_compute.borrow(), vm_state);
+}
+
+/// Handler returned by `MeteredExecutor::metered_pre_compute` for `LOADW`.
+unsafe fn execute_e2_loadw<
+    F: PrimeField32,
+    CTX: MeteredExecutionCtxTrait,
+    const NUM_CELLS: usize,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let metered: &E2PreCompute<NativeLoadStorePreCompute<F>> = pre_compute.borrow();
+    // Notify the context of a height change of 1 for this chip, then run the shared body.
+    vm_state.ctx.on_height_change(metered.chip_idx as usize, 1);
+    execute_e12_loadw::<F, CTX, NUM_CELLS>(&metered.data, vm_state);
+}
+
+/// Handler returned by `MeteredExecutor::metered_pre_compute` for `STOREW`.
+unsafe fn execute_e2_storew<
+    F: PrimeField32,
+    CTX: MeteredExecutionCtxTrait,
+    const NUM_CELLS: usize,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let metered: &E2PreCompute<NativeLoadStorePreCompute<F>> = pre_compute.borrow();
+    // Notify the context of a height change of 1 for this chip, then run the shared body.
+    vm_state.ctx.on_height_change(metered.chip_idx as usize, 1);
+    execute_e12_storew::<F, CTX, NUM_CELLS>(&metered.data, vm_state);
+}
+
+/// Handler returned by `MeteredExecutor::metered_pre_compute` for `HINT_STOREW`.
+unsafe fn execute_e2_hint_storew<
+    F: PrimeField32,
+    CTX: MeteredExecutionCtxTrait,
+    const NUM_CELLS: usize,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let metered: &E2PreCompute<NativeLoadStorePreCompute<F>> = pre_compute.borrow();
+    // Notify the context of a height change of 1 for this chip, then run the shared body.
+    vm_state.ctx.on_height_change(metered.chip_idx as usize, 1);
+    execute_e12_hint_storew::<F, CTX, NUM_CELLS>(&metered.data, vm_state);
+}
+
+/// Shared LOADW body: copies `NUM_CELLS` cells from address `native[c] + b` to address `a`.
+#[inline(always)]
+unsafe fn execute_e12_loadw<F: PrimeField32, CTX: ExecutionCtxTrait, const NUM_CELLS: usize>(
+    pre_compute: &NativeLoadStorePreCompute<F>,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let native = AS::Native as u32;
+    let [base]: [F; 1] = vm_state.vm_read(native, pre_compute.c);
+    // The offset `b` is added in the field before the sum is turned into an address.
+    let src_ptr = (base + pre_compute.b).as_canonical_u32();
+    let cells: [F; NUM_CELLS] = vm_state.vm_read(native, src_ptr);
+    vm_state.vm_write(native, pre_compute.a, &cells);
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+}
+
+/// Shared STOREW body: copies `NUM_CELLS` cells from address `a` to address `native[c] + b`.
+#[inline(always)]
+unsafe fn execute_e12_storew<F: PrimeField32, CTX: ExecutionCtxTrait, const NUM_CELLS: usize>(
+    pre_compute: &NativeLoadStorePreCompute<F>,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let native = AS::Native as u32;
+    let [base]: [F; 1] = vm_state.vm_read(native, pre_compute.c);
+    let cells: [F; NUM_CELLS] = vm_state.vm_read(native, pre_compute.a);
+    let dst_ptr = (base + pre_compute.b).as_canonical_u32();
+    vm_state.vm_write(native, dst_ptr, &cells);
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+}
+/// Shared HINT_STOREW body: pops `NUM_CELLS` hints and writes them at address `native[c] + b`.
+#[inline(always)]
+unsafe fn execute_e12_hint_storew<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const NUM_CELLS: usize,
+>(
+    pre_compute: &NativeLoadStorePreCompute<F>,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let [read_cell]: [F; 1] = vm_state.vm_read(AS::Native as u32, pre_compute.c);
+    // With too few hints, record the error in `exit_code`; nothing is written and `pc` stays put.
+    if vm_state.streams.hint_stream.len() < NUM_CELLS {
+        vm_state.exit_code = Err(ExecutionError::HintOutOfBounds { pc: vm_state.pc });
+        return;
+    }
+    let data: [F; NUM_CELLS] =
+        array::from_fn(|_| vm_state.streams.hint_stream.pop_front().unwrap());
+
+    let data_write_ptr = (read_cell + pre_compute.b).as_canonical_u32();
+    vm_state.vm_write(AS::Native as u32, data_write_ptr, &data);
+
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+}
diff --git a/extensions/native/circuit/src/loadstore/mod.rs b/extensions/native/circuit/src/loadstore/mod.rs
index 3dd51113a9..7674e54d36 100644
--- a/extensions/native/circuit/src/loadstore/mod.rs
+++ b/extensions/native/circuit/src/loadstore/mod.rs
@@ -1,19 +1,19 @@
 use openvm_circuit::arch::{VmAirWrapper, VmChipWrapper};
 
-#[cfg(test)]
-mod tests;
+use crate::adapters::{
+    NativeLoadStoreAdapterAir, NativeLoadStoreAdapterExecutor, NativeLoadStoreAdapterFiller,
+};
 
 mod core;
+mod execution;
 pub use core::*;
 
-use super::adapters::loadstore_native_adapter::{
-    NativeLoadStoreAdapterAir, NativeLoadStoreAdapterChip,
-};
+#[cfg(test)]
+mod tests;
 
 pub type NativeLoadStoreAir<const NUM_CELLS: usize> =
     VmAirWrapper<NativeLoadStoreAdapterAir<NUM_CELLS>, NativeLoadStoreCoreAir<NUM_CELLS>>;
-pub type NativeLoadStoreChip<F, const NUM_CELLS: usize> = VmChipWrapper<
-    F,
-    NativeLoadStoreAdapterChip<F, NUM_CELLS>,
-    NativeLoadStoreCoreChip<F, NUM_CELLS>,
->;
+pub type NativeLoadStoreExecutor<const NUM_CELLS: usize> = // core executor over the adapter executor
+    NativeLoadStoreCoreExecutor<NativeLoadStoreAdapterExecutor<NUM_CELLS>, NUM_CELLS>;
+pub type NativeLoadStoreChip<F, const NUM_CELLS: usize> = // chip wrapper over the core + adapter fillers
+    VmChipWrapper<F, NativeLoadStoreCoreFiller<NativeLoadStoreAdapterFiller<NUM_CELLS>, NUM_CELLS>>;
diff --git a/extensions/native/circuit/src/loadstore/tests.rs b/extensions/native/circuit/src/loadstore/tests.rs
index cd653c2fc0..9c4c5ee587 100644
--- a/extensions/native/circuit/src/loadstore/tests.rs
+++ b/extensions/native/circuit/src/loadstore/tests.rs
@@ -1,175 +1,258 @@
-use std::sync::{Arc, Mutex};
+use std::{array, borrow::BorrowMut};
 
-use openvm_circuit::arch::{testing::VmChipTestBuilder, Streams};
+use openvm_circuit::arch::testing::{memory::gen_pointer, TestChipHarness, VmChipTestBuilder};
 use openvm_instructions::{instruction::Instruction, LocalOpcode};
-use openvm_native_compiler::NativeLoadStoreOpcode::{self, *};
-use openvm_stark_backend::p3_field::{FieldAlgebra, PrimeField32};
-use openvm_stark_sdk::{config::setup_tracing, p3_baby_bear::BabyBear, utils::create_seeded_rng};
+use openvm_native_compiler::{
+    conversion::AS,
+    NativeLoadStoreOpcode::{self, *},
+};
+use openvm_stark_backend::{
+    p3_air::BaseAir,
+    p3_field::{FieldAlgebra, PrimeField32},
+    p3_matrix::{
+        dense::{DenseMatrix, RowMajorMatrix},
+        Matrix,
+    },
+    utils::disable_debug_builder,
+    verifier::VerificationError,
+};
+use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
 use rand::{rngs::StdRng, Rng};
+use test_case::test_case;
 
-use super::{
-    super::adapters::loadstore_native_adapter::NativeLoadStoreAdapterChip, NativeLoadStoreChip,
-    NativeLoadStoreCoreChip,
+use super::{NativeLoadStoreChip, NativeLoadStoreCoreAir};
+use crate::{
+    adapters::{
+        NativeLoadStoreAdapterAir, NativeLoadStoreAdapterCols, NativeLoadStoreAdapterExecutor,
+        NativeLoadStoreAdapterFiller,
+    },
+    test_utils::write_native_array,
+    NativeLoadStoreAir, NativeLoadStoreCoreCols, NativeLoadStoreCoreFiller,
+    NativeLoadStoreExecutor,
 };
 
+const MAX_INS_CAPACITY: usize = 128; // capacity passed to `Harness::with_capacity`; tests run 100 ops
 type F = BabyBear;
+type Harness<const NUM_CELLS: usize> = TestChipHarness< // executor, AIR and chip under test
+    F,
+    NativeLoadStoreExecutor<NUM_CELLS>,
+    NativeLoadStoreAir<NUM_CELLS>,
+    NativeLoadStoreChip<F, NUM_CELLS>,
+>;
+/// Builds the AIR, executor and chip for `NUM_CELLS`-wide native load/store, wired to `tester`.
+fn create_test_chip<const NUM_CELLS: usize>(tester: &VmChipTestBuilder<F>) -> Harness<NUM_CELLS> {
+    let air = NativeLoadStoreAir::new(
+        NativeLoadStoreAdapterAir::new(tester.memory_bridge(), tester.execution_bridge()),
+        NativeLoadStoreCoreAir::new(NativeLoadStoreOpcode::CLASS_OFFSET),
+    );
+    let executor = NativeLoadStoreExecutor::new(
+        NativeLoadStoreAdapterExecutor::new(NativeLoadStoreOpcode::CLASS_OFFSET),
+        NativeLoadStoreOpcode::CLASS_OFFSET,
+    );
+    let chip = NativeLoadStoreChip::<F, NUM_CELLS>::new(
+        NativeLoadStoreCoreFiller::new(NativeLoadStoreAdapterFiller),
+        tester.memory_helper(),
+    );
 
-#[derive(Debug)]
-struct TestData {
-    a: F,
-    b: F,
-    c: F,
-    d: F,
-    e: F,
-    ad_val: F,
-    cd_val: F,
-    data_val: F,
-    is_load: bool,
-    is_hint: bool,
+    Harness::with_capacity(executor, air, chip, MAX_INS_CAPACITY)
 }
 
-fn setup() -> (StdRng, VmChipTestBuilder<F>, NativeLoadStoreChip<F, 1>) {
-    let rng = create_seeded_rng();
-    let tester = VmChipTestBuilder::default();
+fn set_and_execute<const NUM_CELLS: usize>(
+    tester: &mut VmChipTestBuilder<F>,
+    harness: &mut Harness<NUM_CELLS>,
+    rng: &mut StdRng,
+    opcode: NativeLoadStoreOpcode,
+) { // seeds memory or hints for `opcode`, runs one instruction, checks the written cells
+    let a = gen_pointer(rng, NUM_CELLS);
+    let ([c_val], c) = write_native_array(tester, rng, None);
+    // Pick `b` so that `c_val + b` (in the field) equals the target address `mem_ptr`.
+    let mem_ptr = gen_pointer(rng, NUM_CELLS);
+    let b = F::from_canonical_usize(mem_ptr) - c_val;
+    let data: [F; NUM_CELLS] = array::from_fn(|_| rng.gen());
 
-    let adapter = NativeLoadStoreAdapterChip::<F, 1>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-        NativeLoadStoreOpcode::CLASS_OFFSET,
+    match opcode {
+        LOADW => {
+            tester.write(AS::Native as usize, mem_ptr, data);
+        }
+        STOREW => {
+            tester.write(AS::Native as usize, a, data);
+        }
+        HINT_STOREW => {
+            tester.streams.hint_stream.extend(data);
+        }
+    }
+    // Operands are passed as [a, b, c, d, e], with both `d` and `e` set to the native space.
+    tester.execute(
+        harness,
+        &Instruction::from_usize(
+            opcode.global_opcode(),
+            [
+                a,
+                b.as_canonical_u32() as usize,
+                c,
+                AS::Native as usize,
+                AS::Native as usize,
+            ],
+        ),
     );
-    let mut inner = NativeLoadStoreCoreChip::new(NativeLoadStoreOpcode::CLASS_OFFSET);
-    inner.set_streams(Arc::new(Mutex::new(Streams::default())));
-    let chip = NativeLoadStoreChip::<F, 1>::new(adapter, inner, tester.offline_memory_mutex_arc());
-    (rng, tester, chip)
+    // Both store opcodes are checked at `mem_ptr`; LOADW is checked at `a`.
+    let result = match opcode {
+        STOREW | HINT_STOREW => tester.read(AS::Native as usize, mem_ptr),
+        LOADW => tester.read(AS::Native as usize, a),
+    };
+    assert_eq!(result, data);
 }
 
-fn gen_test_data(rng: &mut StdRng, opcode: NativeLoadStoreOpcode) -> TestData {
-    let is_load = matches!(opcode, NativeLoadStoreOpcode::LOADW);
-
-    let a = rng.gen_range(0..1 << 20);
-    let b = rng.gen_range(0..1 << 20);
-    let c = rng.gen_range(0..1 << 20);
-    let d = F::from_canonical_u32(4u32);
-    let e = F::from_canonical_u32(4u32);
-
-    TestData {
-        a: F::from_canonical_u32(a),
-        b: F::from_canonical_u32(b),
-        c: F::from_canonical_u32(c),
-        d,
-        e,
-        ad_val: F::from_canonical_u32(111),
-        cd_val: F::from_canonical_u32(222),
-        data_val: F::from_canonical_u32(444),
-        is_load,
-        is_hint: matches!(opcode, NativeLoadStoreOpcode::HINT_STOREW),
+///////////////////////////////////////////////////////////////////////////////////////
+// POSITIVE TESTS
+//
+// Randomly generate computations and execute, ensuring that the generated trace
+// passes all constraints.
+///////////////////////////////////////////////////////////////////////////////////////
+
+#[test_case(STOREW, 100)]
+#[test_case(HINT_STOREW, 100)]
+#[test_case(LOADW, 100)]
+fn rand_native_loadstore_test_1(opcode: NativeLoadStoreOpcode, num_ops: usize) {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default_native();
+    let mut harness = create_test_chip::<1>(&tester);
+    // NUM_CELLS = 1: run `num_ops` random instructions, then verify the resulting trace.
+    for _ in 0..num_ops {
+        set_and_execute(&mut tester, &mut harness, &mut rng, opcode);
     }
+    let tester = tester.build().load(harness).finalize();
+    tester.simple_test().expect("Verification failed");
 }
 
-fn get_data_pointer(data: &TestData) -> F {
-    if data.d != F::ZERO {
-        data.cd_val + data.b
-    } else {
-        data.c + data.b
+#[test_case(STOREW, 100)]
+#[test_case(HINT_STOREW, 100)]
+#[test_case(LOADW, 100)]
+fn rand_native_loadstore_test_4(opcode: NativeLoadStoreOpcode, num_ops: usize) {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default_native();
+    let mut harness = create_test_chip::<4>(&tester);
+    // NUM_CELLS = 4: run `num_ops` random instructions, then verify the resulting trace.
+    for _ in 0..num_ops {
+        set_and_execute(&mut tester, &mut harness, &mut rng, opcode);
     }
+    let tester = tester.build().load(harness).finalize();
+    tester.simple_test().expect("Verification failed");
 }
 
-fn set_values(
-    tester: &mut VmChipTestBuilder<F>,
-    chip: &mut NativeLoadStoreChip<F, 1>,
-    data: &TestData,
-) {
-    if data.d != F::ZERO {
-        tester.write(
-            data.d.as_canonical_u32() as usize,
-            data.a.as_canonical_u32() as usize,
-            [data.ad_val],
-        );
-        tester.write(
-            data.d.as_canonical_u32() as usize,
-            data.c.as_canonical_u32() as usize,
-            [data.cd_val],
-        );
-    }
-    if data.is_load {
-        let data_pointer = get_data_pointer(data);
-        tester.write(
-            data.e.as_canonical_u32() as usize,
-            data_pointer.as_canonical_u32() as usize,
-            [data.data_val],
-        );
-    }
-    if data.is_hint {
-        for _ in 0..data.e.as_canonical_u32() {
-            chip.core
-                .streams
-                .get()
-                .unwrap()
-                .lock()
-                .unwrap()
-                .hint_stream
-                .push_back(data.data_val);
-        }
-    }
+//////////////////////////////////////////////////////////////////////////////////////
+// NEGATIVE TESTS
+//
+// Given a fake trace of a single operation, setup a chip and run the test. We replace
+// part of the trace and check that the chip throws the expected error.
+//////////////////////////////////////////////////////////////////////////////////////
+/// Optional overrides for row 0 of the trace; a `None` field leaves its column(s) untouched.
+#[derive(Clone, Copy, Default)]
+struct NativeLoadStorePrankValues<const NUM_CELLS: usize> {
+    // Core cols
+    pub data: Option<[F; NUM_CELLS]>,
+    pub opcode_flags: Option<[bool; 3]>, // order: [is_loadw, is_storew, is_hint_storew]
+    pub pointer_read: Option<F>,
+    // Adapter cols
+    pub data_write_pointer: Option<F>,
 }
 
-fn check_values(tester: &mut VmChipTestBuilder<F>, data: &TestData) {
-    let data_pointer = get_data_pointer(data);
-
-    let written_data_val = if data.is_load {
-        tester.read::<1>(
-            data.d.as_canonical_u32() as usize,
-            data.a.as_canonical_u32() as usize,
-        )[0]
-    } else {
-        tester.read::<1>(
-            data.e.as_canonical_u32() as usize,
-            data_pointer.as_canonical_u32() as usize,
-        )[0]
-    };
+fn run_negative_native_loadstore_test<const NUM_CELLS: usize>(
+    opcode: NativeLoadStoreOpcode,
+    prank_vals: NativeLoadStorePrankValues<NUM_CELLS>,
+    error: VerificationError,
+) { // executes one `opcode`, tampers with the trace, expects verification to fail with `error`
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default_native();
+    let mut harness = create_test_chip::<NUM_CELLS>(&tester);
+    // Execute a single valid instruction first; its row is modified afterwards.
+    set_and_execute(&mut tester, &mut harness, &mut rng, opcode);
 
-    let correct_data_val = if data.is_load || data.is_hint {
-        data.data_val
-    } else if data.d != F::ZERO {
-        data.ad_val
-    } else {
-        data.a
+    let adapter_width = BaseAir::<F>::width(&harness.air.adapter);
+    let modify_trace = |trace: &mut DenseMatrix<F>| {
+        let mut values = trace.row_slice(0).to_vec();
+        let (adapter_row, core_row) = values.split_at_mut(adapter_width);
+        let adapter_cols: &mut NativeLoadStoreAdapterCols<F, NUM_CELLS> = adapter_row.borrow_mut();
+        let core_cols: &mut NativeLoadStoreCoreCols<F, NUM_CELLS> = core_row.borrow_mut();
+        // Apply only the overrides that were provided.
+        if let Some(data) = prank_vals.data {
+            core_cols.data = data;
+        }
+        if let Some(pointer_read) = prank_vals.pointer_read {
+            core_cols.pointer_read = pointer_read;
+        }
+        if let Some(opcode_flags) = prank_vals.opcode_flags {
+            [
+                core_cols.is_loadw,
+                core_cols.is_storew,
+                core_cols.is_hint_storew,
+            ] = opcode_flags.map(F::from_bool);
+        }
+        if let Some(data_write_pointer) = prank_vals.data_write_pointer {
+            adapter_cols.data_write_pointer = data_write_pointer;
+        }
+        // The trace is rebuilt from the edited copy of row 0 only.
+        *trace = RowMajorMatrix::new(values, trace.width());
     };
 
-    assert_eq!(written_data_val, correct_data_val, "{:?}", data);
+    disable_debug_builder();
+    let tester = tester
+        .build()
+        .load_and_prank_trace(harness, modify_trace)
+        .finalize();
+    tester.simple_test_with_expected_error(error);
 }
 
-fn set_and_execute(
-    tester: &mut VmChipTestBuilder<F>,
-    chip: &mut NativeLoadStoreChip<F, 1>,
-    rng: &mut StdRng,
-    opcode: NativeLoadStoreOpcode,
-) {
-    let data = gen_test_data(rng, opcode);
-    set_values(tester, chip, &data);
+#[test]
+fn negative_native_loadstore_tests() { // a zeroed `data_write_pointer` must fail for both opcodes
+    run_negative_native_loadstore_test::<1>(
+        STOREW,
+        NativeLoadStorePrankValues {
+            data_write_pointer: Some(F::ZERO),
+            ..Default::default()
+        },
+        VerificationError::OodEvaluationMismatch,
+    );
 
-    tester.execute_with_pc(
-        chip,
-        &Instruction::from_usize(
-            opcode.global_opcode(),
-            [data.a, data.b, data.c, data.d, data.e].map(|x| x.as_canonical_u32() as usize),
-        ),
-        0u32,
+    run_negative_native_loadstore_test::<1>(
+        LOADW,
+        NativeLoadStorePrankValues {
+            data_write_pointer: Some(F::ZERO),
+            ..Default::default()
+        },
+        VerificationError::OodEvaluationMismatch,
     );
+}
 
-    check_values(tester, &data);
+#[test]
+fn invalid_flags_native_loadstore_tests() { // flags are [is_loadw, is_storew, is_hint_storew]
+    run_negative_native_loadstore_test::<1>(
+        HINT_STOREW,
+        NativeLoadStorePrankValues {
+            opcode_flags: Some([false, false, false]),
+            ..Default::default()
+        },
+        VerificationError::ChallengePhaseError,
+    );
+    // A LOADW row whose flags are changed to mark it as HINT_STOREW.
+    run_negative_native_loadstore_test::<1>(
+        LOADW,
+        NativeLoadStorePrankValues {
+            opcode_flags: Some([false, false, true]),
+            ..Default::default()
+        },
+        VerificationError::OodEvaluationMismatch,
+    );
 }
 
 #[test]
-fn rand_native_loadstore_test() {
-    setup_tracing();
-    let (mut rng, mut tester, mut chip) = setup();
-    for _ in 0..20 {
-        set_and_execute(&mut tester, &mut chip, &mut rng, STOREW);
-        set_and_execute(&mut tester, &mut chip, &mut rng, HINT_STOREW);
-        set_and_execute(&mut tester, &mut chip, &mut rng, LOADW);
-    }
-    let tester = tester.build().load(chip).finalize();
-    tester.simple_test().expect("Verification failed");
+fn invalid_data_native_loadstore_tests() { // overwriting the loaded data with zeros must fail
+    run_negative_native_loadstore_test( // NUM_CELLS is inferred as 4 from the `data` array
+        LOADW,
+        NativeLoadStorePrankValues {
+            data: Some([F::ZERO; 4]),
+            ..Default::default()
+        },
+        VerificationError::ChallengePhaseError,
+    );
 }
diff --git a/extensions/native/circuit/src/poseidon2/air.rs b/extensions/native/circuit/src/poseidon2/air.rs
index 5ed28abd60..adf2c09a62 100644
--- a/extensions/native/circuit/src/poseidon2/air.rs
+++ b/extensions/native/circuit/src/poseidon2/air.rs
@@ -7,10 +7,13 @@ use openvm_circuit::{
 use openvm_circuit_primitives::utils::not;
 use openvm_instructions::LocalOpcode;
 use openvm_native_compiler::{
+    conversion::AS,
     Poseidon2Opcode::{COMP_POS2, PERM_POS2},
     VerifyBatchOpcode::VERIFY_BATCH,
 };
-use openvm_poseidon2_air::{Poseidon2SubAir, BABY_BEAR_POSEIDON2_HALF_FULL_ROUNDS};
+use openvm_poseidon2_air::{
+    Poseidon2Config, Poseidon2SubAir, BABY_BEAR_POSEIDON2_HALF_FULL_ROUNDS,
+};
 use openvm_stark_backend::{
     air_builders::sub::SubAirBuilder,
     interaction::{BusIndex, InteractionBuilder, PermutationCheckBus},
@@ -20,15 +23,13 @@ use openvm_stark_backend::{
     rap::{BaseAirWithPublicValues, PartitionedBaseAir},
 };
 
-use crate::{
+use crate::poseidon2::{
     chip::{NUM_INITIAL_READS, NUM_SIMPLE_ACCESSES},
-    poseidon2::{
-        columns::{
-            InsideRowSpecificCols, NativePoseidon2Cols, SimplePoseidonSpecificCols,
-            TopLevelSpecificCols,
-        },
-        CHUNK,
+    columns::{
+        InsideRowSpecificCols, NativePoseidon2Cols, SimplePoseidonSpecificCols,
+        TopLevelSpecificCols,
     },
+    CHUNK,
 };
 
 #[derive(Clone, Debug)]
@@ -40,6 +41,23 @@ pub struct NativePoseidon2Air<F: Field, const SBOX_REGISTERS: usize> {
     pub(crate) address_space: F,
 }
 
+impl<F: Field, const SBOX_REGISTERS: usize> NativePoseidon2Air<F, SBOX_REGISTERS> {
+    pub fn new(
+        execution_bridge: ExecutionBridge,
+        memory_bridge: MemoryBridge,
+        verify_batch_bus: VerifyBatchBus,
+        poseidon2_config: Poseidon2Config<F>,
+    ) -> Self {
+        NativePoseidon2Air {
+            execution_bridge,
+            memory_bridge,
+            internal_bus: verify_batch_bus, // the verify-batch bus is stored as the internal bus
+            subair: Arc::new(Poseidon2SubAir::new(poseidon2_config.constants.into())),
+            address_space: F::from_canonical_u32(AS::Native as u32), // fixed: native AS
+        }
+    }
+}
+
 impl<F: Field, const SBOX_REGISTERS: usize> BaseAir<F> for NativePoseidon2Air<F, SBOX_REGISTERS> {
     fn width(&self) -> usize {
         NativePoseidon2Cols::<F, SBOX_REGISTERS>::width()
diff --git a/extensions/native/circuit/src/poseidon2/chip.rs b/extensions/native/circuit/src/poseidon2/chip.rs
index 426b089a9c..19b2a3cb71 100644
--- a/extensions/native/circuit/src/poseidon2/chip.rs
+++ b/extensions/native/circuit/src/poseidon2/chip.rs
@@ -1,10 +1,13 @@
-use std::sync::{Arc, Mutex};
+use std::borrow::{Borrow, BorrowMut};
 
 use openvm_circuit::{
-    arch::{
-        ExecutionBridge, ExecutionError, ExecutionState, InstructionExecutor, Streams, SystemPort,
+    arch::*,
+    system::{
+        memory::{offline_checker::MemoryBaseAuxCols, online::TracingMemory, MemoryAuxColsFactory},
+        native_adapter::util::{
+            memory_read_native, tracing_read_native, tracing_write_native_inplace,
+        },
     },
-    system::memory::{MemoryController, OfflineMemory, RecordId},
 };
 use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP, LocalOpcode};
 use openvm_native_compiler::{
@@ -12,178 +15,154 @@ use openvm_native_compiler::{
     Poseidon2Opcode::{COMP_POS2, PERM_POS2},
     VerifyBatchOpcode::VERIFY_BATCH,
 };
-use openvm_poseidon2_air::{Poseidon2Config, Poseidon2SubAir, Poseidon2SubChip};
+use openvm_poseidon2_air::{Poseidon2Config, Poseidon2SubChip, Poseidon2SubCols};
 use openvm_stark_backend::{
+    p3_air::BaseAir,
     p3_field::{Field, PrimeField32},
-    p3_maybe_rayon::prelude::{ParallelIterator, ParallelSlice},
+    p3_matrix::{dense::RowMajorMatrix, Matrix},
+    p3_maybe_rayon::prelude::{IntoParallelIterator, ParallelSliceMut, *},
 };
-use serde::{Deserialize, Serialize};
 
 use crate::poseidon2::{
-    air::{NativePoseidon2Air, VerifyBatchBus},
+    columns::{
+        InsideRowSpecificCols, NativePoseidon2Cols, SimplePoseidonSpecificCols,
+        TopLevelSpecificCols,
+    },
     CHUNK,
 };
 
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[serde(bound = "F: Field")]
-pub struct VerifyBatchRecord<F: Field> {
-    pub from_state: ExecutionState<u32>,
-    pub instruction: Instruction<F>,
-
-    pub dim_base_pointer: F,
-    pub opened_base_pointer: F,
-    pub opened_length: usize,
-    pub index_base_pointer: F,
-    pub commit_pointer: F,
-
-    pub dim_base_pointer_read: RecordId,
-    pub opened_base_pointer_read: RecordId,
-    pub opened_length_read: RecordId,
-    pub index_base_pointer_read: RecordId,
-    pub commit_pointer_read: RecordId,
-
-    pub commit_read: RecordId,
-    pub initial_log_height: usize,
-    pub top_level: Vec<TopLevelRecord<F>>,
+#[derive(Clone)]
+pub struct NativePoseidon2Executor<F: Field, const SBOX_REGISTERS: usize> {
+    pub(super) subchip: Poseidon2SubChip<F, SBOX_REGISTERS>,
+    /// If true, `verify_batch` assumes that verification always passes and skips the Poseidon2
+    /// computation during execution, for performance. Set to `true` by `new`.
+    optimistic: bool,
 }
 
-impl<F: PrimeField32> VerifyBatchRecord<F> {
-    pub fn opened_element_size_inv(&self) -> F {
-        self.instruction.g
-    }
+pub struct NativePoseidon2Filler<F: Field, const SBOX_REGISTERS: usize> {
+    // Pre-computed Poseidon2 sub-columns for dummy rows (subchip trace of one all-zero input).
+    empty_poseidon2_sub_cols: Vec<F>,
+    pub(super) subchip: Poseidon2SubChip<F, SBOX_REGISTERS>,
 }
 
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[serde(bound = "F: Field")]
-pub struct TopLevelRecord<F: Field> {
-    // must be present in first record
-    pub incorporate_row: Option<IncorporateRowRecord<F>>,
-    // must be present in all bust last record
-    pub incorporate_sibling: Option<IncorporateSiblingRecord<F>>,
+impl<F: PrimeField32, const SBOX_REGISTERS: usize> NativePoseidon2Executor<F, SBOX_REGISTERS> {
+    pub fn new(poseidon2_config: Poseidon2Config<F>) -> Self {
+        let subchip = Poseidon2SubChip::new(poseidon2_config.constants);
+        Self {
+            subchip,
+            optimistic: true, // default; change it with `set_optimistic`
+        }
+    }
+    pub fn set_optimistic(&mut self, optimistic: bool) {
+        self.optimistic = optimistic;
+    }
 }
 
-#[repr(C)]
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[serde(bound = "F: Field")]
-pub struct IncorporateSiblingRecord<F: Field> {
-    pub read_sibling_is_on_right: RecordId,
-    pub sibling_is_on_right: bool,
-    pub p2_input: [F; 2 * CHUNK],
+pub(crate) fn compress<F: PrimeField32, const SBOX_REGISTERS: usize>(
+    subchip: &Poseidon2SubChip<F, SBOX_REGISTERS>,
+    left: [F; CHUNK], // becomes the first CHUNK elements of the permutation input
+    right: [F; CHUNK], // becomes the last CHUNK elements of the permutation input
+) -> ([F; 2 * CHUNK], [F; CHUNK]) {
+    let concatenated = std::array::from_fn(|i| if i < CHUNK { left[i] } else { right[i - CHUNK] });
+    let permuted = subchip.permute(concatenated); // permutation of `left` followed by `right`
+    (concatenated, std::array::from_fn(|i| permuted[i])) // (input, first CHUNK outputs)
 }
 
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[serde(bound = "F: Field")]
-pub struct IncorporateRowRecord<F: Field> {
-    pub chunks: Vec<InsideRowRecord<F>>,
-    pub initial_opened_index: usize,
-    pub final_opened_index: usize,
-    pub initial_height_read: RecordId,
-    pub final_height_read: RecordId,
-    pub p2_input: [F; 2 * CHUNK],
+impl<F: PrimeField32, const SBOX_REGISTERS: usize> NativePoseidon2Filler<F, SBOX_REGISTERS> {
+    pub fn new(poseidon2_config: Poseidon2Config<F>) -> Self {
+        let subchip = Poseidon2SubChip::new(poseidon2_config.constants);
+        let empty_poseidon2_sub_cols = subchip.generate_trace(vec![[F::ZERO; CHUNK * 2]]).values;
+        Self {
+            empty_poseidon2_sub_cols, // flat values of the trace for a single all-zero input
+            subchip,
+        }
+    }
 }
 
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[serde(bound = "F: Field")]
-pub struct InsideRowRecord<F: Field> {
-    pub cells: Vec<CellRecord>,
-    pub p2_input: [F; 2 * CHUNK],
-}
+pub(super) const NUM_INITIAL_READS: usize = 6;
+pub(super) const NUM_SIMPLE_ACCESSES: u32 = 7;
 
-#[repr(C)]
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct CellRecord {
-    pub read: RecordId,
-    pub opened_index: usize,
-    pub read_row_pointer_and_length: Option<RecordId>,
-    pub row_pointer: usize,
-    pub row_end: usize,
+#[derive(Debug, Clone, Default)]
+pub struct NativePoseidon2Metadata {
+    num_rows: usize,
 }
 
-#[repr(C)]
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[serde(bound = "F: Field")]
-pub struct SimplePoseidonRecord<F: Field> {
-    pub from_state: ExecutionState<u32>,
-    pub instruction: Instruction<F>,
-
-    pub read_input_pointer_1: RecordId,
-    pub read_input_pointer_2: Option<RecordId>,
-    pub read_output_pointer: RecordId,
-    pub read_data_1: RecordId,
-    pub read_data_2: RecordId,
-    pub write_data_1: RecordId,
-    pub write_data_2: Option<RecordId>,
-
-    pub input_pointer_1: F,
-    pub input_pointer_2: F,
-    pub output_pointer: F,
-    pub p2_input: [F; 2 * CHUNK],
+impl MultiRowMetadata for NativePoseidon2Metadata {
+    #[inline(always)]
+    fn get_num_rows(&self) -> usize {
+        self.num_rows
+    }
 }
 
-#[derive(Debug, Clone, Serialize, Deserialize, Default)]
-#[serde(bound = "F: Field")]
-pub struct NativePoseidon2RecordSet<F: Field> {
-    pub verify_batch_records: Vec<VerifyBatchRecord<F>>,
-    pub simple_permute_records: Vec<SimplePoseidonRecord<F>>,
-}
+type NativePoseidon2RecordLayout = MultiRowLayout<NativePoseidon2Metadata>;
 
-pub struct NativePoseidon2Chip<F: Field, const SBOX_REGISTERS: usize> {
-    pub(super) air: NativePoseidon2Air<F, SBOX_REGISTERS>,
-    pub record_set: NativePoseidon2RecordSet<F>,
-    pub height: usize,
-    pub(super) offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-    pub(super) subchip: Poseidon2SubChip<F, SBOX_REGISTERS>,
-    pub(super) streams: Arc<Mutex<Streams<F>>>,
-}
+pub struct NativePoseidon2RecordMut<'a, F, const SBOX_REGISTERS: usize>(
+    &'a mut [NativePoseidon2Cols<F, SBOX_REGISTERS>],
+);
 
-impl<F: PrimeField32, const SBOX_REGISTERS: usize> NativePoseidon2Chip<F, SBOX_REGISTERS> {
-    pub fn new(
-        port: SystemPort,
-        offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-        poseidon2_config: Poseidon2Config<F>,
-        verify_batch_bus: VerifyBatchBus,
-        streams: Arc<Mutex<Streams<F>>>,
-    ) -> Self {
-        let air = NativePoseidon2Air {
-            execution_bridge: ExecutionBridge::new(port.execution_bus, port.program_bus),
-            memory_bridge: port.memory_bridge,
-            internal_bus: verify_batch_bus,
-            subair: Arc::new(Poseidon2SubAir::new(poseidon2_config.constants.into())),
-            address_space: F::from_canonical_u32(AS::Native as u32),
+impl<'a, F: PrimeField32, const SBOX_REGISTERS: usize>
+    CustomBorrow<'a, NativePoseidon2RecordMut<'a, F, SBOX_REGISTERS>, NativePoseidon2RecordLayout>
+    for [u8]
+{
+    fn custom_borrow(
+        &'a mut self,
+        layout: NativePoseidon2RecordLayout,
+    ) -> NativePoseidon2RecordMut<'a, F, SBOX_REGISTERS> {
+        let arr = unsafe {
+            self.align_to_mut::<NativePoseidon2Cols<F, SBOX_REGISTERS>>()
+                .1 // middle part of (prefix, middle, suffix): the aligned column structs
         };
-        Self {
-            record_set: Default::default(),
-            air,
-            height: 0,
-            offline_memory,
-            subchip: Poseidon2SubChip::new(poseidon2_config.constants),
-            streams,
-        }
+        NativePoseidon2RecordMut(&mut arr[..layout.metadata.num_rows]) // first `num_rows` rows
     }
 
-    fn compress(&self, left: [F; CHUNK], right: [F; CHUNK]) -> ([F; 2 * CHUNK], [F; CHUNK]) {
-        let concatenated =
-            std::array::from_fn(|i| if i < CHUNK { left[i] } else { right[i - CHUNK] });
-        let permuted = self.subchip.permute(concatenated);
-        (concatenated, std::array::from_fn(|i| permuted[i]))
+    unsafe fn extract_layout(&self) -> NativePoseidon2RecordLayout {
+        // Each instruction record consists solely of some number of contiguously
+        // stored NativePoseidon2Cols<...> structs, each of which corresponds to a
+        // single trace row. Trace fillers don't actually need to know how many rows
+        // each instruction uses, and can thus treat each NativePoseidon2Cols<...>
+        // as a single record.
+        NativePoseidon2RecordLayout {
+            metadata: NativePoseidon2Metadata { num_rows: 1 },
+        }
     }
 }
 
-pub(super) const NUM_INITIAL_READS: usize = 6;
-pub(super) const NUM_SIMPLE_ACCESSES: u32 = 7;
+impl<F: PrimeField32, const SBOX_REGISTERS: usize> SizedRecord<NativePoseidon2RecordLayout>
+    for NativePoseidon2RecordMut<'_, F, SBOX_REGISTERS>
+{
+    fn size(layout: &NativePoseidon2RecordLayout) -> usize {
+        layout.metadata.num_rows * size_of::<NativePoseidon2Cols<F, SBOX_REGISTERS>>()
+    }
 
-impl<F: PrimeField32, const SBOX_REGISTERS: usize> InstructionExecutor<F>
-    for NativePoseidon2Chip<F, SBOX_REGISTERS>
+    fn alignment(_layout: &NativePoseidon2RecordLayout) -> usize {
+        align_of::<NativePoseidon2Cols<F, SBOX_REGISTERS>>()
+    }
+}
+
+impl<F: PrimeField32, RA, const SBOX_REGISTERS: usize> PreflightExecutor<F, RA>
+    for NativePoseidon2Executor<F, SBOX_REGISTERS>
+where
+    for<'buf> RA: RecordArena<
+        'buf,
+        MultiRowLayout<NativePoseidon2Metadata>,
+        NativePoseidon2RecordMut<'buf, F, SBOX_REGISTERS>,
+    >,
 {
     fn execute(
-        &mut self,
-        memory: &mut MemoryController<F>,
+        &self,
+        state: VmStateMut<F, TracingMemory, RA>,
         instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-    ) -> Result<ExecutionState<u32>, ExecutionError> {
+    ) -> Result<(), ExecutionError> {
+        let arena = state.ctx;
+        let init_timestamp_u32 = state.memory.timestamp;
         if instruction.opcode == PERM_POS2.global_opcode()
             || instruction.opcode == COMP_POS2.global_opcode()
         {
+            let cols = &mut arena
+                .alloc(MultiRowLayout::new(NativePoseidon2Metadata { num_rows: 1 }))
+                .0[0];
+            let simple_cols: &mut SimplePoseidonSpecificCols<F> =
+                cols.specific[..SimplePoseidonSpecificCols::<u8>::width()].borrow_mut();
             let &Instruction {
                 a: output_register,
                 b: input_register_1,
@@ -192,22 +171,45 @@ impl<F: PrimeField32, const SBOX_REGISTERS: usize> InstructionExecutor<F>
                 e: data_address_space,
                 ..
             } = instruction;
+            debug_assert_eq!(
+                register_address_space,
+                F::from_canonical_u32(AS::Native as u32)
+            );
+            debug_assert_eq!(data_address_space, F::from_canonical_u32(AS::Native as u32));
+            let [output_pointer]: [F; 1] = tracing_read_native_helper(
+                state.memory,
+                output_register.as_canonical_u32(),
+                simple_cols.read_output_pointer.as_mut(),
+            );
+            let output_pointer_u32 = output_pointer.as_canonical_u32();
+            let [input_pointer_1]: [F; 1] = tracing_read_native_helper(
+                state.memory,
+                input_register_1.as_canonical_u32(),
+                simple_cols.read_input_pointer_1.as_mut(),
+            );
+            let input_pointer_1_u32 = input_pointer_1.as_canonical_u32();
+            let [input_pointer_2]: [F; 1] = if instruction.opcode == PERM_POS2.global_opcode() {
+                state.memory.increment_timestamp();
+                [input_pointer_1 + F::from_canonical_usize(CHUNK)]
+            } else {
+                tracing_read_native_helper(
+                    state.memory,
+                    input_register_2.as_canonical_u32(),
+                    simple_cols.read_input_pointer_2.as_mut(),
+                )
+            };
+            let input_pointer_2_u32 = input_pointer_2.as_canonical_u32();
+            let data_1: [F; CHUNK] = tracing_read_native_helper(
+                state.memory,
+                input_pointer_1_u32,
+                simple_cols.read_data_1.as_mut(),
+            );
+            let data_2: [F; CHUNK] = tracing_read_native_helper(
+                state.memory,
+                input_pointer_2_u32,
+                simple_cols.read_data_2.as_mut(),
+            );
 
-            let (read_output_pointer, output_pointer) =
-                memory.read_cell(register_address_space, output_register);
-            let (read_input_pointer_1, input_pointer_1) =
-                memory.read_cell(register_address_space, input_register_1);
-            let (read_input_pointer_2, input_pointer_2) =
-                if instruction.opcode == PERM_POS2.global_opcode() {
-                    memory.increment_timestamp();
-                    (None, input_pointer_1 + F::from_canonical_usize(CHUNK))
-                } else {
-                    let (read_input_pointer_2, input_pointer_2) =
-                        memory.read_cell(register_address_space, input_register_2);
-                    (Some(read_input_pointer_2), input_pointer_2)
-                };
-            let (read_data_1, data_1) = memory.read::<CHUNK>(data_address_space, input_pointer_1);
-            let (read_data_2, data_2) = memory.read::<CHUNK>(data_address_space, input_pointer_2);
             let p2_input = std::array::from_fn(|i| {
                 if i < CHUNK {
                     data_1[i]
@@ -216,50 +218,51 @@ impl<F: PrimeField32, const SBOX_REGISTERS: usize> InstructionExecutor<F>
                 }
             });
             let output = self.subchip.permute(p2_input);
-            let (write_data_1, _) = memory.write::<CHUNK>(
-                data_address_space,
-                output_pointer,
+            tracing_write_native_inplace(
+                state.memory,
+                output_pointer_u32,
                 std::array::from_fn(|i| output[i]),
+                &mut simple_cols.write_data_1,
             );
-            let write_data_2 = if instruction.opcode == PERM_POS2.global_opcode() {
-                Some(
-                    memory
-                        .write::<CHUNK>(
-                            data_address_space,
-                            output_pointer + F::from_canonical_usize(CHUNK),
-                            std::array::from_fn(|i| output[CHUNK + i]),
-                        )
-                        .0,
-                )
+            if instruction.opcode == PERM_POS2.global_opcode() {
+                tracing_write_native_inplace(
+                    state.memory,
+                    output_pointer_u32 + CHUNK as u32,
+                    std::array::from_fn(|i| output[i + CHUNK]),
+                    &mut simple_cols.write_data_2,
+                );
             } else {
-                memory.increment_timestamp();
-                None
-            };
-
-            assert_eq!(
-                memory.timestamp(),
-                from_state.timestamp + NUM_SIMPLE_ACCESSES
+                state.memory.increment_timestamp();
+            }
+            debug_assert_eq!(
+                state.memory.timestamp,
+                init_timestamp_u32 + NUM_SIMPLE_ACCESSES
             );
-
-            self.record_set
-                .simple_permute_records
-                .push(SimplePoseidonRecord {
-                    from_state,
-                    instruction: instruction.clone(),
-                    read_input_pointer_1,
-                    read_input_pointer_2,
-                    read_output_pointer,
-                    read_data_1,
-                    read_data_2,
-                    write_data_1,
-                    write_data_2,
-                    input_pointer_1,
-                    input_pointer_2,
-                    output_pointer,
-                    p2_input,
-                });
-            self.height += 1;
+            cols.incorporate_row = F::ZERO;
+            cols.incorporate_sibling = F::ZERO;
+            cols.inside_row = F::ZERO;
+            cols.simple = F::ONE;
+            cols.end_inside_row = F::ZERO;
+            cols.end_top_level = F::ZERO;
+            cols.is_exhausted = [F::ZERO; CHUNK - 1];
+            cols.start_timestamp = F::from_canonical_u32(init_timestamp_u32);
+
+            cols.inner.inputs = p2_input;
+            simple_cols.pc = F::from_canonical_u32(*state.pc);
+            simple_cols.is_compress = F::from_bool(instruction.opcode == COMP_POS2.global_opcode());
+            simple_cols.output_register = output_register;
+            simple_cols.input_register_1 = input_register_1;
+            simple_cols.input_register_2 = input_register_2;
+            simple_cols.output_pointer = output_pointer;
+            simple_cols.input_pointer_1 = input_pointer_1;
+            simple_cols.input_pointer_2 = input_pointer_2;
         } else if instruction.opcode == VERIFY_BATCH.global_opcode() {
+            let init_timestamp = F::from_canonical_u32(init_timestamp_u32);
+            let mut col_buffer = vec![F::ZERO; NativePoseidon2Cols::<F, SBOX_REGISTERS>::width()];
+            let last_top_level_cols: &mut NativePoseidon2Cols<F, SBOX_REGISTERS> =
+                col_buffer.as_mut_slice().borrow_mut();
+            let ltl_specific_cols: &mut TopLevelSpecificCols<F> =
+                last_top_level_cols.specific[..TopLevelSpecificCols::<u8>::width()].borrow_mut();
             let &Instruction {
                 a: dim_register,
                 b: opened_register,
@@ -270,228 +273,379 @@ impl<F: PrimeField32, const SBOX_REGISTERS: usize> InstructionExecutor<F>
                 g: opened_element_size_inv,
                 ..
             } = instruction;
-            let address_space = self.air.address_space;
             // calc inverse fast assuming opened_element_size in {1, 4}
             let mut opened_element_size = F::ONE;
             while opened_element_size * opened_element_size_inv != F::ONE {
                 opened_element_size += F::ONE;
             }
 
-            let proof_id = memory.unsafe_read_cell(address_space, proof_id_ptr);
-            let (dim_base_pointer_read, dim_base_pointer) =
-                memory.read_cell(address_space, dim_register);
-            let (opened_base_pointer_read, opened_base_pointer) =
-                memory.read_cell(address_space, opened_register);
-            let (opened_length_read, opened_length) =
-                memory.read_cell(address_space, opened_length_register);
-            let (index_base_pointer_read, index_base_pointer) =
-                memory.read_cell(address_space, index_register);
-            let (commit_pointer_read, commit_pointer) =
-                memory.read_cell(address_space, commit_register);
-            let (commit_read, commit) = memory.read(address_space, commit_pointer);
+            let [proof_id]: [F; 1] =
+                memory_read_native(state.memory.data(), proof_id_ptr.as_canonical_u32());
+            let [dim_base_pointer]: [F; 1] = tracing_read_native_helper(
+                state.memory,
+                dim_register.as_canonical_u32(),
+                ltl_specific_cols.dim_base_pointer_read.as_mut(),
+            );
+            let dim_base_pointer_u32 = dim_base_pointer.as_canonical_u32();
+            let [opened_base_pointer]: [F; 1] = tracing_read_native_helper(
+                state.memory,
+                opened_register.as_canonical_u32(),
+                ltl_specific_cols.opened_base_pointer_read.as_mut(),
+            );
+            let opened_base_pointer_u32 = opened_base_pointer.as_canonical_u32();
+            let [opened_length]: [F; 1] = tracing_read_native_helper(
+                state.memory,
+                opened_length_register.as_canonical_u32(),
+                ltl_specific_cols.opened_length_read.as_mut(),
+            );
+            let [index_base_pointer]: [F; 1] = tracing_read_native_helper(
+                state.memory,
+                index_register.as_canonical_u32(),
+                ltl_specific_cols.index_base_pointer_read.as_mut(),
+            );
+            let index_base_pointer_u32 = index_base_pointer.as_canonical_u32();
+            let [commit_pointer]: [F; 1] = tracing_read_native_helper(
+                state.memory,
+                commit_register.as_canonical_u32(),
+                ltl_specific_cols.commit_pointer_read.as_mut(),
+            );
+            // In E3, the proof is assumed to be valid. The verification during execution is
+            // skipped.
+            let commit: [F; CHUNK] = tracing_read_native_helper(
+                state.memory,
+                commit_pointer.as_canonical_u32(),
+                ltl_specific_cols.commit_read.as_mut(),
+            );
 
             let opened_length = opened_length.as_canonical_u32() as usize;
-
-            let initial_log_height = memory
-                .unsafe_read_cell(address_space, dim_base_pointer)
-                .as_canonical_u32();
-            let mut log_height = initial_log_height as i32;
-            let mut sibling_index = 0;
+            let [initial_log_height]: [F; 1] =
+                memory_read_native(state.memory.data(), dim_base_pointer_u32);
+            let initial_log_height_u32 = initial_log_height.as_canonical_u32();
+            let mut log_height = initial_log_height_u32 as i32;
+
+            // Count the inside rows and the non-inside rows up front; the non-inside count is the
+            // offset at which the inside-row section starts within the allocated rows.
+            let (num_inside_rows, num_non_inside_rows) = {
+                let opened_element_size_u32 = opened_element_size.as_canonical_u32();
+                let mut num_non_inside_rows = initial_log_height_u32 as usize;
+                let mut num_inside_rows = 0;
+                let mut log_height = initial_log_height_u32;
+                let mut opened_index = 0;
+                loop {
+                    let mut total_len = 0;
+                    while opened_index < opened_length {
+                        let [height]: [F; 1] = memory_read_native(
+                            state.memory.data(),
+                            dim_base_pointer_u32 + opened_index as u32,
+                        );
+                        if height.as_canonical_u32() != log_height {
+                            break;
+                        }
+                        let [row_len]: [F; 1] = memory_read_native(
+                            state.memory.data(),
+                            opened_base_pointer_u32 + 2 * opened_index as u32 + 1,
+                        );
+                        total_len += row_len.as_canonical_u32() * opened_element_size_u32;
+                        opened_index += 1;
+                    }
+                    if total_len != 0 {
+                        num_non_inside_rows += 1;
+                        num_inside_rows += (total_len as usize).div_ceil(CHUNK);
+                    }
+                    if log_height == 0 {
+                        break;
+                    }
+                    log_height -= 1;
+                }
+                (num_inside_rows, num_non_inside_rows)
+            };
+            let mut proof_index = 0;
             let mut opened_index = 0;
-            let mut top_level = vec![];
 
             let mut root = [F::ZERO; CHUNK];
             let sibling_proof: Vec<[F; CHUNK]> = {
-                let streams = self.streams.lock().unwrap();
                 let proof_idx = proof_id.as_canonical_u32() as usize;
-                streams.hint_space[proof_idx]
+                state.streams.hint_space[proof_idx]
                     .par_chunks(CHUNK)
                     .map(|c| c.try_into().unwrap())
                     .collect()
             };
 
+            let total_num_row = num_inside_rows + num_non_inside_rows;
+            let allocated_rows = arena
+                .alloc(MultiRowLayout::new(NativePoseidon2Metadata {
+                    num_rows: total_num_row,
+                }))
+                .0;
+            allocated_rows[0].inner.export = F::from_canonical_u32(num_non_inside_rows as u32);
+            let mut inside_row_idx = num_non_inside_rows;
+            let mut non_inside_row_idx = 0;
+
             while log_height >= 0 {
-                let incorporate_row = if opened_index < opened_length
-                    && memory.unsafe_read_cell(
-                        address_space,
-                        dim_base_pointer + F::from_canonical_usize(opened_index),
-                    ) == F::from_canonical_u32(log_height as u32)
+                if opened_index < opened_length
+                    && memory_read_native::<F, 1>(
+                        state.memory.data(),
+                        dim_base_pointer_u32 + opened_index as u32,
+                    )[0] == F::from_canonical_u32(log_height as u32)
                 {
+                    state
+                        .memory
+                        .increment_timestamp_by(NUM_INITIAL_READS as u32);
+                    let incorporate_start_timestamp = state.memory.timestamp;
                     let initial_opened_index = opened_index;
-                    for _ in 0..NUM_INITIAL_READS {
-                        memory.increment_timestamp();
-                    }
-                    let mut chunks = vec![];
-
                     let mut row_pointer = 0;
                     let mut row_end = 0;
-
-                    let mut prev_rolling_hash: Option<[F; 2 * CHUNK]> = None;
                     let mut rolling_hash = [F::ZERO; 2 * CHUNK];
-
                     let mut is_first_in_segment = true;
 
                     loop {
-                        let mut cells = vec![];
+                        if inside_row_idx == total_num_row {
+                            opened_index += 1;
+                            break;
+                        }
+                        let inside_cols = &mut allocated_rows[inside_row_idx];
+                        let inside_specific_cols: &mut InsideRowSpecificCols<F> = inside_cols
+                            .specific[..InsideRowSpecificCols::<u8>::width()]
+                            .borrow_mut();
+                        let start_timestamp_u32 = state.memory.timestamp;
+
+                        let mut cells_idx = 0;
                         for chunk_elem in rolling_hash.iter_mut().take(CHUNK) {
-                            let read_row_pointer_and_length = if is_first_in_segment
-                                || row_pointer == row_end
-                            {
+                            let cell_cols = &mut inside_specific_cols.cells[cells_idx];
+                            if is_first_in_segment || row_pointer == row_end {
                                 if is_first_in_segment {
                                     is_first_in_segment = false;
                                 } else {
                                     opened_index += 1;
                                     if opened_index == opened_length
-                                        || memory.unsafe_read_cell(
-                                            address_space,
-                                            dim_base_pointer
-                                                + F::from_canonical_usize(opened_index),
-                                        ) != F::from_canonical_u32(log_height as u32)
+                                        || memory_read_native::<F, 1>(
+                                            state.memory.data(),
+                                            dim_base_pointer_u32 + opened_index as u32,
+                                        )[0] != F::from_canonical_u32(log_height as u32)
                                     {
                                         break;
                                     }
                                 }
-                                let (result, [new_row_pointer, row_len]) = memory.read(
-                                    address_space,
-                                    opened_base_pointer + F::from_canonical_usize(2 * opened_index),
+                                let [new_row_pointer, row_len]: [F; 2] = tracing_read_native_helper(
+                                    state.memory,
+                                    opened_base_pointer_u32 + 2 * opened_index as u32,
+                                    cell_cols.read_row_pointer_and_length.as_mut(),
                                 );
                                 row_pointer = new_row_pointer.as_canonical_u32() as usize;
                                 row_end = row_pointer
                                     + (opened_element_size * row_len).as_canonical_u32() as usize;
-                                Some(result)
+                                cell_cols.is_first_in_row = F::ONE;
                             } else {
-                                memory.increment_timestamp();
-                                None
-                            };
-                            let (read, value) = memory
-                                .read_cell(address_space, F::from_canonical_usize(row_pointer));
-                            cells.push(CellRecord {
-                                read,
-                                opened_index,
-                                read_row_pointer_and_length,
-                                row_pointer,
-                                row_end,
-                            });
+                                state.memory.increment_timestamp();
+                            }
+                            let [value]: [F; 1] = tracing_read_native_helper(
+                                state.memory,
+                                row_pointer as u32,
+                                cell_cols.read.as_mut(),
+                            );
+
+                            cell_cols.opened_index = F::from_canonical_usize(opened_index);
+                            cell_cols.row_pointer = F::from_canonical_usize(row_pointer);
+                            cell_cols.row_end = F::from_canonical_usize(row_end);
+
                             *chunk_elem = value;
                             row_pointer += 1;
+                            cells_idx += 1;
                         }
-                        if cells.is_empty() {
+                        if cells_idx == 0 {
                             break;
                         }
-                        let cells_len = cells.len();
-                        chunks.push(InsideRowRecord {
-                            cells,
-                            p2_input: rolling_hash,
-                        });
-                        self.height += 1;
-                        prev_rolling_hash = Some(rolling_hash);
-                        self.subchip.permute_mut(&mut rolling_hash);
-                        if cells_len < CHUNK {
-                            for _ in 0..CHUNK - cells_len {
-                                memory.increment_timestamp();
-                                memory.increment_timestamp();
+                        inside_cols.inner.inputs[..CHUNK].copy_from_slice(&rolling_hash[..CHUNK]);
+                        if !self.optimistic {
+                            self.subchip.permute_mut(&mut rolling_hash);
+                        }
+                        if cells_idx < CHUNK {
+                            state
+                                .memory
+                                .increment_timestamp_by(2 * (CHUNK - cells_idx) as u32);
+                        }
+
+                        inside_row_idx += 1;
+                        // left
+                        inside_cols.incorporate_row = F::ZERO;
+                        inside_cols.incorporate_sibling = F::ZERO;
+                        inside_cols.inside_row = F::ONE;
+                        inside_cols.simple = F::ZERO;
+                        // `end_inside_row` of the last row will be set to 1 after this loop.
+                        inside_cols.end_inside_row = F::ZERO;
+                        inside_cols.end_top_level = F::ZERO;
+                        inside_cols.opened_element_size_inv = opened_element_size_inv;
+                        inside_cols.very_first_timestamp =
+                            F::from_canonical_u32(incorporate_start_timestamp);
+                        inside_cols.start_timestamp = F::from_canonical_u32(start_timestamp_u32);
+
+                        inside_cols.initial_opened_index =
+                            F::from_canonical_usize(initial_opened_index);
+                        inside_cols.opened_base_pointer = opened_base_pointer;
+                        if cells_idx < CHUNK {
+                            let exhausted_opened_idx = F::from_canonical_usize(opened_index - 1);
+                            for exhausted_idx in cells_idx..CHUNK {
+                                inside_cols.is_exhausted[exhausted_idx - 1] = F::ONE;
+                                inside_specific_cols.cells[exhausted_idx].opened_index =
+                                    exhausted_opened_idx;
                             }
                             break;
                         }
                     }
+                    {
+                        let inside_cols = &mut allocated_rows[inside_row_idx - 1];
+                        inside_cols.end_inside_row = F::ONE;
+                    }
+
+                    let incorporate_cols = &mut allocated_rows[non_inside_row_idx];
+                    let top_level_specific_cols: &mut TopLevelSpecificCols<F> = incorporate_cols
+                        .specific[..TopLevelSpecificCols::<u8>::width()]
+                        .borrow_mut();
+
                     let final_opened_index = opened_index - 1;
-                    let (initial_height_read, height_check) = memory.read_cell(
-                        address_space,
-                        dim_base_pointer + F::from_canonical_usize(initial_opened_index),
+                    let [height_check]: [F; 1] = tracing_read_native_helper(
+                        state.memory,
+                        dim_base_pointer_u32 + initial_opened_index as u32,
+                        top_level_specific_cols
+                            .read_initial_height_or_sibling_is_on_right
+                            .as_mut(),
                     );
                     assert_eq!(height_check, F::from_canonical_u32(log_height as u32));
-                    let (final_height_read, height_check) = memory.read_cell(
-                        address_space,
-                        dim_base_pointer + F::from_canonical_usize(final_opened_index),
+                    let final_height_read_timestamp = state.memory.timestamp;
+                    let [height_check]: [F; 1] = tracing_read_native_helper(
+                        state.memory,
+                        dim_base_pointer_u32 + final_opened_index as u32,
+                        top_level_specific_cols.read_final_height.as_mut(),
                     );
                     assert_eq!(height_check, F::from_canonical_u32(log_height as u32));
 
-                    let hash: [F; CHUNK] = std::array::from_fn(|i| rolling_hash[i]);
-
-                    let (p2_input, new_root) = if log_height as u32 == initial_log_height {
-                        (prev_rolling_hash.unwrap(), hash)
-                    } else {
-                        self.compress(root, hash)
-                    };
-                    root = new_root;
-
-                    self.height += 1;
-                    Some(IncorporateRowRecord {
-                        chunks,
-                        initial_opened_index,
-                        final_opened_index,
-                        initial_height_read,
-                        final_height_read,
-                        p2_input,
-                    })
-                } else {
-                    None
-                };
-
-                let incorporate_sibling = if log_height == 0 {
-                    None
-                } else {
-                    for _ in 0..NUM_INITIAL_READS {
-                        memory.increment_timestamp();
+                    if !self.optimistic {
+                        let hash: [F; CHUNK] = std::array::from_fn(|i| rolling_hash[i]);
+                        root = if log_height as u32 == initial_log_height_u32 {
+                            hash
+                        } else {
+                            compress(&self.subchip, root, hash).1
+                        };
                     }
+                    non_inside_row_idx += 1;
+
+                    incorporate_cols.incorporate_row = F::ONE;
+                    incorporate_cols.incorporate_sibling = F::ZERO;
+                    incorporate_cols.inside_row = F::ZERO;
+                    incorporate_cols.simple = F::ZERO;
+                    incorporate_cols.end_inside_row = F::ZERO;
+                    incorporate_cols.end_top_level = F::ZERO;
+                    incorporate_cols.start_top_level = F::from_bool(proof_index == 0);
+                    incorporate_cols.opened_element_size_inv = opened_element_size_inv;
+                    incorporate_cols.very_first_timestamp = init_timestamp;
+                    incorporate_cols.start_timestamp = F::from_canonical_u32(
+                        incorporate_start_timestamp - NUM_INITIAL_READS as u32,
+                    );
+                    top_level_specific_cols.end_timestamp =
+                        F::from_canonical_u32(final_height_read_timestamp + 1);
+
+                    incorporate_cols.initial_opened_index =
+                        F::from_canonical_usize(initial_opened_index);
+                    top_level_specific_cols.final_opened_index =
+                        F::from_canonical_usize(final_opened_index);
+                    top_level_specific_cols.log_height = F::from_canonical_u32(log_height as u32);
+                    top_level_specific_cols.opened_length = F::from_canonical_usize(opened_length);
+                    top_level_specific_cols.dim_base_pointer = dim_base_pointer;
+                    incorporate_cols.opened_base_pointer = opened_base_pointer;
+                    top_level_specific_cols.index_base_pointer = index_base_pointer;
+                    top_level_specific_cols.proof_index = F::from_canonical_usize(proof_index);
+                }
 
-                    let (read_sibling_is_on_right, sibling_is_on_right) = memory.read_cell(
-                        address_space,
-                        index_base_pointer + F::from_canonical_usize(sibling_index),
+                if log_height != 0 {
+                    let row_start_timestamp = state.memory.timestamp;
+                    state
+                        .memory
+                        .increment_timestamp_by(NUM_INITIAL_READS as u32);
+
+                    let sibling_cols = &mut allocated_rows[non_inside_row_idx];
+                    let top_level_specific_cols: &mut TopLevelSpecificCols<F> =
+                        sibling_cols.specific[..TopLevelSpecificCols::<u8>::width()].borrow_mut();
+
+                    let read_sibling_is_on_right_timestamp = state.memory.timestamp;
+                    let [sibling_is_on_right]: [F; 1] = tracing_read_native_helper(
+                        state.memory,
+                        index_base_pointer_u32 + proof_index as u32,
+                        top_level_specific_cols
+                            .read_initial_height_or_sibling_is_on_right
+                            .as_mut(),
                     );
-                    let sibling_is_on_right = sibling_is_on_right == F::ONE;
-                    let sibling = sibling_proof[sibling_index];
-                    let (p2_input, new_root) = if sibling_is_on_right {
-                        self.compress(sibling, root)
-                    } else {
-                        self.compress(root, sibling)
-                    };
-                    root = new_root;
-
-                    self.height += 1;
-                    Some(IncorporateSiblingRecord {
-                        read_sibling_is_on_right,
-                        sibling_is_on_right,
-                        p2_input,
-                    })
-                };
+                    let sibling = sibling_proof[proof_index];
+                    if !self.optimistic {
+                        root = if sibling_is_on_right == F::ONE {
+                            compress(&self.subchip, sibling, root).1
+                        } else {
+                            compress(&self.subchip, root, sibling).1
+                        };
+                    }
 
-                top_level.push(TopLevelRecord {
-                    incorporate_row,
-                    incorporate_sibling,
-                });
+                    non_inside_row_idx += 1;
+
+                    sibling_cols.inner.inputs[..CHUNK].copy_from_slice(&sibling);
+
+                    sibling_cols.incorporate_row = F::ZERO;
+                    sibling_cols.incorporate_sibling = F::ONE;
+                    sibling_cols.inside_row = F::ZERO;
+                    sibling_cols.simple = F::ZERO;
+                    sibling_cols.end_inside_row = F::ZERO;
+                    sibling_cols.end_top_level = F::ZERO;
+                    sibling_cols.start_top_level = F::ZERO;
+                    sibling_cols.opened_element_size_inv = opened_element_size_inv;
+                    sibling_cols.very_first_timestamp = init_timestamp;
+                    sibling_cols.start_timestamp = F::from_canonical_u32(row_start_timestamp);
+
+                    top_level_specific_cols.end_timestamp =
+                        F::from_canonical_u32(read_sibling_is_on_right_timestamp + 1);
+                    sibling_cols.initial_opened_index = F::from_canonical_usize(opened_index);
+                    top_level_specific_cols.final_opened_index =
+                        F::from_canonical_usize(opened_index - 1);
+                    top_level_specific_cols.log_height = F::from_canonical_u32(log_height as u32);
+                    top_level_specific_cols.opened_length = F::from_canonical_usize(opened_length);
+                    top_level_specific_cols.dim_base_pointer = dim_base_pointer;
+                    sibling_cols.opened_base_pointer = opened_base_pointer;
+                    top_level_specific_cols.index_base_pointer = index_base_pointer;
+
+                    top_level_specific_cols.proof_index = F::from_canonical_usize(proof_index);
+                    top_level_specific_cols.sibling_is_on_right = sibling_is_on_right;
+                };
 
                 log_height -= 1;
-                sibling_index += 1;
+                proof_index += 1;
+            }
+            let ltl_trace_cols = &mut allocated_rows[non_inside_row_idx - 1];
+            let ltl_trace_specific_cols: &mut TopLevelSpecificCols<F> =
+                ltl_trace_cols.specific[..TopLevelSpecificCols::<u8>::width()].borrow_mut();
+            ltl_trace_cols.inner.export = F::from_canonical_u32(total_num_row as u32);
+            ltl_trace_cols.end_top_level = F::ONE;
+            ltl_trace_specific_cols.pc = F::from_canonical_u32(*state.pc);
+            ltl_trace_specific_cols.dim_register = dim_register;
+            ltl_trace_specific_cols.opened_register = opened_register;
+            ltl_trace_specific_cols.opened_length_register = opened_length_register;
+            ltl_trace_specific_cols.proof_id = proof_id_ptr;
+            ltl_trace_specific_cols.index_register = index_register;
+            ltl_trace_specific_cols.commit_register = commit_register;
+            ltl_trace_specific_cols.commit_pointer = commit_pointer;
+            ltl_trace_specific_cols.dim_base_pointer_read = ltl_specific_cols.dim_base_pointer_read;
+            ltl_trace_specific_cols.opened_base_pointer_read =
+                ltl_specific_cols.opened_base_pointer_read;
+            ltl_trace_specific_cols.opened_length_read = ltl_specific_cols.opened_length_read;
+            ltl_trace_specific_cols.index_base_pointer_read =
+                ltl_specific_cols.index_base_pointer_read;
+            ltl_trace_specific_cols.commit_pointer_read = ltl_specific_cols.commit_pointer_read;
+            ltl_trace_specific_cols.commit_read = ltl_specific_cols.commit_read;
+            if !self.optimistic {
+                assert_eq!(commit, root);
             }
-
-            assert_eq!(commit, root);
-            self.record_set
-                .verify_batch_records
-                .push(VerifyBatchRecord {
-                    from_state,
-                    instruction: instruction.clone(),
-                    dim_base_pointer,
-                    opened_base_pointer,
-                    opened_length,
-                    index_base_pointer,
-                    commit_pointer,
-                    dim_base_pointer_read,
-                    opened_base_pointer_read,
-                    opened_length_read,
-                    index_base_pointer_read,
-                    commit_pointer_read,
-                    commit_read,
-                    initial_log_height: initial_log_height as usize,
-                    top_level,
-                });
         } else {
             unreachable!()
         }
-        Ok(ExecutionState {
-            pc: from_state.pc + DEFAULT_PC_STEP,
-            timestamp: memory.timestamp(),
-        })
+
+        *state.pc += DEFAULT_PC_STEP;
+        Ok(())
     }
 
     fn get_opcode_name(&self, opcode: usize) -> String {
@@ -506,3 +660,324 @@ impl<F: PrimeField32, const SBOX_REGISTERS: usize> InstructionExecutor<F>
         }
     }
 }
+
+impl<F: PrimeField32, const SBOX_REGISTERS: usize> TraceFiller<F>
+    for NativePoseidon2Filler<F, SBOX_REGISTERS>
+{
+    fn fill_trace(
+        &self,
+        mem_helper: &MemoryAuxColsFactory<F>,
+        trace: &mut RowMajorMatrix<F>,
+        rows_used: usize,
+    ) where
+        F: Send + Sync + Clone,
+    {
+        // Split the used rows into per-instruction chunks; they are filled in parallel below.
+        let width = trace.width();
+        let mut row_idx = 0;
+        let mut row_slice = trace.values.as_mut_slice();
+        let mut chunk_start = Vec::new(); // one mutable sub-slice per instruction
+        while row_idx < rows_used {
+            let cols: &NativePoseidon2Cols<F, SBOX_REGISTERS> = row_slice[..width].borrow();
+            let (curr, rest) = if cols.simple.is_one() {
+                row_idx += 1; // a simple instruction occupies exactly one row
+                row_slice.split_at_mut(width)
+            } else {
+                let num_non_inside_row = cols.inner.export.as_canonical_u32() as usize;
+                let start = (num_non_inside_row - 1) * width; // last non-inside row
+                let cols: &NativePoseidon2Cols<F, SBOX_REGISTERS> =
+                    row_slice[start..(start + width)].borrow();
+                let total_num_row = cols.inner.export.as_canonical_u32() as usize;
+                row_idx += total_num_row;
+                row_slice.split_at_mut(total_num_row * width)
+            };
+            chunk_start.push(curr);
+            row_slice = rest; // continue with the rows after this chunk
+        }
+        chunk_start.into_par_iter().for_each(|chunk_slice| {
+            let cols: &NativePoseidon2Cols<F, SBOX_REGISTERS> = chunk_slice[..width].borrow();
+            if cols.simple.is_one() {
+                self.fill_simple_chunk(mem_helper, chunk_slice);
+            } else {
+                self.fill_verify_batch_chunk(mem_helper, chunk_slice);
+            }
+        });
+        // Remaining rows are dummy rows: copy `empty_poseidon2_sub_cols` into their inner columns.
+        let inner_width = self.subchip.air.width();
+        row_slice.par_chunks_exact_mut(width).for_each(|row_slice| {
+            row_slice[..inner_width].copy_from_slice(&self.empty_poseidon2_sub_cols);
+        });
+    }
+}
+
+impl<F: PrimeField32, const SBOX_REGISTERS: usize> NativePoseidon2Filler<F, SBOX_REGISTERS> {
+    fn fill_simple_chunk(&self, mem_helper: &MemoryAuxColsFactory<F>, chunk_slice: &mut [F]) {
+        { // Regenerate the Poseidon2 sub-trace from the inputs already stored in this row.
+            let inner_width = self.subchip.air.width();
+            let cols: &NativePoseidon2Cols<F, SBOX_REGISTERS> = chunk_slice.as_ref().borrow();
+            let inner_cols = &self.subchip.generate_trace(vec![cols.inner.inputs]).values;
+            chunk_slice[..inner_width].copy_from_slice(inner_cols);
+        }
+
+        let cols: &mut NativePoseidon2Cols<F, SBOX_REGISTERS> = chunk_slice.borrow_mut();
+        // Simple poseidon2 row: the i-th memory access is filled at `start_timestamp + i`.
+        let simple_cols: &mut SimplePoseidonSpecificCols<F> =
+            cols.specific[..SimplePoseidonSpecificCols::<u8>::width()].borrow_mut();
+        let start_timestamp_u32 = cols.start_timestamp.as_canonical_u32();
+        mem_fill_helper(
+            mem_helper,
+            start_timestamp_u32,
+            simple_cols.read_output_pointer.as_mut(),
+        );
+        mem_fill_helper(
+            mem_helper,
+            start_timestamp_u32 + 1,
+            simple_cols.read_input_pointer_1.as_mut(),
+        );
+        if simple_cols.is_compress.is_one() { // second input pointer: compress only
+            mem_fill_helper(
+                mem_helper,
+                start_timestamp_u32 + 2,
+                simple_cols.read_input_pointer_2.as_mut(),
+            );
+        }
+        mem_fill_helper(
+            mem_helper,
+            start_timestamp_u32 + 3,
+            simple_cols.read_data_1.as_mut(),
+        );
+        mem_fill_helper(
+            mem_helper,
+            start_timestamp_u32 + 4,
+            simple_cols.read_data_2.as_mut(),
+        );
+        mem_fill_helper(
+            mem_helper,
+            start_timestamp_u32 + 5,
+            simple_cols.write_data_1.as_mut(),
+        );
+        if simple_cols.is_compress.is_zero() { // second write: only when not compressing
+            mem_fill_helper(
+                mem_helper,
+                start_timestamp_u32 + 6,
+                simple_cols.write_data_2.as_mut(),
+            );
+        }
+    }
+
+    fn fill_verify_batch_chunk(&self, mem_helper: &MemoryAuxColsFactory<F>, chunk_slice: &mut [F]) {
+        let inner_width = self.subchip.air.width();
+        let width = NativePoseidon2Cols::<F, SBOX_REGISTERS>::width();
+        let num_non_inside_rows = { // read from the first row's `inner.export`
+            let cols: &NativePoseidon2Cols<F, SBOX_REGISTERS> = chunk_slice[..width].borrow();
+            cols.inner.export.as_canonical_u32() as usize
+        };
+        let total_num_rows = { // read from the last non-inside row's `inner.export`
+            let start = (num_non_inside_rows - 1) * width;
+            let last_cols: &NativePoseidon2Cols<F, SBOX_REGISTERS> =
+                chunk_slice[start..(start + width)].borrow();
+            // During execution, this field hasn't been filled with meaningful data. So we use this
+            // field to store the total number of rows (non-inside + inside) of this chunk.
+            last_cols.inner.export.as_canonical_u32() as usize
+        };
+        let mut first_round = true;
+        let mut root = [F::ZERO; CHUNK];
+        let mut inside_idx = num_non_inside_rows; // inside rows follow the non-inside rows
+        let mut non_inside_idx = 0;
+        while inside_idx < total_num_rows || non_inside_idx < num_non_inside_rows {
+            debug_assert!(non_inside_idx < num_non_inside_rows);
+            let incorporate_sibling = {
+                let start = non_inside_idx * width;
+                let row_slice = &mut chunk_slice[start..(start + width)];
+                let cols: &NativePoseidon2Cols<F, SBOX_REGISTERS> = row_slice.as_ref().borrow();
+                cols.incorporate_sibling.is_one()
+            };
+            if !incorporate_sibling { // incorporate-row step: replay its inside rows first
+                let mut prev_rolling_hash: [F; 2 * CHUNK];
+                let mut rolling_hash = [F::ZERO; 2 * CHUNK];
+                loop {
+                    let start = inside_idx * width;
+                    let row_slice = &mut chunk_slice[start..(start + width)];
+                    let mut input_len = 0; // number of non-exhausted cells in this inside row
+                    {
+                        let cols: &mut NativePoseidon2Cols<F, SBOX_REGISTERS> =
+                            row_slice.borrow_mut();
+                        let inside_row_specific_cols: &mut InsideRowSpecificCols<F> =
+                            cols.specific[..InsideRowSpecificCols::<u8>::width()].borrow_mut();
+                        let start_timestamp_u32 = cols.start_timestamp.as_canonical_u32();
+                        for (i, cell) in inside_row_specific_cols.cells.iter_mut().enumerate() {
+                            if i > 0 && cols.is_exhausted[i - 1].is_one() { // no more cells
+                                break;
+                            }
+                            input_len += 1;
+                            if cell.is_first_in_row.is_one() { // row pointer/length was read
+                                mem_fill_helper(
+                                    mem_helper,
+                                    start_timestamp_u32 + 2 * i as u32,
+                                    cell.read_row_pointer_and_length.as_mut(),
+                                );
+                            }
+                            mem_fill_helper(
+                                mem_helper,
+                                start_timestamp_u32 + 2 * i as u32 + 1,
+                                cell.read.as_mut(),
+                            );
+                        }
+                    }
+                    {
+                        let cols: &NativePoseidon2Cols<F, SBOX_REGISTERS> =
+                            row_slice.as_ref().borrow();
+                        rolling_hash[..input_len].copy_from_slice(&cols.inner.inputs[..input_len]);
+                    }
+                    prev_rolling_hash = rolling_hash; // keep the input of this permutation
+
+                    let inner_cols = &self.subchip.generate_trace(vec![rolling_hash]).values;
+                    row_slice[..inner_width].copy_from_slice(inner_cols);
+                    let cols: &NativePoseidon2Cols<F, SBOX_REGISTERS> = row_slice.as_ref().borrow();
+                    rolling_hash = *Self::poseidon2_output_from_trace(&cols.inner);
+                    inside_idx += 1;
+                    if cols.end_inside_row.is_one() {
+                        break;
+                    }
+                }
+
+                let start = non_inside_idx * width;
+                let row_slice = &mut chunk_slice[start..(start + width)];
+                let mut p2_input = [F::ZERO; 2 * CHUNK];
+                if first_round { // no root yet: reuse the input of the last inside row
+                    p2_input.copy_from_slice(&prev_rolling_hash);
+                } else { // left half: current root, right half: rolling hash
+                    p2_input[..CHUNK].copy_from_slice(&root);
+                    p2_input[CHUNK..].copy_from_slice(&rolling_hash[..CHUNK]);
+                }
+
+                first_round = false;
+                let inner_cols = &self.subchip.generate_trace(vec![p2_input]).values;
+                row_slice[..inner_width].copy_from_slice(inner_cols);
+                let cols: &mut NativePoseidon2Cols<F, SBOX_REGISTERS> = row_slice.borrow_mut();
+                Self::fill_timestamp_for_top_level(mem_helper, cols);
+                root.copy_from_slice(&Self::poseidon2_output_from_trace(&cols.inner)[..CHUNK]);
+                non_inside_idx += 1;
+            }
+
+            if non_inside_idx < num_non_inside_rows { // next top-level row: sibling step
+                let start = non_inside_idx * width;
+                let row_slice = &mut chunk_slice[start..(start + width)];
+                let p2_input = {
+                    let cols: &mut NativePoseidon2Cols<F, SBOX_REGISTERS> = row_slice.borrow_mut();
+                    Self::fill_timestamp_for_top_level(mem_helper, cols);
+                    let sibling = &cols.inner.inputs[..CHUNK]; // stored there during execution
+                    let top_level_specific_cols: &TopLevelSpecificCols<F> =
+                        cols.specific[..TopLevelSpecificCols::<F>::width()].borrow();
+                    let sibling_is_on_right = top_level_specific_cols.sibling_is_on_right.is_one();
+                    let mut p2_input = [F::ZERO; 2 * CHUNK];
+                    if sibling_is_on_right {
+                        p2_input[..CHUNK].copy_from_slice(sibling);
+                        p2_input[CHUNK..].copy_from_slice(&root);
+                    } else {
+                        p2_input[..CHUNK].copy_from_slice(&root);
+                        p2_input[CHUNK..].copy_from_slice(sibling);
+                    };
+                    p2_input
+                };
+                let inner_cols = &self.subchip.generate_trace(vec![p2_input]).values;
+                row_slice[..inner_width].copy_from_slice(inner_cols);
+                let cols: &NativePoseidon2Cols<F, SBOX_REGISTERS> = row_slice.as_ref().borrow();
+                root.copy_from_slice(&Self::poseidon2_output_from_trace(&cols.inner)[..CHUNK]);
+                non_inside_idx += 1;
+            }
+        }
+    }
+    fn fill_timestamp_for_top_level(
+        mem_helper: &MemoryAuxColsFactory<F>,
+        cols: &mut NativePoseidon2Cols<F, SBOX_REGISTERS>,
+    ) {
+        let top_level_specific_cols: &mut TopLevelSpecificCols<F> =
+            cols.specific[..TopLevelSpecificCols::<u8>::width()].borrow_mut();
+        let start_timestamp_u32 = cols.start_timestamp.as_canonical_u32();
+        if cols.end_top_level.is_one() { // this row also carries the six initial reads
+            let very_start_timestamp_u32 = cols.very_first_timestamp.as_canonical_u32();
+            mem_fill_helper(
+                mem_helper,
+                very_start_timestamp_u32,
+                top_level_specific_cols.dim_base_pointer_read.as_mut(),
+            );
+            mem_fill_helper(
+                mem_helper,
+                very_start_timestamp_u32 + 1,
+                top_level_specific_cols.opened_base_pointer_read.as_mut(),
+            );
+            mem_fill_helper(
+                mem_helper,
+                very_start_timestamp_u32 + 2,
+                top_level_specific_cols.opened_length_read.as_mut(),
+            );
+            mem_fill_helper(
+                mem_helper,
+                very_start_timestamp_u32 + 3,
+                top_level_specific_cols.index_base_pointer_read.as_mut(),
+            );
+            mem_fill_helper(
+                mem_helper,
+                very_start_timestamp_u32 + 4,
+                top_level_specific_cols.commit_pointer_read.as_mut(),
+            );
+            mem_fill_helper(
+                mem_helper,
+                very_start_timestamp_u32 + 5,
+                top_level_specific_cols.commit_read.as_mut(),
+            );
+        }
+        if cols.incorporate_row.is_one() { // initial and final height reads
+            let end_timestamp = top_level_specific_cols.end_timestamp.as_canonical_u32();
+            mem_fill_helper(
+                mem_helper,
+                end_timestamp - 2,
+                top_level_specific_cols
+                    .read_initial_height_or_sibling_is_on_right
+                    .as_mut(),
+            );
+            mem_fill_helper(
+                mem_helper,
+                end_timestamp - 1,
+                top_level_specific_cols.read_final_height.as_mut(),
+            );
+        } else if cols.incorporate_sibling.is_one() { // one read, after NUM_INITIAL_READS slots
+            mem_fill_helper(
+                mem_helper,
+                start_timestamp_u32 + NUM_INITIAL_READS as u32,
+                top_level_specific_cols
+                    .read_initial_height_or_sibling_is_on_right
+                    .as_mut(),
+            );
+        } else { // callers only pass incorporate-row or incorporate-sibling rows
+            unreachable!()
+        }
+    }
+
+    #[inline(always)]
+    fn poseidon2_output_from_trace(inner: &Poseidon2SubCols<F, SBOX_REGISTERS>) -> &[F; 2 * CHUNK] {
+        &inner.ending_full_rounds.last().unwrap().post // `post` of the last ending full round
+    }
+}
+
+fn tracing_read_native_helper<F: PrimeField32, const BLOCK_SIZE: usize>(
+    memory: &mut TracingMemory,
+    ptr: u32,
+    base_aux: &mut MemoryBaseAuxCols<F>,
+) -> [F; BLOCK_SIZE] {
+    let mut prev_ts = 0; // out-parameter, overwritten by `tracing_read_native`
+    let ret = tracing_read_native(memory, ptr, &mut prev_ts);
+    base_aux.set_prev(F::from_canonical_u32(prev_ts)); // stored so `mem_fill_helper` can read it
+    ret
+}
+
+/// Fills `base_aux` for an access at `timestamp`; its `prev_timestamp` must already be set.
+fn mem_fill_helper<F: PrimeField32>(
+    mem_helper: &MemoryAuxColsFactory<F>,
+    timestamp: u32,
+    base_aux: &mut MemoryBaseAuxCols<F>,
+) {
+    let prev_ts = base_aux.prev_timestamp.as_canonical_u32(); // read before `fill` rewrites it
+    mem_helper.fill(prev_ts, timestamp, base_aux);
+}
diff --git a/extensions/native/circuit/src/poseidon2/execution.rs b/extensions/native/circuit/src/poseidon2/execution.rs
new file mode 100644
index 0000000000..661d8e10cc
--- /dev/null
+++ b/extensions/native/circuit/src/poseidon2/execution.rs
@@ -0,0 +1,500 @@
+use std::borrow::{Borrow, BorrowMut};
+
+use openvm_circuit::{arch::*, system::memory::online::GuestMemory};
+use openvm_circuit_primitives::AlignedBytesBorrow;
+use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP, LocalOpcode};
+use openvm_native_compiler::{
+    conversion::AS,
+    Poseidon2Opcode::{COMP_POS2, PERM_POS2},
+    VerifyBatchOpcode::VERIFY_BATCH,
+};
+use openvm_poseidon2_air::Poseidon2SubChip;
+use openvm_stark_backend::{
+    p3_field::{Field, PrimeField32},
+    p3_maybe_rayon::prelude::{ParallelIterator, ParallelSlice},
+};
+
+use super::chip::{compress, NativePoseidon2Executor};
+use crate::poseidon2::CHUNK;
+
+/// Pre-computed operands of a `PERM_POS2` / `COMP_POS2` instruction.
+///
+/// Written by `pre_compute_pos2_impl` and read back by `execute_pos2_e12_impl`.
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct Pos2PreCompute<'a, F: Field, const SBOX_REGISTERS: usize> {
+    /// Sub-chip borrowed from the executor; its `permute` is called at execution time.
+    subchip: &'a Poseidon2SubChip<F, SBOX_REGISTERS>,
+    /// Instruction operand `a`: native-address-space cell that holds the output pointer.
+    output_register: u32,
+    /// Instruction operand `b`: native-address-space cell that holds the first input pointer.
+    input_register_1: u32,
+    /// Instruction operand `c`: native-address-space cell that holds the second input pointer.
+    /// Only read when executing with `IS_PERM = false`; otherwise the second input is taken
+    /// from `CHUNK` cells after the first input pointer.
+    input_register_2: u32,
+}
+
+/// Pre-computed operands of a `VERIFY_BATCH` instruction.
+///
+/// Written by `pre_compute_verify_batch_impl` and read back by
+/// `execute_verify_batch_e12_impl`.
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct VerifyBatchPreCompute<'a, F: Field, const SBOX_REGISTERS: usize> {
+    /// Sub-chip borrowed from the executor; used for permutations / compressions.
+    subchip: &'a Poseidon2SubChip<F, SBOX_REGISTERS>,
+    /// Instruction operand `a`: cell holding the base pointer of the dimensions array.
+    dim_register: u32,
+    /// Instruction operand `b`: cell holding the base pointer of the opened-rows table.
+    opened_register: u32,
+    /// Instruction operand `c`: cell holding the number of opened rows.
+    opened_length_register: u32,
+    /// Instruction operand `d`: cell holding the index of the sibling proof in `hint_space`.
+    proof_id_ptr: u32,
+    /// Instruction operand `e`: cell holding the base pointer of the sibling-side bits.
+    index_register: u32,
+    /// Instruction operand `f`: cell holding the pointer to the expected commitment.
+    commit_register: u32,
+    /// The field element whose product with instruction operand `g` is one; multiplied with
+    /// each stored row length to obtain the number of cells in that row.
+    opened_element_size: F,
+}
+
+impl<'a, F: PrimeField32, const SBOX_REGISTERS: usize> NativePoseidon2Executor<F, SBOX_REGISTERS> {
+    /// Decodes a `PERM_POS2` / `COMP_POS2` instruction into `pos2_data`.
+    ///
+    /// Operands `a`, `b` and `c` are stored as the output, first-input and second-input
+    /// registers, and `pos2_data.subchip` borrows this executor's sub-chip.
+    ///
+    /// # Errors
+    /// Returns `StaticProgramError::InvalidInstruction(pc)` when the opcode is neither
+    /// `PERM_POS2` nor `COMP_POS2`, or when operand `d` or `e` is not `AS::Native`.
+    #[inline(always)]
+    fn pre_compute_pos2_impl(
+        &'a self,
+        pc: u32,
+        inst: &Instruction<F>,
+        pos2_data: &mut Pos2PreCompute<'a, F, SBOX_REGISTERS>,
+    ) -> Result<(), StaticProgramError> {
+        let &Instruction {
+            opcode,
+            a,
+            b,
+            c,
+            d,
+            e,
+            ..
+        } = inst;
+
+        if opcode != PERM_POS2.global_opcode() && opcode != COMP_POS2.global_opcode() {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+
+        let a = a.as_canonical_u32();
+        let b = b.as_canonical_u32();
+        let c = c.as_canonical_u32();
+        let d = d.as_canonical_u32();
+        let e = e.as_canonical_u32();
+
+        // Both address-space operands must select the native address space.
+        if d != AS::Native as u32 {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+        if e != AS::Native as u32 {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+
+        *pos2_data = Pos2PreCompute {
+            subchip: &self.subchip,
+            output_register: a,
+            input_register_1: b,
+            input_register_2: c,
+        };
+
+        Ok(())
+    }
+
+    /// Decodes a `VERIFY_BATCH` instruction into `verify_batch_data`.
+    ///
+    /// Operands `a`..`f` are stored verbatim (as canonical `u32`s) in the order
+    /// dim / opened / opened-length / proof-id / index / commit. Operand `g` is the
+    /// multiplicative inverse of the opened element size; the element size itself is
+    /// recovered here and stored in `opened_element_size`.
+    ///
+    /// # Errors
+    /// Returns `StaticProgramError::InvalidInstruction(pc)` when the opcode is not
+    /// `VERIFY_BATCH`, or when operand `g` is zero and therefore has no inverse.
+    #[inline(always)]
+    fn pre_compute_verify_batch_impl(
+        &'a self,
+        pc: u32,
+        inst: &Instruction<F>,
+        verify_batch_data: &mut VerifyBatchPreCompute<'a, F, SBOX_REGISTERS>,
+    ) -> Result<(), StaticProgramError> {
+        let &Instruction {
+            opcode,
+            a,
+            b,
+            c,
+            d,
+            e,
+            f,
+            g,
+            ..
+        } = inst;
+
+        if opcode != VERIFY_BATCH.global_opcode() {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+
+        let a = a.as_canonical_u32();
+        let b = b.as_canonical_u32();
+        let c = c.as_canonical_u32();
+        let d = d.as_canonical_u32();
+        let e = e.as_canonical_u32();
+        let f = f.as_canonical_u32();
+
+        // Invert `g` directly instead of counting up from one until the product is one:
+        // that search never terminates for `g == 0` and needs on the order of the field
+        // size iterations for any other malformed operand. For well-formed operands the
+        // result is the same element the search would have found.
+        let opened_element_size_inv = g;
+        let opened_element_size = opened_element_size_inv
+            .try_inverse()
+            .ok_or(StaticProgramError::InvalidInstruction(pc))?;
+
+        *verify_batch_data = VerifyBatchPreCompute {
+            subchip: &self.subchip,
+            dim_register: a,
+            opened_register: b,
+            opened_length_register: c,
+            proof_id_ptr: d,
+            index_register: e,
+            commit_register: f,
+            opened_element_size,
+        };
+
+        Ok(())
+    }
+}
+
+impl<F: PrimeField32, const SBOX_REGISTERS: usize> Executor<F>
+    for NativePoseidon2Executor<F, SBOX_REGISTERS>
+{
+    /// Size in bytes of the pre-compute buffer: the larger of the two record layouts this
+    /// executor may write into it.
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize {
+        let pos2_size = size_of::<Pos2PreCompute<F, SBOX_REGISTERS>>();
+        let batch_size = size_of::<VerifyBatchPreCompute<F, SBOX_REGISTERS>>();
+        pos2_size.max(batch_size)
+    }
+
+    /// Decodes `inst` into `data` and returns the matching pure-execution handler.
+    ///
+    /// `PERM_POS2` and `COMP_POS2` are decoded as a `Pos2PreCompute`; every other opcode is
+    /// handed to `pre_compute_verify_batch_impl`, which rejects anything but `VERIFY_BATCH`.
+    /// Decoding errors are propagated unchanged.
+    #[inline(always)]
+    fn pre_compute<Ctx: ExecutionCtxTrait>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let opcode = inst.opcode;
+        let is_perm = opcode == PERM_POS2.global_opcode();
+
+        // Anything that is not a Poseidon2 opcode takes the verify-batch path.
+        if !(is_perm || opcode == COMP_POS2.global_opcode()) {
+            let record: &mut VerifyBatchPreCompute<F, SBOX_REGISTERS> = data.borrow_mut();
+            self.pre_compute_verify_batch_impl(pc, inst, record)?;
+            return Ok(execute_verify_batch_e1_impl::<_, _, SBOX_REGISTERS>);
+        }
+
+        let record: &mut Pos2PreCompute<F, SBOX_REGISTERS> = data.borrow_mut();
+        self.pre_compute_pos2_impl(pc, inst, record)?;
+        if is_perm {
+            Ok(execute_pos2_e1_impl::<_, _, SBOX_REGISTERS, true>)
+        } else {
+            Ok(execute_pos2_e1_impl::<_, _, SBOX_REGISTERS, false>)
+        }
+    }
+}
+
+impl<F: PrimeField32, const SBOX_REGISTERS: usize> MeteredExecutor<F>
+    for NativePoseidon2Executor<F, SBOX_REGISTERS>
+{
+    /// Size in bytes of the metered pre-compute buffer: the larger of the two
+    /// `E2PreCompute`-wrapped record layouts this executor may write into it.
+    #[inline(always)]
+    fn metered_pre_compute_size(&self) -> usize {
+        let pos2_size = size_of::<E2PreCompute<Pos2PreCompute<F, SBOX_REGISTERS>>>();
+        let batch_size = size_of::<E2PreCompute<VerifyBatchPreCompute<F, SBOX_REGISTERS>>>();
+        pos2_size.max(batch_size)
+    }
+
+    /// Decodes `inst` into `data` as an `E2PreCompute` record tagged with `chip_idx` and
+    /// returns the matching metered-execution handler.
+    ///
+    /// `PERM_POS2` and `COMP_POS2` are decoded as a `Pos2PreCompute`; every other opcode is
+    /// handed to `pre_compute_verify_batch_impl`, which rejects anything but `VERIFY_BATCH`.
+    /// Decoding errors are propagated unchanged.
+    #[inline(always)]
+    fn metered_pre_compute<Ctx: MeteredExecutionCtxTrait>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let opcode = inst.opcode;
+        let is_perm = opcode == PERM_POS2.global_opcode();
+
+        // Anything that is not a Poseidon2 opcode takes the verify-batch path.
+        if !(is_perm || opcode == COMP_POS2.global_opcode()) {
+            let record: &mut E2PreCompute<VerifyBatchPreCompute<F, SBOX_REGISTERS>> =
+                data.borrow_mut();
+            record.chip_idx = chip_idx as u32;
+
+            self.pre_compute_verify_batch_impl(pc, inst, &mut record.data)?;
+            return Ok(execute_verify_batch_e2_impl::<_, _, SBOX_REGISTERS>);
+        }
+
+        let record: &mut E2PreCompute<Pos2PreCompute<F, SBOX_REGISTERS>> = data.borrow_mut();
+        record.chip_idx = chip_idx as u32;
+
+        self.pre_compute_pos2_impl(pc, inst, &mut record.data)?;
+        if is_perm {
+            Ok(execute_pos2_e2_impl::<_, _, SBOX_REGISTERS, true>)
+        } else {
+            Ok(execute_pos2_e2_impl::<_, _, SBOX_REGISTERS, false>)
+        }
+    }
+}
+
+/// Pure-execution handler for the Poseidon2 opcodes (`IS_PERM` selects the permute variant).
+///
+/// Reinterprets `pre_compute` as a `Pos2PreCompute` and forwards to `execute_pos2_e12_impl`;
+/// the height it returns is discarded.
+///
+/// # Safety
+/// `pre_compute` is reinterpreted without validation, so it is expected to contain the
+/// `Pos2PreCompute` written during pre-compute for this instruction.
+unsafe fn execute_pos2_e1_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const SBOX_REGISTERS: usize,
+    const IS_PERM: bool,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let params: &Pos2PreCompute<F, SBOX_REGISTERS> = pre_compute.borrow();
+    execute_pos2_e12_impl::<F, CTX, SBOX_REGISTERS, IS_PERM>(params, vm_state);
+}
+
+/// Metered-execution handler for the Poseidon2 opcodes (`IS_PERM` selects the permute variant).
+///
+/// Reinterprets `pre_compute` as an `E2PreCompute<Pos2PreCompute>`, runs
+/// `execute_pos2_e12_impl` on its payload and reports the returned height for the record's
+/// `chip_idx` through `ctx.on_height_change`.
+///
+/// # Safety
+/// `pre_compute` is reinterpreted without validation, so it is expected to contain the
+/// `E2PreCompute<Pos2PreCompute>` written during metered pre-compute for this instruction.
+unsafe fn execute_pos2_e2_impl<
+    F: PrimeField32,
+    CTX: MeteredExecutionCtxTrait,
+    const SBOX_REGISTERS: usize,
+    const IS_PERM: bool,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let record: &E2PreCompute<Pos2PreCompute<F, SBOX_REGISTERS>> = pre_compute.borrow();
+    let chip_idx = record.chip_idx as usize;
+    let height = execute_pos2_e12_impl::<F, CTX, SBOX_REGISTERS, IS_PERM>(&record.data, vm_state);
+    vm_state.ctx.on_height_change(chip_idx, height);
+}
+
+/// Pure-execution handler for `VERIFY_BATCH`.
+///
+/// Reinterprets `pre_compute` as a `VerifyBatchPreCompute` and runs
+/// `execute_verify_batch_e12_impl` in optimistic mode; the height it returns is discarded.
+///
+/// # Safety
+/// `pre_compute` is reinterpreted without validation, so it is expected to contain the
+/// `VerifyBatchPreCompute` written during pre-compute for this instruction.
+unsafe fn execute_verify_batch_e1_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const SBOX_REGISTERS: usize,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    // NOTE: using optimistic execution
+    const OPTIMISTIC: bool = true;
+    let params: &VerifyBatchPreCompute<F, SBOX_REGISTERS> = pre_compute.borrow();
+    execute_verify_batch_e12_impl::<F, CTX, SBOX_REGISTERS, OPTIMISTIC>(params, vm_state);
+}
+
+/// Metered-execution handler for `VERIFY_BATCH`.
+///
+/// Reinterprets `pre_compute` as an `E2PreCompute<VerifyBatchPreCompute>`, runs
+/// `execute_verify_batch_e12_impl` in optimistic mode on its payload and reports the returned
+/// height for the record's `chip_idx` through `ctx.on_height_change`.
+///
+/// # Safety
+/// `pre_compute` is reinterpreted without validation, so it is expected to contain the
+/// `E2PreCompute<VerifyBatchPreCompute>` written during metered pre-compute for this
+/// instruction.
+unsafe fn execute_verify_batch_e2_impl<
+    F: PrimeField32,
+    CTX: MeteredExecutionCtxTrait,
+    const SBOX_REGISTERS: usize,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    // NOTE: using optimistic execution
+    const OPTIMISTIC: bool = true;
+    let record: &E2PreCompute<VerifyBatchPreCompute<F, SBOX_REGISTERS>> = pre_compute.borrow();
+    let chip_idx = record.chip_idx as usize;
+    let height =
+        execute_verify_batch_e12_impl::<F, CTX, SBOX_REGISTERS, OPTIMISTIC>(&record.data, vm_state);
+    vm_state.ctx.on_height_change(chip_idx, height);
+}
+
+/// Shared E1/E2 body for `PERM_POS2` (`IS_PERM = true`) and `COMP_POS2` (`IS_PERM = false`).
+///
+/// Reads the output pointer and the first input pointer from their registers. The second
+/// input pointer is `first + CHUNK` when `IS_PERM`, otherwise it is read from
+/// `input_register_2`. Two `CHUNK`-sized blocks are then read, concatenated and permuted.
+/// The first `CHUNK` outputs are always written at the output pointer; when `IS_PERM` the
+/// remaining `CHUNK` outputs are written right after them.
+///
+/// Advances `pc` by `DEFAULT_PC_STEP`, increments `instret`, and returns `1`.
+#[inline(always)]
+unsafe fn execute_pos2_e12_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const SBOX_REGISTERS: usize,
+    const IS_PERM: bool,
+>(
+    pre_compute: &Pos2PreCompute<F, SBOX_REGISTERS>,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) -> u32 {
+    let native = AS::Native as u32;
+
+    // Pointer reads, in the same order as the operands appear in the instruction.
+    let [output_pointer]: [F; 1] = vm_state.vm_read(native, pre_compute.output_register);
+    let [lhs_pointer]: [F; 1] = vm_state.vm_read(native, pre_compute.input_register_1);
+    let rhs_pointer: F = if IS_PERM {
+        lhs_pointer + F::from_canonical_usize(CHUNK)
+    } else {
+        let [pointer]: [F; 1] = vm_state.vm_read(native, pre_compute.input_register_2);
+        pointer
+    };
+
+    let lhs: [F; CHUNK] = vm_state.vm_read(native, lhs_pointer.as_canonical_u32());
+    let rhs: [F; CHUNK] = vm_state.vm_read(native, rhs_pointer.as_canonical_u32());
+
+    // Permutation input is `lhs || rhs`.
+    let mut state = [F::ZERO; 2 * CHUNK];
+    state[..CHUNK].copy_from_slice(&lhs);
+    state[CHUNK..].copy_from_slice(&rhs);
+    let output = pre_compute.subchip.permute(state);
+
+    let output_base = output_pointer.as_canonical_u32();
+    let first_half: [F; CHUNK] = std::array::from_fn(|i| output[i]);
+    vm_state.vm_write::<F, CHUNK>(native, output_base, &first_half);
+    if IS_PERM {
+        let second_half: [F; CHUNK] = std::array::from_fn(|i| output[i + CHUNK]);
+        vm_state.vm_write::<F, CHUNK>(native, output_base + CHUNK as u32, &second_half);
+    }
+
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+
+    1
+}
+
+/// Shared E1/E2 body for `VERIFY_BATCH`.
+///
+/// Reads the instruction's registers, then walks `log_height` from the value stored at the
+/// start of the dimensions array down to zero. At each level it (1) absorbs every consecutive
+/// opened row whose dimension equals the current `log_height`, `CHUNK` cells at a time, and
+/// (2) for every level except zero, consumes one sibling from the hinted proof.
+///
+/// When `OPTIMISTIC` is `true`, no permutation or compression is evaluated and the final
+/// `commit == root` assertion is skipped; only the memory accesses and the height count are
+/// performed.
+///
+/// Returns `height`, which is incremented once per absorbed chunk, once per level that had
+/// opened rows, and once per sibling step. Also advances `pc` by `DEFAULT_PC_STEP` and
+/// increments `instret`.
+///
+/// # Panics
+/// Panics if the proof id is out of range for `hint_space`, if the hinted proof length is not
+/// a multiple of `CHUNK`, if the proof has fewer siblings than levels visited, or if one of the
+/// `assert_eq!` checks fails.
+#[inline(always)]
+unsafe fn execute_verify_batch_e12_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const SBOX_REGISTERS: usize,
+    const OPTIMISTIC: bool,
+>(
+    pre_compute: &VerifyBatchPreCompute<F, SBOX_REGISTERS>,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) -> u32 {
+    let subchip = pre_compute.subchip;
+    let opened_element_size = pre_compute.opened_element_size;
+
+    // Dereference every operand register up front.
+    let [proof_id]: [F; 1] = vm_state.host_read(AS::Native as u32, pre_compute.proof_id_ptr);
+    let [dim_base_pointer]: [F; 1] = vm_state.vm_read(AS::Native as u32, pre_compute.dim_register);
+    let dim_base_pointer_u32 = dim_base_pointer.as_canonical_u32();
+    let [opened_base_pointer]: [F; 1] =
+        vm_state.vm_read(AS::Native as u32, pre_compute.opened_register);
+    let opened_base_pointer_u32 = opened_base_pointer.as_canonical_u32();
+    let [opened_length]: [F; 1] =
+        vm_state.vm_read(AS::Native as u32, pre_compute.opened_length_register);
+    let [index_base_pointer]: [F; 1] =
+        vm_state.vm_read(AS::Native as u32, pre_compute.index_register);
+    let index_base_pointer_u32 = index_base_pointer.as_canonical_u32();
+    let [commit_pointer]: [F; 1] = vm_state.vm_read(AS::Native as u32, pre_compute.commit_register);
+    let commit: [F; CHUNK] = vm_state.vm_read(AS::Native as u32, commit_pointer.as_canonical_u32());
+
+    let opened_length = opened_length.as_canonical_u32() as usize;
+
+    // The first dimension entry is the starting (largest) log height.
+    let initial_log_height = {
+        let [height]: [F; 1] = vm_state.host_read(AS::Native as u32, dim_base_pointer_u32);
+        height.as_canonical_u32()
+    };
+
+    // Signed so that the loop below can process level 0 and stop at -1.
+    let mut log_height = initial_log_height as i32;
+    let mut sibling_index = 0;
+    let mut opened_index = 0;
+    let mut height = 0;
+
+    let mut root = [F::ZERO; CHUNK];
+    // Split the hinted proof into `CHUNK`-sized siblings.
+    let sibling_proof: Vec<[F; CHUNK]> = {
+        let proof_idx = proof_id.as_canonical_u32() as usize;
+        vm_state.streams.hint_space[proof_idx]
+            .par_chunks(CHUNK)
+            .map(|c| c.try_into().unwrap())
+            .collect()
+    };
+
+    while log_height >= 0 {
+        // Absorb opened rows only if the next unread row belongs to this level.
+        if opened_index < opened_length
+            && vm_state.host_read::<F, 1>(
+                AS::Native as u32,
+                dim_base_pointer_u32 + opened_index as u32,
+            )[0] == F::from_canonical_u32(log_height as u32)
+        {
+            let initial_opened_index = opened_index;
+
+            let mut row_pointer = 0;
+            let mut row_end = 0;
+
+            let mut rolling_hash = [F::ZERO; 2 * CHUNK];
+
+            let mut is_first_in_segment = true;
+
+            // Each iteration fills up to `CHUNK` cells of `rolling_hash`, moving on to the
+            // next row of the same level whenever the current row is exhausted.
+            loop {
+                let mut cells_len = 0;
+                for chunk_elem in rolling_hash.iter_mut().take(CHUNK) {
+                    if is_first_in_segment || row_pointer == row_end {
+                        if is_first_in_segment {
+                            is_first_in_segment = false;
+                        } else {
+                            opened_index += 1;
+                            // Stop at the end of the table or at the first row of another level.
+                            if opened_index == opened_length
+                                || vm_state.host_read::<F, 1>(
+                                    AS::Native as u32,
+                                    dim_base_pointer_u32 + opened_index as u32,
+                                )[0] != F::from_canonical_u32(log_height as u32)
+                            {
+                                break;
+                            }
+                        }
+                        // Table entry `opened_index` is the pair `[row_pointer, row_len]`.
+                        let [new_row_pointer, row_len]: [F; 2] = vm_state.vm_read(
+                            AS::Native as u32,
+                            opened_base_pointer_u32 + 2 * opened_index as u32,
+                        );
+                        row_pointer = new_row_pointer.as_canonical_u32() as usize;
+                        row_end = row_pointer
+                            + (opened_element_size * row_len).as_canonical_u32() as usize;
+                    }
+                    let [value]: [F; 1] = vm_state.vm_read(AS::Native as u32, row_pointer as u32);
+                    cells_len += 1;
+                    *chunk_elem = value;
+                    row_pointer += 1;
+                }
+                // Nothing was read: the previous chunk ended exactly on the last row.
+                if cells_len == 0 {
+                    break;
+                }
+                height += 1;
+                if !OPTIMISTIC {
+                    subchip.permute_mut(&mut rolling_hash);
+                }
+                // A short chunk means the rows of this level are exhausted.
+                if cells_len < CHUNK {
+                    break;
+                }
+            }
+
+            // `opened_index` now points one past the last row absorbed at this level.
+            let final_opened_index = opened_index - 1;
+            let [height_check]: [F; 1] = vm_state.host_read(
+                AS::Native as u32,
+                dim_base_pointer_u32 + initial_opened_index as u32,
+            );
+            assert_eq!(height_check, F::from_canonical_u32(log_height as u32));
+            let [height_check]: [F; 1] = vm_state.host_read(
+                AS::Native as u32,
+                dim_base_pointer_u32 + final_opened_index as u32,
+            );
+            assert_eq!(height_check, F::from_canonical_u32(log_height as u32));
+
+            if !OPTIMISTIC {
+                let hash: [F; CHUNK] = std::array::from_fn(|i| rolling_hash[i]);
+
+                // At the starting level the rows' hash is the root itself; below it, the
+                // hash is compressed into the running root.
+                let new_root = if log_height as u32 == initial_log_height {
+                    hash
+                } else {
+                    let (_, new_root) = compress(subchip, root, hash);
+                    new_root
+                };
+                root = new_root;
+            }
+            height += 1;
+        }
+
+        // Every level except zero consumes one sibling and one side bit.
+        if log_height != 0 {
+            let [sibling_is_on_right]: [F; 1] = vm_state.vm_read(
+                AS::Native as u32,
+                index_base_pointer_u32 + sibling_index as u32,
+            );
+            let sibling_is_on_right = sibling_is_on_right == F::ONE;
+            let sibling = sibling_proof[sibling_index];
+            if !OPTIMISTIC {
+                let (_, new_root) = if sibling_is_on_right {
+                    compress(subchip, sibling, root)
+                } else {
+                    compress(subchip, root, sibling)
+                };
+                root = new_root;
+            }
+            height += 1;
+        }
+
+        log_height -= 1;
+        sibling_index += 1;
+    }
+
+    if !OPTIMISTIC {
+        assert_eq!(commit, root);
+    }
+
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+
+    height
+}
diff --git a/extensions/native/circuit/src/poseidon2/mod.rs b/extensions/native/circuit/src/poseidon2/mod.rs
index af503e20f4..7d50050ed1 100644
--- a/extensions/native/circuit/src/poseidon2/mod.rs
+++ b/extensions/native/circuit/src/poseidon2/mod.rs
@@ -1,8 +1,14 @@
+use openvm_circuit::arch::VmChipWrapper;
+
+use crate::chip::NativePoseidon2Filler;
+
 pub mod air;
 pub mod chip;
-mod columns;
+pub mod columns;
+mod execution;
 #[cfg(test)]
 mod tests;
-mod trace;
 
 const CHUNK: usize = 8;
+pub type NativePoseidon2Chip<F, const SBOX_REGISTERS: usize> =
+    VmChipWrapper<F, NativePoseidon2Filler<F, SBOX_REGISTERS>>;
diff --git a/extensions/native/circuit/src/poseidon2/tests.rs b/extensions/native/circuit/src/poseidon2/tests.rs
index 32a0e483a3..64966585c0 100644
--- a/extensions/native/circuit/src/poseidon2/tests.rs
+++ b/extensions/native/circuit/src/poseidon2/tests.rs
@@ -1,11 +1,8 @@
-use std::{
-    cmp::min,
-    sync::{Arc, Mutex},
-};
+use std::cmp::min;
 
-use openvm_circuit::arch::{
-    testing::{memory::gen_pointer, VmChipTestBuilder, VmChipTester},
-    verify_single, Streams, VirtualMachine,
+use openvm_circuit::{
+    arch::testing::{memory::gen_pointer, TestChipHarness, VmChipTestBuilder, VmChipTester},
+    utils::air_test,
 };
 use openvm_instructions::{instruction::Instruction, program::Program, LocalOpcode, SystemOpcode};
 use openvm_native_compiler::{
@@ -16,14 +13,16 @@ use openvm_poseidon2_air::{Poseidon2Config, Poseidon2SubChip};
 use openvm_stark_backend::{
     p3_air::BaseAir,
     p3_field::{Field, FieldAlgebra, PrimeField32, PrimeField64},
+    p3_matrix::{
+        dense::{DenseMatrix, RowMajorMatrix},
+        Matrix,
+    },
     utils::disable_debug_builder,
     verifier::VerificationError,
 };
 use openvm_stark_sdk::{
     config::{
         baby_bear_blake3::{BabyBearBlake3Config, BabyBearBlake3Engine},
-        baby_bear_poseidon2::BabyBearPoseidon2Engine,
-        fri_params::standard_fri_params_with_100_bits_conjectured_security,
         FriParameters,
     },
     engine::StarkFriEngine,
@@ -34,12 +33,38 @@ use rand::{rngs::StdRng, Rng};
 
 use super::air::VerifyBatchBus;
 use crate::{
-    poseidon2::{chip::NativePoseidon2Chip, CHUNK},
-    NativeConfig,
+    air::NativePoseidon2Air,
+    chip::NativePoseidon2Executor,
+    poseidon2::{chip::NativePoseidon2Filler, CHUNK},
+    NativeConfig, NativeCpuBuilder, NativePoseidon2Chip,
 };
 
 const VERIFY_BATCH_BUS: VerifyBatchBus = VerifyBatchBus::new(7);
+const MAX_INS_CAPACITY: usize = 1 << 15;
+type Harness<F, const SBOX_REGISTERS: usize> = TestChipHarness<
+    F,
+    NativePoseidon2Executor<F, SBOX_REGISTERS>,
+    NativePoseidon2Air<F, SBOX_REGISTERS>,
+    NativePoseidon2Chip<F, SBOX_REGISTERS>,
+>;
+
+fn create_test_chip<F: PrimeField32, const SBOX_REGISTERS: usize>(
+    tester: &VmChipTestBuilder<F>,
+) -> Harness<F, SBOX_REGISTERS> {
+    let air = NativePoseidon2Air::new(
+        tester.execution_bridge(),
+        tester.memory_bridge(),
+        VERIFY_BATCH_BUS,
+        Poseidon2Config::default(),
+    );
+    let step = NativePoseidon2Executor::new(Poseidon2Config::default());
+    let chip = NativePoseidon2Chip::new(
+        NativePoseidon2Filler::new(Poseidon2Config::default()),
+        tester.memory_helper(),
+    );
 
+    Harness::with_capacity(step, air, chip, MAX_INS_CAPACITY)
+}
 fn compute_commit<F: Field>(
     dim: &[usize],
     opened: &[Vec<F>],
@@ -140,140 +165,144 @@ fn random_instance(
 
 const SBOX_REGISTERS: usize = 1;
 
+#[derive(Clone)]
 struct Case {
     row_lengths: Vec<Vec<usize>>,
     opened_element_size: usize,
 }
 
+/// Builds a random `VERIFY_BATCH` instance for `case`, writes its inputs into the native
+/// address space of `tester`, pushes the sibling proof onto `tester.streams.hint_space`, and
+/// executes a single `VERIFY_BATCH` instruction through `harness`.
+///
+/// The statement order matters: `rng` is drawn from repeatedly, so reordering the
+/// `gen_pointer` / `random_instance` calls changes the generated instance.
+fn set_and_execute<const SBOX_REGISTERS: usize>(
+    tester: &mut VmChipTestBuilder<F>,
+    harness: &mut Harness<BabyBear, SBOX_REGISTERS>,
+    rng: &mut StdRng,
+    case: Case,
+) {
+    // The compression callback permutes `left || right` with the executor's own sub-chip and
+    // returns the two halves of the result.
+    let instance = random_instance(
+        rng,
+        case.row_lengths,
+        case.opened_element_size,
+        |left, right| {
+            let concatenated =
+                std::array::from_fn(|i| if i < CHUNK { left[i] } else { right[i - CHUNK] });
+            let permuted = harness.executor.subchip.permute(concatenated);
+            (
+                std::array::from_fn(|i| permuted[i]),
+                std::array::from_fn(|i| permuted[i + CHUNK]),
+            )
+        },
+    );
+    let VerifyBatchInstance {
+        dim,
+        opened,
+        proof,
+        sibling_is_on_right,
+        commit,
+    } = instance;
+
+    // Cells that act as the instruction's operand registers.
+    let dim_register = gen_pointer(rng, 1);
+    let opened_register = gen_pointer(rng, 1);
+    let opened_length_register = gen_pointer(rng, 1);
+    let proof_id = gen_pointer(rng, 1);
+    let index_register = gen_pointer(rng, 1);
+    let commit_register = gen_pointer(rng, 1);
+
+    // Locations of the arrays those registers point at.
+    let dim_base_pointer = gen_pointer(rng, 1);
+    let opened_base_pointer = gen_pointer(rng, 2);
+    let index_base_pointer = gen_pointer(rng, 1);
+    let commit_pointer = gen_pointer(rng, 1);
+
+    let address_space = AS::Native as usize;
+    tester.write_usize(address_space, dim_register, [dim_base_pointer]);
+    tester.write_usize(address_space, opened_register, [opened_base_pointer]);
+    tester.write_usize(address_space, opened_length_register, [opened.len()]);
+    // The proof is pushed further down, so the current length is the index it will occupy.
+    tester.write_usize(address_space, proof_id, [tester.streams.hint_space.len()]);
+    tester.write_usize(address_space, index_register, [index_base_pointer]);
+    tester.write_usize(address_space, commit_register, [commit_pointer]);
+
+    for (i, &dim_value) in dim.iter().enumerate() {
+        tester.write_usize(address_space, dim_base_pointer + i, [dim_value]);
+    }
+    // Each opened-table entry is `[row_pointer, cells / opened_element_size]`, followed by
+    // writing the row's cells one by one at `row_pointer`.
+    for (i, opened_row) in opened.iter().enumerate() {
+        let row_pointer = gen_pointer(rng, 1);
+        tester.write_usize(
+            address_space,
+            opened_base_pointer + (2 * i),
+            [row_pointer, opened_row.len() / case.opened_element_size],
+        );
+        for (j, &opened_value) in opened_row.iter().enumerate() {
+            tester.write(address_space, row_pointer + j, [opened_value]);
+        }
+    }
+    tester
+        .streams
+        .hint_space
+        .push(proof.iter().flatten().copied().collect());
+    for (i, &bit) in sibling_is_on_right.iter().enumerate() {
+        tester.write(address_space, index_base_pointer + i, [F::from_bool(bit)]);
+    }
+    tester.write(address_space, commit_pointer, commit);
+
+    // The instruction carries the inverse of the element size as its last operand.
+    let opened_element_size_inv = F::from_canonical_usize(case.opened_element_size)
+        .inverse()
+        .as_canonical_u32() as usize;
+    tester.execute(
+        harness,
+        &Instruction::from_usize(
+            VERIFY_BATCH.global_opcode(),
+            [
+                dim_register,
+                opened_register,
+                opened_length_register,
+                proof_id,
+                index_register,
+                commit_register,
+                opened_element_size_inv,
+            ],
+        ),
+    );
+}
+
 fn test<const N: usize>(cases: [Case; N]) {
     unsafe {
         std::env::set_var("RUST_BACKTRACE", "1");
     }
-
-    // single op
-    let address_space = AS::Native as usize;
-
-    let mut tester = VmChipTestBuilder::default();
-    let streams = Arc::new(Mutex::new(Streams::default()));
-    let mut chip = NativePoseidon2Chip::<F, SBOX_REGISTERS>::new(
-        tester.system_port(),
-        tester.offline_memory_mutex_arc(),
-        Poseidon2Config::default(),
-        VERIFY_BATCH_BUS,
-        streams.clone(),
-    );
+    let mut valid_tester = VmChipTestBuilder::default_native();
+    let mut valid_harness = create_test_chip::<F, SBOX_REGISTERS>(&valid_tester);
+    let mut prank_tester = VmChipTestBuilder::default_native();
+    let mut prank_harness = create_test_chip::<F, SBOX_REGISTERS>(&prank_tester);
 
     let mut rng = create_seeded_rng();
-    for Case {
-        row_lengths,
-        opened_element_size,
-    } in cases
-    {
-        let mut streams = streams.lock().unwrap();
-        let instance =
-            random_instance(&mut rng, row_lengths, opened_element_size, |left, right| {
-                let concatenated =
-                    std::array::from_fn(|i| if i < CHUNK { left[i] } else { right[i - CHUNK] });
-                let permuted = chip.subchip.permute(concatenated);
-                (
-                    std::array::from_fn(|i| permuted[i]),
-                    std::array::from_fn(|i| permuted[i + CHUNK]),
-                )
-            });
-        let VerifyBatchInstance {
-            dim,
-            opened,
-            proof,
-            sibling_is_on_right,
-            commit,
-        } = instance;
-
-        let dim_register = gen_pointer(&mut rng, 1);
-        let opened_register = gen_pointer(&mut rng, 1);
-        let opened_length_register = gen_pointer(&mut rng, 1);
-        let proof_id = gen_pointer(&mut rng, 1);
-        let index_register = gen_pointer(&mut rng, 1);
-        let commit_register = gen_pointer(&mut rng, 1);
-
-        let dim_base_pointer = gen_pointer(&mut rng, 1);
-        let opened_base_pointer = gen_pointer(&mut rng, 2);
-        let index_base_pointer = gen_pointer(&mut rng, 1);
-        let commit_pointer = gen_pointer(&mut rng, 1);
-
-        tester.write_usize(address_space, dim_register, [dim_base_pointer]);
-        tester.write_usize(address_space, opened_register, [opened_base_pointer]);
-        tester.write_usize(address_space, opened_length_register, [opened.len()]);
-        tester.write_usize(address_space, proof_id, [streams.hint_space.len()]);
-        tester.write_usize(address_space, index_register, [index_base_pointer]);
-        tester.write_usize(address_space, commit_register, [commit_pointer]);
-
-        for (i, &dim_value) in dim.iter().enumerate() {
-            tester.write_usize(address_space, dim_base_pointer + i, [dim_value]);
-        }
-        for (i, opened_row) in opened.iter().enumerate() {
-            let row_pointer = gen_pointer(&mut rng, 1);
-            tester.write_usize(
-                address_space,
-                opened_base_pointer + (2 * i),
-                [row_pointer, opened_row.len() / opened_element_size],
-            );
-            for (j, &opened_value) in opened_row.iter().enumerate() {
-                tester.write_cell(address_space, row_pointer + j, opened_value);
-            }
-        }
-        streams
-            .hint_space
-            .push(proof.iter().flatten().copied().collect());
-        drop(streams);
-        for (i, &bit) in sibling_is_on_right.iter().enumerate() {
-            tester.write_cell(address_space, index_base_pointer + i, F::from_bool(bit));
-        }
-        tester.write(address_space, commit_pointer, commit);
-
-        let opened_element_size_inv = F::from_canonical_usize(opened_element_size)
-            .inverse()
-            .as_canonical_u32() as usize;
-        tester.execute(
-            &mut chip,
-            &Instruction::from_usize(
-                VERIFY_BATCH.global_opcode(),
-                [
-                    dim_register,
-                    opened_register,
-                    opened_length_register,
-                    proof_id,
-                    index_register,
-                    commit_register,
-                    opened_element_size_inv,
-                ],
-            ),
+    for case in cases {
+        set_and_execute(
+            &mut valid_tester,
+            &mut valid_harness,
+            &mut rng,
+            case.clone(),
         );
+        set_and_execute(&mut prank_tester, &mut prank_harness, &mut rng, case);
     }
 
-    let mut tester = tester.build().load(chip).finalize();
-    tester.simple_test().expect("Verification failed");
+    let valid_tester = valid_tester.build().load(valid_harness).finalize();
+    valid_tester.simple_test().expect("Verification failed");
 
     disable_debug_builder();
-    let trace = tester.air_proof_inputs[2]
-        .1
-        .raw
-        .common_main
-        .as_mut()
-        .unwrap();
-    let row_index = 0;
-    trace.row_mut(row_index);
-
     let p2_chip = Poseidon2SubChip::<F, SBOX_REGISTERS>::new(Poseidon2Config::default().constants);
     let inner_trace = p2_chip.generate_trace(vec![[F::ZERO; 2 * CHUNK]]);
     let inner_width = p2_chip.air.width();
 
-    trace.row_mut(row_index)[..inner_width].copy_from_slice(&inner_trace.values);
+    let modify_trace = |trace: &mut DenseMatrix<BabyBear>| {
+        let mut trace_row = trace.row_slice(0).to_vec();
+        trace_row[..inner_width].copy_from_slice(&inner_trace.values);
+        *trace = RowMajorMatrix::new(trace_row, trace.width());
+    };
+
+    let prank_tester = prank_tester
+        .build()
+        .load_and_prank_trace(prank_harness, modify_trace)
+        .finalize();
+
     // Run a test after pranking the poseidon2 stuff
-    assert_eq!(
-        tester.simple_test().err(),
-        Some(VerificationError::OodEvaluationMismatch),
-        "Expected constraint to fail"
-    );
+    prank_tester.simple_test_with_expected_error(VerificationError::OodEvaluationMismatch);
 }
 
 #[test]
@@ -383,15 +412,8 @@ fn random_instructions(num_ops: usize) -> Vec<Instruction<BabyBear>> {
 fn tester_with_random_poseidon2_ops(num_ops: usize) -> VmChipTester<BabyBearBlake3Config> {
     let elem_range = || 1..=100;
 
-    let mut tester = VmChipTestBuilder::default();
-    let streams = Arc::new(Mutex::new(Streams::default()));
-    let mut chip = NativePoseidon2Chip::<F, SBOX_REGISTERS>::new(
-        tester.system_port(),
-        tester.offline_memory_mutex_arc(),
-        Poseidon2Config::default(),
-        VERIFY_BATCH_BUS,
-        streams.clone(),
-    );
+    let mut tester = VmChipTestBuilder::default_native();
+    let mut harness = create_test_chip::<F, SBOX_REGISTERS>(&tester);
 
     let mut rng = create_seeded_rng();
 
@@ -417,27 +439,28 @@ fn tester_with_random_poseidon2_ops(num_ops: usize) -> VmChipTester<BabyBearBlak
         let data: [_; 2 * CHUNK] =
             std::array::from_fn(|_| BabyBear::from_canonical_usize(rng.gen_range(elem_range())));
 
-        let hash = chip.subchip.permute(data);
+        let hash = harness.executor.subchip.permute(data);
 
-        tester.write_cell(d, a, BabyBear::from_canonical_usize(dst));
-        tester.write_cell(d, b, BabyBear::from_canonical_usize(lhs));
+        tester.write(d, a, [BabyBear::from_canonical_usize(dst)]);
+        tester.write(d, b, [BabyBear::from_canonical_usize(lhs)]);
         if opcode == COMP_POS2 {
-            tester.write_cell(d, c, BabyBear::from_canonical_usize(rhs));
+            tester.write(d, c, [BabyBear::from_canonical_usize(rhs)]);
         }
 
+        let data_left: [_; CHUNK] = std::array::from_fn(|i| data[i]);
+        let data_right: [_; CHUNK] = std::array::from_fn(|i| data[CHUNK + i]);
         match opcode {
             COMP_POS2 => {
-                let data_left: [_; CHUNK] = std::array::from_fn(|i| data[i]);
-                let data_right: [_; CHUNK] = std::array::from_fn(|i| data[CHUNK + i]);
                 tester.write(e, lhs, data_left);
                 tester.write(e, rhs, data_right);
             }
             PERM_POS2 => {
-                tester.write(e, lhs, data);
+                tester.write(e, lhs, data_left);
+                tester.write(e, lhs + CHUNK, data_right);
             }
         }
 
-        tester.execute(&mut chip, &instruction);
+        tester.execute(&mut harness, &instruction);
 
         match opcode {
             COMP_POS2 => {
@@ -446,12 +469,14 @@ fn tester_with_random_poseidon2_ops(num_ops: usize) -> VmChipTester<BabyBearBlak
                 assert_eq!(expected, actual);
             }
             PERM_POS2 => {
-                let actual = tester.read::<{ 2 * CHUNK }>(e, dst);
-                assert_eq!(hash, actual);
+                let actual_0 = tester.read::<{ CHUNK }>(e, dst);
+                let actual_1 = tester.read::<{ CHUNK }>(e, dst + CHUNK);
+                let actual = [actual_0, actual_1].concat();
+                assert_eq!(&hash, &actual[..]);
             }
         }
     }
-    tester.build().load(chip).finalize()
+    tester.build().load(harness).finalize()
 }
 
 fn get_engine() -> BabyBearBlake3Engine {
@@ -476,34 +501,6 @@ fn verify_batch_chip_simple_50() {
     tester.test(get_engine).expect("Verification failed");
 }
 
-// log_blowup = 3 for poseidon2 chip
-fn air_test_with_compress_poseidon2(
-    poseidon2_max_constraint_degree: usize,
-    program: Program<BabyBear>,
-) {
-    let fri_params = if matches!(std::env::var("OPENVM_FAST_TEST"), Ok(x) if &x == "1") {
-        FriParameters {
-            log_blowup: 3,
-            log_final_poly_len: 0,
-            num_queries: 2,
-            proof_of_work_bits: 0,
-        }
-    } else {
-        standard_fri_params_with_100_bits_conjectured_security(3)
-    };
-    let engine = BabyBearPoseidon2Engine::new(fri_params);
-
-    let config = NativeConfig::aggregation(0, poseidon2_max_constraint_degree);
-    let vm = VirtualMachine::new(engine, config);
-
-    let pk = vm.keygen();
-    let result = vm.execute_and_generate(program, vec![]).unwrap();
-    let proofs = vm.prove(&pk, result);
-    for proof in proofs {
-        verify_single(&vm.engine, &pk.get_vk(), &proof).expect("Verification failed");
-    }
-}
-
 #[test]
 fn test_vm_compress_poseidon2_as4() {
     let mut rng = create_seeded_rng();
@@ -594,6 +591,14 @@ fn test_vm_compress_poseidon2_as4() {
 
     let program = Program::from_instructions(&instructions);
 
-    air_test_with_compress_poseidon2(3, program.clone());
-    air_test_with_compress_poseidon2(7, program.clone());
+    air_test(
+        NativeCpuBuilder,
+        NativeConfig::aggregation(0, 3),
+        program.clone(),
+    );
+    air_test(
+        NativeCpuBuilder,
+        NativeConfig::aggregation(0, 7),
+        program.clone(),
+    );
 }
diff --git a/extensions/native/circuit/src/poseidon2/trace.rs b/extensions/native/circuit/src/poseidon2/trace.rs
deleted file mode 100644
index df8547767f..0000000000
--- a/extensions/native/circuit/src/poseidon2/trace.rs
+++ /dev/null
@@ -1,485 +0,0 @@
-use std::{borrow::BorrowMut, sync::Arc};
-
-use openvm_circuit::system::memory::{MemoryAuxColsFactory, OfflineMemory};
-use openvm_circuit_primitives::utils::next_power_of_two_or_zero;
-use openvm_instructions::{instruction::Instruction, LocalOpcode};
-use openvm_native_compiler::Poseidon2Opcode::COMP_POS2;
-use openvm_stark_backend::{
-    config::{StarkGenericConfig, Val},
-    p3_air::BaseAir,
-    p3_field::{Field, PrimeField32},
-    p3_matrix::dense::RowMajorMatrix,
-    p3_maybe_rayon::prelude::*,
-    prover::types::AirProofInput,
-    AirRef, Chip, ChipUsageGetter,
-};
-
-use crate::{
-    chip::{SimplePoseidonRecord, NUM_INITIAL_READS},
-    poseidon2::{
-        chip::{
-            CellRecord, IncorporateRowRecord, IncorporateSiblingRecord, InsideRowRecord,
-            NativePoseidon2Chip, VerifyBatchRecord,
-        },
-        columns::{
-            InsideRowSpecificCols, NativePoseidon2Cols, SimplePoseidonSpecificCols,
-            TopLevelSpecificCols,
-        },
-        CHUNK,
-    },
-};
-impl<F: Field, const SBOX_REGISTERS: usize> ChipUsageGetter
-    for NativePoseidon2Chip<F, SBOX_REGISTERS>
-{
-    fn air_name(&self) -> String {
-        "VerifyBatchAir".to_string()
-    }
-
-    fn current_trace_height(&self) -> usize {
-        self.height
-    }
-
-    fn trace_width(&self) -> usize {
-        NativePoseidon2Cols::<F, SBOX_REGISTERS>::width()
-    }
-}
-
-impl<F: PrimeField32, const SBOX_REGISTERS: usize> NativePoseidon2Chip<F, SBOX_REGISTERS> {
-    fn generate_subair_cols(&self, input: [F; 2 * CHUNK], cols: &mut [F]) {
-        let inner_trace = self.subchip.generate_trace(vec![input]);
-        let inner_width = self.air.subair.width();
-        cols[..inner_width].copy_from_slice(inner_trace.values.as_slice());
-    }
-    #[allow(clippy::too_many_arguments)]
-    fn incorporate_sibling_record_to_row(
-        &self,
-        record: &IncorporateSiblingRecord<F>,
-        aux_cols_factory: &MemoryAuxColsFactory<F>,
-        slice: &mut [F],
-        memory: &OfflineMemory<F>,
-        parent: &VerifyBatchRecord<F>,
-        proof_index: usize,
-        opened_index: usize,
-        log_height: usize,
-    ) {
-        let &IncorporateSiblingRecord {
-            read_sibling_is_on_right,
-            sibling_is_on_right,
-            p2_input,
-        } = record;
-
-        let read_sibling_is_on_right = memory.record_by_id(read_sibling_is_on_right);
-
-        self.generate_subair_cols(p2_input, slice);
-        let cols: &mut NativePoseidon2Cols<F, SBOX_REGISTERS> = slice.borrow_mut();
-        cols.incorporate_row = F::ZERO;
-        cols.incorporate_sibling = F::ONE;
-        cols.inside_row = F::ZERO;
-        cols.simple = F::ZERO;
-        cols.end_inside_row = F::ZERO;
-        cols.end_top_level = F::ZERO;
-        cols.start_top_level = F::ZERO;
-        cols.opened_element_size_inv = parent.opened_element_size_inv();
-        cols.very_first_timestamp = F::from_canonical_u32(parent.from_state.timestamp);
-        cols.start_timestamp =
-            F::from_canonical_u32(read_sibling_is_on_right.timestamp - NUM_INITIAL_READS as u32);
-
-        let specific: &mut TopLevelSpecificCols<F> =
-            cols.specific[..TopLevelSpecificCols::<F>::width()].borrow_mut();
-
-        specific.end_timestamp =
-            F::from_canonical_usize(read_sibling_is_on_right.timestamp as usize + 1);
-        cols.initial_opened_index = F::from_canonical_usize(opened_index);
-        specific.final_opened_index = F::from_canonical_usize(opened_index - 1);
-        specific.log_height = F::from_canonical_usize(log_height);
-        specific.opened_length = F::from_canonical_usize(parent.opened_length);
-        specific.dim_base_pointer = parent.dim_base_pointer;
-        cols.opened_base_pointer = parent.opened_base_pointer;
-        specific.index_base_pointer = parent.index_base_pointer;
-
-        specific.proof_index = F::from_canonical_usize(proof_index);
-        aux_cols_factory.generate_read_aux(
-            read_sibling_is_on_right,
-            &mut specific.read_initial_height_or_sibling_is_on_right,
-        );
-        specific.sibling_is_on_right = F::from_bool(sibling_is_on_right);
-    }
-    fn correct_last_top_level_row(
-        &self,
-        record: &VerifyBatchRecord<F>,
-        aux_cols_factory: &MemoryAuxColsFactory<F>,
-        slice: &mut [F],
-        memory: &OfflineMemory<F>,
-    ) {
-        let &VerifyBatchRecord {
-            from_state,
-            commit_pointer,
-            dim_base_pointer_read,
-            opened_base_pointer_read,
-            opened_length_read,
-            index_base_pointer_read,
-            commit_pointer_read,
-            commit_read,
-            ..
-        } = record;
-        let instruction = &record.instruction;
-        let cols: &mut NativePoseidon2Cols<F, SBOX_REGISTERS> = slice.borrow_mut();
-        cols.end_top_level = F::ONE;
-
-        let specific: &mut TopLevelSpecificCols<F> =
-            cols.specific[..TopLevelSpecificCols::<F>::width()].borrow_mut();
-
-        specific.pc = F::from_canonical_u32(from_state.pc);
-        specific.dim_register = instruction.a;
-        specific.opened_register = instruction.b;
-        specific.opened_length_register = instruction.c;
-        specific.proof_id = instruction.d;
-        specific.index_register = instruction.e;
-        specific.commit_register = instruction.f;
-        specific.commit_pointer = commit_pointer;
-        aux_cols_factory.generate_read_aux(
-            memory.record_by_id(dim_base_pointer_read),
-            &mut specific.dim_base_pointer_read,
-        );
-        aux_cols_factory.generate_read_aux(
-            memory.record_by_id(opened_base_pointer_read),
-            &mut specific.opened_base_pointer_read,
-        );
-        aux_cols_factory.generate_read_aux(
-            memory.record_by_id(opened_length_read),
-            &mut specific.opened_length_read,
-        );
-        aux_cols_factory.generate_read_aux(
-            memory.record_by_id(index_base_pointer_read),
-            &mut specific.index_base_pointer_read,
-        );
-        aux_cols_factory.generate_read_aux(
-            memory.record_by_id(commit_pointer_read),
-            &mut specific.commit_pointer_read,
-        );
-        aux_cols_factory
-            .generate_read_aux(memory.record_by_id(commit_read), &mut specific.commit_read);
-    }
-    #[allow(clippy::too_many_arguments)]
-    fn incorporate_row_record_to_row(
-        &self,
-        record: &IncorporateRowRecord<F>,
-        aux_cols_factory: &MemoryAuxColsFactory<F>,
-        slice: &mut [F],
-        memory: &OfflineMemory<F>,
-        parent: &VerifyBatchRecord<F>,
-        proof_index: usize,
-        log_height: usize,
-    ) {
-        let &IncorporateRowRecord {
-            initial_opened_index,
-            final_opened_index,
-            initial_height_read,
-            final_height_read,
-            p2_input,
-            ..
-        } = record;
-
-        let initial_height_read = memory.record_by_id(initial_height_read);
-        let final_height_read = memory.record_by_id(final_height_read);
-
-        self.generate_subair_cols(p2_input, slice);
-        let cols: &mut NativePoseidon2Cols<F, SBOX_REGISTERS> = slice.borrow_mut();
-        cols.incorporate_row = F::ONE;
-        cols.incorporate_sibling = F::ZERO;
-        cols.inside_row = F::ZERO;
-        cols.simple = F::ZERO;
-        cols.end_inside_row = F::ZERO;
-        cols.end_top_level = F::ZERO;
-        cols.start_top_level = F::from_bool(proof_index == 0);
-        cols.opened_element_size_inv = parent.opened_element_size_inv();
-        cols.very_first_timestamp = F::from_canonical_u32(parent.from_state.timestamp);
-        cols.start_timestamp = F::from_canonical_u32(
-            memory
-                .record_by_id(
-                    record.chunks[0].cells[0]
-                        .read_row_pointer_and_length
-                        .unwrap(),
-                )
-                .timestamp
-                - NUM_INITIAL_READS as u32,
-        );
-        let specific: &mut TopLevelSpecificCols<F> =
-            cols.specific[..TopLevelSpecificCols::<F>::width()].borrow_mut();
-
-        specific.end_timestamp = F::from_canonical_u32(final_height_read.timestamp + 1);
-
-        cols.initial_opened_index = F::from_canonical_usize(initial_opened_index);
-        specific.final_opened_index = F::from_canonical_usize(final_opened_index);
-        specific.log_height = F::from_canonical_usize(log_height);
-        specific.opened_length = F::from_canonical_usize(parent.opened_length);
-        specific.dim_base_pointer = parent.dim_base_pointer;
-        cols.opened_base_pointer = parent.opened_base_pointer;
-        specific.index_base_pointer = parent.index_base_pointer;
-
-        specific.proof_index = F::from_canonical_usize(proof_index);
-        aux_cols_factory.generate_read_aux(
-            initial_height_read,
-            &mut specific.read_initial_height_or_sibling_is_on_right,
-        );
-        aux_cols_factory.generate_read_aux(final_height_read, &mut specific.read_final_height);
-    }
-    #[allow(clippy::too_many_arguments)]
-    fn inside_row_record_to_row(
-        &self,
-        record: &InsideRowRecord<F>,
-        aux_cols_factory: &MemoryAuxColsFactory<F>,
-        slice: &mut [F],
-        memory: &OfflineMemory<F>,
-        parent: &IncorporateRowRecord<F>,
-        grandparent: &VerifyBatchRecord<F>,
-        is_last: bool,
-    ) {
-        let InsideRowRecord { cells, p2_input } = record;
-
-        self.generate_subair_cols(*p2_input, slice);
-        let cols: &mut NativePoseidon2Cols<F, SBOX_REGISTERS> = slice.borrow_mut();
-        cols.incorporate_row = F::ZERO;
-        cols.incorporate_sibling = F::ZERO;
-        cols.inside_row = F::ONE;
-        cols.simple = F::ZERO;
-        cols.end_inside_row = F::from_bool(is_last);
-        cols.end_top_level = F::ZERO;
-        cols.opened_element_size_inv = grandparent.opened_element_size_inv();
-        cols.very_first_timestamp = F::from_canonical_u32(
-            memory
-                .record_by_id(
-                    parent.chunks[0].cells[0]
-                        .read_row_pointer_and_length
-                        .unwrap(),
-                )
-                .timestamp,
-        );
-        cols.start_timestamp =
-            F::from_canonical_u32(memory.record_by_id(cells[0].read).timestamp - 1);
-        let specific: &mut InsideRowSpecificCols<F> =
-            cols.specific[..InsideRowSpecificCols::<F>::width()].borrow_mut();
-
-        for (record, cell) in cells.iter().zip(specific.cells.iter_mut()) {
-            let &CellRecord {
-                read,
-                opened_index,
-                read_row_pointer_and_length,
-                row_pointer,
-                row_end,
-            } = record;
-            aux_cols_factory.generate_read_aux(memory.record_by_id(read), &mut cell.read);
-            cell.opened_index = F::from_canonical_usize(opened_index);
-            if let Some(read_row_pointer_and_length) = read_row_pointer_and_length {
-                aux_cols_factory.generate_read_aux(
-                    memory.record_by_id(read_row_pointer_and_length),
-                    &mut cell.read_row_pointer_and_length,
-                );
-            }
-            cell.row_pointer = F::from_canonical_usize(row_pointer);
-            cell.row_end = F::from_canonical_usize(row_end);
-            cell.is_first_in_row = F::from_bool(read_row_pointer_and_length.is_some());
-        }
-
-        for cell in specific.cells.iter_mut().skip(cells.len()) {
-            cell.opened_index = F::from_canonical_usize(parent.final_opened_index);
-        }
-
-        cols.is_exhausted = std::array::from_fn(|i| F::from_bool(i + 1 >= cells.len()));
-
-        cols.initial_opened_index = F::from_canonical_usize(parent.initial_opened_index);
-        cols.opened_base_pointer = grandparent.opened_base_pointer;
-    }
-    // returns number of used cells
-    fn verify_batch_record_to_rows(
-        &self,
-        record: &VerifyBatchRecord<F>,
-        aux_cols_factory: &MemoryAuxColsFactory<F>,
-        slice: &mut [F],
-        memory: &OfflineMemory<F>,
-    ) -> usize {
-        let width = NativePoseidon2Cols::<F, SBOX_REGISTERS>::width();
-        let mut used_cells = 0;
-
-        let mut opened_index = 0;
-        for (proof_index, top_level) in record.top_level.iter().enumerate() {
-            let log_height = record.initial_log_height - proof_index;
-            if let Some(incorporate_row) = &top_level.incorporate_row {
-                self.incorporate_row_record_to_row(
-                    incorporate_row,
-                    aux_cols_factory,
-                    &mut slice[used_cells..used_cells + width],
-                    memory,
-                    record,
-                    proof_index,
-                    log_height,
-                );
-                opened_index = incorporate_row.final_opened_index + 1;
-                used_cells += width;
-            }
-            if let Some(incorporate_sibling) = &top_level.incorporate_sibling {
-                self.incorporate_sibling_record_to_row(
-                    incorporate_sibling,
-                    aux_cols_factory,
-                    &mut slice[used_cells..used_cells + width],
-                    memory,
-                    record,
-                    proof_index,
-                    opened_index,
-                    log_height,
-                );
-                used_cells += width;
-            }
-        }
-        self.correct_last_top_level_row(
-            record,
-            aux_cols_factory,
-            &mut slice[used_cells - width..used_cells],
-            memory,
-        );
-
-        for top_level in record.top_level.iter() {
-            if let Some(incorporate_row) = &top_level.incorporate_row {
-                for (i, chunk) in incorporate_row.chunks.iter().enumerate() {
-                    self.inside_row_record_to_row(
-                        chunk,
-                        aux_cols_factory,
-                        &mut slice[used_cells..used_cells + width],
-                        memory,
-                        incorporate_row,
-                        record,
-                        i == incorporate_row.chunks.len() - 1,
-                    );
-                    used_cells += width;
-                }
-            }
-        }
-
-        used_cells
-    }
-    fn simple_record_to_row(
-        &self,
-        record: &SimplePoseidonRecord<F>,
-        aux_cols_factory: &MemoryAuxColsFactory<F>,
-        slice: &mut [F],
-        memory: &OfflineMemory<F>,
-    ) {
-        let &SimplePoseidonRecord {
-            from_state,
-            instruction:
-                Instruction {
-                    opcode,
-                    a: output_register,
-                    b: input_register_1,
-                    c: input_register_2,
-                    ..
-                },
-            read_input_pointer_1,
-            read_input_pointer_2,
-            read_output_pointer,
-            read_data_1,
-            read_data_2,
-            write_data_1,
-            write_data_2,
-            input_pointer_1,
-            input_pointer_2,
-            output_pointer,
-            p2_input,
-        } = record;
-
-        let read_input_pointer_1 = memory.record_by_id(read_input_pointer_1);
-        let read_output_pointer = memory.record_by_id(read_output_pointer);
-        let read_data_1 = memory.record_by_id(read_data_1);
-        let read_data_2 = memory.record_by_id(read_data_2);
-        let write_data_1 = memory.record_by_id(write_data_1);
-
-        self.generate_subair_cols(p2_input, slice);
-        let cols: &mut NativePoseidon2Cols<F, SBOX_REGISTERS> = slice.borrow_mut();
-        cols.incorporate_row = F::ZERO;
-        cols.incorporate_sibling = F::ZERO;
-        cols.inside_row = F::ZERO;
-        cols.simple = F::ONE;
-        cols.end_inside_row = F::ZERO;
-        cols.end_top_level = F::ZERO;
-        cols.is_exhausted = [F::ZERO; CHUNK - 1];
-
-        cols.start_timestamp = F::from_canonical_u32(from_state.timestamp);
-        let specific: &mut SimplePoseidonSpecificCols<F> =
-            cols.specific[..SimplePoseidonSpecificCols::<F>::width()].borrow_mut();
-
-        specific.pc = F::from_canonical_u32(from_state.pc);
-        specific.is_compress = F::from_bool(opcode == COMP_POS2.global_opcode());
-        specific.output_register = output_register;
-        specific.input_register_1 = input_register_1;
-        specific.input_register_2 = input_register_2;
-        specific.output_pointer = output_pointer;
-        specific.input_pointer_1 = input_pointer_1;
-        specific.input_pointer_2 = input_pointer_2;
-        aux_cols_factory.generate_read_aux(read_output_pointer, &mut specific.read_output_pointer);
-        aux_cols_factory
-            .generate_read_aux(read_input_pointer_1, &mut specific.read_input_pointer_1);
-        aux_cols_factory.generate_read_aux(read_data_1, &mut specific.read_data_1);
-        aux_cols_factory.generate_read_aux(read_data_2, &mut specific.read_data_2);
-        aux_cols_factory.generate_write_aux(write_data_1, &mut specific.write_data_1);
-
-        if opcode == COMP_POS2.global_opcode() {
-            let read_input_pointer_2 = memory.record_by_id(read_input_pointer_2.unwrap());
-            aux_cols_factory
-                .generate_read_aux(read_input_pointer_2, &mut specific.read_input_pointer_2);
-        } else {
-            let write_data_2 = memory.record_by_id(write_data_2.unwrap());
-            aux_cols_factory.generate_write_aux(write_data_2, &mut specific.write_data_2);
-        }
-    }
-
-    fn generate_trace(self) -> RowMajorMatrix<F> {
-        let width = self.trace_width();
-        let height = next_power_of_two_or_zero(self.height);
-        let mut flat_trace = F::zero_vec(width * height);
-
-        let memory = self.offline_memory.lock().unwrap();
-
-        let aux_cols_factory = memory.aux_cols_factory();
-
-        let mut used_cells = 0;
-        for record in self.record_set.verify_batch_records.iter() {
-            used_cells += self.verify_batch_record_to_rows(
-                record,
-                &aux_cols_factory,
-                &mut flat_trace[used_cells..],
-                &memory,
-            );
-        }
-        for record in self.record_set.simple_permute_records.iter() {
-            self.simple_record_to_row(
-                record,
-                &aux_cols_factory,
-                &mut flat_trace[used_cells..used_cells + width],
-                &memory,
-            );
-            used_cells += width;
-        }
-        // poseidon2 constraints are always checked
-        // following can be optimized to only hash [0; _] once
-        flat_trace[used_cells..]
-            .par_chunks_mut(width)
-            .for_each(|row| {
-                self.generate_subair_cols([F::ZERO; 2 * CHUNK], row);
-            });
-
-        RowMajorMatrix::new(flat_trace, width)
-    }
-}
-
-impl<SC: StarkGenericConfig, const SBOX_REGISTERS: usize> Chip<SC>
-    for NativePoseidon2Chip<Val<SC>, SBOX_REGISTERS>
-where
-    Val<SC>: PrimeField32,
-{
-    fn air(&self) -> AirRef<SC> {
-        Arc::new(self.air.clone())
-    }
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
-        AirProofInput::simple_no_pis(self.generate_trace())
-    }
-}
diff --git a/extensions/native/circuit/src/utils.rs b/extensions/native/circuit/src/utils.rs
index 2815427336..f7e6513c24 100644
--- a/extensions/native/circuit/src/utils.rs
+++ b/extensions/native/circuit/src/utils.rs
@@ -1,19 +1,162 @@
-use openvm_circuit::arch::{Streams, SystemConfig, VmExecutor};
-use openvm_instructions::program::Program;
-use openvm_stark_sdk::p3_baby_bear::BabyBear;
+pub(crate) const CASTF_MAX_BITS: usize = 30;
 
-use crate::{Native, NativeConfig};
+pub(crate) const fn const_max(a: usize, b: usize) -> usize {
+    [a, b][(a < b) as usize] // selects index 1 (`b`) when `a < b`, otherwise index 0 (`a`)
+}
 
-pub fn execute_program(program: Program<BabyBear>, input_stream: impl Into<Streams<BabyBear>>) {
-    let system_config = SystemConfig::default()
-        .with_public_values(4)
-        .with_max_segment_len((1 << 25) - 100);
-    let config = NativeConfig::new(system_config, Native);
-    let executor = VmExecutor::<BabyBear, NativeConfig>::new(config);
+/// Testing framework
+#[cfg(any(test, feature = "test-utils"))]
+pub mod test_utils {
+    use std::array;
 
-    executor.execute(program, input_stream).unwrap();
-}
+    use openvm_circuit::{
+        arch::{
+            execution_mode::Segment,
+            testing::{memory::gen_pointer, VmChipTestBuilder},
+            MatrixRecordArena, PreflightExecutionOutput, Streams, VirtualMachine,
+            VirtualMachineError, VmBuilder, VmState,
+        },
+        utils::test_system_config_without_continuations,
+    };
+    use openvm_instructions::{
+        exe::VmExe,
+        program::Program,
+        riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS},
+    };
+    use openvm_native_compiler::conversion::AS;
+    use openvm_stark_backend::{
+        config::Domain, p3_commit::PolynomialSpace, p3_field::PrimeField32,
+    };
+    use openvm_stark_sdk::{
+        config::{baby_bear_poseidon2::BabyBearPoseidon2Engine, setup_tracing, FriParameters},
+        engine::StarkFriEngine,
+        p3_baby_bear::BabyBear,
+    };
+    use rand::{distributions::Standard, prelude::Distribution, rngs::StdRng, Rng};
 
-pub(crate) const fn const_max(a: usize, b: usize) -> usize {
-    [a, b][(a < b) as usize]
+    use crate::{NativeConfig, NativeCpuBuilder, Rv32WithKernelsConfig};
+
+    /// Returns `(value, AS::Immediate)` if immediate. Otherwise writes `value` to native memory at
+    /// a `gen_pointer` address and returns `(ptr, AS::Native)`. `is_imm = None` picks randomly.
+    pub fn write_native_or_imm<F: PrimeField32>(
+        tester: &mut VmChipTestBuilder<F>,
+        rng: &mut StdRng,
+        value: F,
+        is_imm: Option<bool>,
+    ) -> (F, usize) {
+        let is_imm = is_imm.unwrap_or(rng.gen_bool(0.5)); // eager: always advances `rng`
+        if is_imm {
+            (value, AS::Immediate as usize)
+        } else {
+            let ptr = gen_pointer(rng, 1);
+            tester.write::<1>(AS::Native as usize, ptr, [value]);
+            (F::from_canonical_usize(ptr), AS::Native as usize)
+        }
+    }
+
+    /// Writes `value` to native memory and returns `(value, ptr)`, where `ptr` points to the
+    /// first element. If `value` is `None`, randomizes it.
+    pub fn write_native_array<F: PrimeField32, const N: usize>(
+        tester: &mut VmChipTestBuilder<F>,
+        rng: &mut StdRng,
+        value: Option<[F; N]>,
+    ) -> ([F; N], usize)
+    where
+        Standard: Distribution<F>, // Needed for `rng.gen`
+    {
+        let value = value.unwrap_or(array::from_fn(|_| rng.gen())); // eager: always advances `rng`
+        let ptr = gen_pointer(rng, N);
+        tester.write::<N>(AS::Native as usize, ptr, value);
+        (value, ptr)
+    }
+
+    // Runs metered execution, then preflight execution, of `program` on a caller-supplied builder
+    // and config; returns a `Result` with the full (PreflightExecutionOutput, VirtualMachine).
+    #[allow(clippy::type_complexity)]
+    pub fn execute_program_with_config<E, VB>(
+        program: Program<BabyBear>,
+        input_stream: impl Into<Streams<BabyBear>>,
+        builder: VB,
+        config: VB::VmConfig,
+    ) -> Result<
+        (
+            PreflightExecutionOutput<BabyBear, MatrixRecordArena<BabyBear>>,
+            VirtualMachine<E, VB>,
+        ),
+        VirtualMachineError,
+    >
+    where
+        E: StarkFriEngine,
+        Domain<E::SC>: PolynomialSpace<Val = BabyBear>,
+        VB: VmBuilder<E, VmConfig = NativeConfig, RecordArena = MatrixRecordArena<BabyBear>>,
+    {
+        setup_tracing();
+        assert!(!config.as_ref().continuation_enabled); // continuations are not supported here
+        let input = input_stream.into();
+
+        let engine = E::new(FriParameters::new_for_testing(1));
+        let (vm, _) = VirtualMachine::new_with_keygen(engine, builder, config)?;
+        let ctx = vm.build_metered_ctx();
+        let exe = VmExe::new(program);
+        let (mut segments, _) = vm
+            .metered_interpreter(&exe)?
+            .execute_metered(input.clone(), ctx)?;
+        assert_eq!(segments.len(), 1, "test only supports one segment");
+        let Segment {
+            instret_start,
+            num_insns,
+            trace_heights,
+        } = segments.pop().unwrap();
+        assert_eq!(instret_start, 0);
+        let state = vm.create_initial_state(&exe, input); // fresh state for the preflight run
+        let mut preflight_interpreter = vm.preflight_interpreter(&exe)?;
+        let output =
+            vm.execute_preflight(&mut preflight_interpreter, state, None, &trace_heights)?;
+        assert_eq!(
+            output.to_state.instret, num_insns,
+            "metered execution insn count doesn't match preflight execution"
+        );
+        Ok((output, vm))
+    }
+
+    pub fn execute_program(
+        program: Program<BabyBear>,
+        input_stream: impl Into<Streams<BabyBear>>,
+    ) -> VmState<BabyBear> {
+        let mut config = test_native_config();
+        config.system.num_public_values = 4;
+        // no segmentation expected: `execute_program_with_config` asserts a single segment
+        let (output, _) = execute_program_with_config::<BabyBearPoseidon2Engine, _>(
+            program,
+            input_stream,
+            NativeCpuBuilder,
+            config,
+        )
+        .unwrap();
+        output.to_state
+    }
+
+    pub fn test_native_config() -> NativeConfig {
+        let mut system = test_system_config_without_continuations();
+        system.memory_config.addr_spaces[RV32_REGISTER_AS as usize].num_cells = 0;
+        system.memory_config.addr_spaces[RV32_MEMORY_AS as usize].num_cells = 0;
+        NativeConfig { // RV32 register/memory address spaces were set to 0 cells above
+            system,
+            native: Default::default(),
+        }
+    }
+
+    pub fn test_native_continuations_config() -> NativeConfig {
+        NativeConfig { // `with_continuations()` is applied on top of the no-continuations base
+            system: test_system_config_without_continuations().with_continuations(),
+            native: Default::default(),
+        }
+    }
+
+    pub fn test_rv32_with_kernels_config() -> Rv32WithKernelsConfig {
+        Rv32WithKernelsConfig { // only `system` is overridden; all other fields use `Default`
+            system: test_system_config_without_continuations().with_continuations(),
+            ..Default::default()
+        }
+    }
 }
diff --git a/extensions/native/compiler/tests/arithmetic.rs b/extensions/native/circuit/tests/arithmetic.rs
similarity index 96%
rename from extensions/native/compiler/tests/arithmetic.rs
rename to extensions/native/circuit/tests/arithmetic.rs
index cd68fab563..6b50566a5d 100644
--- a/extensions/native/compiler/tests/arithmetic.rs
+++ b/extensions/native/circuit/tests/arithmetic.rs
@@ -1,4 +1,5 @@
 use openvm_circuit::arch::{ExecutionError, VmExecutor};
+use openvm_instructions::exe::VmExe;
 use openvm_native_circuit::{execute_program, NativeConfig};
 use openvm_native_compiler::{
     asm::{AsmBuilder, AsmCompiler, AsmConfig},
@@ -391,8 +392,15 @@ fn assert_failed_assertion(
     builder: Builder<AsmConfig<BabyBear, BinomialExtensionField<BabyBear, 4>>>,
 ) {
     let program = builder.compile_isa();
-
-    let executor = VmExecutor::<BabyBear, NativeConfig>::new(NativeConfig::aggregation(4, 3));
-    let result = executor.execute(program, vec![]);
-    assert!(matches!(result, Err(ExecutionError::Fail { .. })));
+    let exe = VmExe::new(program);
+
+    let config = NativeConfig::aggregation(4, 3);
+    let executor = VmExecutor::new(config).unwrap();
+    let instance = executor.instance(&exe).unwrap();
+    let result = instance.execute(vec![], None);
+    assert!(
+        matches!(result, Err(ExecutionError::Fail { .. })),
+        "Unexpected result: {:?}",
+        result.err()
+    );
 }
diff --git a/extensions/native/compiler/tests/array.rs b/extensions/native/circuit/tests/array.rs
similarity index 100%
rename from extensions/native/compiler/tests/array.rs
rename to extensions/native/circuit/tests/array.rs
diff --git a/extensions/native/compiler/tests/conditionals.rs b/extensions/native/circuit/tests/conditionals.rs
similarity index 100%
rename from extensions/native/compiler/tests/conditionals.rs
rename to extensions/native/circuit/tests/conditionals.rs
diff --git a/extensions/native/compiler/tests/cycle_tracker.rs b/extensions/native/circuit/tests/cycle_tracker.rs
similarity index 93%
rename from extensions/native/compiler/tests/cycle_tracker.rs
rename to extensions/native/circuit/tests/cycle_tracker.rs
index 3561dfd2ec..d8bcb59bce 100644
--- a/extensions/native/compiler/tests/cycle_tracker.rs
+++ b/extensions/native/circuit/tests/cycle_tracker.rs
@@ -1,3 +1,5 @@
+use std::ops::Deref;
+
 use openvm_native_circuit::execute_program;
 use openvm_native_compiler::{asm::AsmBuilder, conversion::CompilerOptions, ir::Var};
 use openvm_stark_backend::p3_field::{extension::BinomialExtensionField, FieldAlgebra};
@@ -43,7 +45,7 @@ fn test_cycle_tracker() {
         ..Default::default()
     });
 
-    for (i, debug_info) in program.debug_infos().iter().enumerate() {
+    for (i, debug_info) in program.debug_infos().deref().iter().enumerate() {
         println!("debug_info {}: {:?}", i, debug_info);
     }
 
diff --git a/extensions/native/compiler/tests/ext.rs b/extensions/native/circuit/tests/ext.rs
similarity index 100%
rename from extensions/native/compiler/tests/ext.rs
rename to extensions/native/circuit/tests/ext.rs
diff --git a/extensions/native/circuit/examples/fibonacci.rs b/extensions/native/circuit/tests/fibonacci.rs
similarity index 97%
rename from extensions/native/circuit/examples/fibonacci.rs
rename to extensions/native/circuit/tests/fibonacci.rs
index aca5e2d6c5..8dfb29a835 100644
--- a/extensions/native/circuit/examples/fibonacci.rs
+++ b/extensions/native/circuit/tests/fibonacci.rs
@@ -47,6 +47,6 @@ fn main() {
     builder.halt();
 
     let program = builder.compile_isa();
-    println!("{}", program);
+    println!("{program}");
     execute_program(program, vec![]);
 }
diff --git a/extensions/native/compiler/tests/for_loops.rs b/extensions/native/circuit/tests/for_loops.rs
similarity index 100%
rename from extensions/native/compiler/tests/for_loops.rs
rename to extensions/native/circuit/tests/for_loops.rs
diff --git a/extensions/native/compiler/tests/fri_ro_eval.rs b/extensions/native/circuit/tests/fri_ro_eval.rs
similarity index 100%
rename from extensions/native/compiler/tests/fri_ro_eval.rs
rename to extensions/native/circuit/tests/fri_ro_eval.rs
diff --git a/extensions/native/compiler/tests/hint.rs b/extensions/native/circuit/tests/hint.rs
similarity index 100%
rename from extensions/native/compiler/tests/hint.rs
rename to extensions/native/circuit/tests/hint.rs
diff --git a/extensions/native/circuit/tests/integration_test.rs b/extensions/native/circuit/tests/integration_test.rs
new file mode 100644
index 0000000000..1c3cacfad6
--- /dev/null
+++ b/extensions/native/circuit/tests/integration_test.rs
@@ -0,0 +1,1002 @@
+use std::{
+    collections::{BTreeMap, VecDeque},
+    mem::transmute,
+    sync::Arc,
+};
+
+use itertools::Itertools;
+use openvm_circuit::{
+    arch::{
+        execution_mode::metered::segment_ctx::{SegmentationLimits, DEFAULT_SEGMENT_CHECK_INSNS},
+        hasher::{poseidon2::vm_poseidon2_hasher, Hasher},
+        verify_segments, verify_single, AirInventory, ContinuationVmProver,
+        PreflightExecutionOutput, RowMajorMatrixArena, SingleSegmentVmProver, VirtualMachine,
+        VmCircuitConfig, VmExecutor, VmInstance, PUBLIC_VALUES_AIR_ID,
+    },
+    system::{memory::CHUNK, program::trace::VmCommittedExe, SystemCpuBuilder},
+    utils::{air_test, air_test_with_min_segments, test_system_config_without_continuations},
+};
+use openvm_instructions::{
+    exe::VmExe,
+    instruction::Instruction,
+    program::{Program, DEFAULT_PC_STEP},
+    LocalOpcode, PhantomDiscriminant,
+    PublishOpcode::PUBLISH,
+    SysPhantom,
+    SystemOpcode::*,
+};
+use openvm_native_circuit::{
+    execute_program, test_native_config, test_native_continuations_config,
+    test_rv32_with_kernels_config, NativeConfig, NativeCpuBuilder,
+};
+use openvm_native_compiler::{
+    CastfOpcode,
+    FieldArithmeticOpcode::*,
+    FieldExtensionOpcode::*,
+    FriOpcode, NativeBranchEqualOpcode,
+    NativeJalOpcode::{self, *},
+    NativeLoadStoreOpcode::*,
+    NativePhantom, NativeRangeCheckOpcode, Poseidon2Opcode,
+};
+use openvm_rv32im_transpiler::BranchEqualOpcode::*;
+use openvm_stark_backend::{
+    config::StarkGenericConfig, engine::StarkEngine, p3_field::FieldAlgebra,
+};
+use openvm_stark_sdk::{
+    config::{
+        baby_bear_poseidon2::{BabyBearPoseidon2Config, BabyBearPoseidon2Engine},
+        fri_params::standard_fri_params_with_100_bits_conjectured_security,
+        setup_tracing, FriParameters,
+    },
+    engine::StarkFriEngine,
+    p3_baby_bear::BabyBear,
+};
+use rand::Rng;
+use test_log::test;
+
+/// Returns a random offset below `MAX_MEMORY - len` (with `MAX_MEMORY = 1 << 29`), rounded
+/// down to a multiple of `len`.
+///
+/// Panics if `len` is zero (division by zero) or if `MAX_MEMORY - len` does not leave a
+/// non-empty range to sample from.
+pub fn gen_pointer<R>(rng: &mut R, len: usize) -> usize
+where
+    R: Rng + ?Sized,
+{
+    const MAX_MEMORY: usize = 1 << 29;
+    let upper = MAX_MEMORY - len;
+    let raw = rng.gen_range(0..upper);
+    // Integer division followed by multiplication rounds down to a multiple of `len`.
+    (raw / len) * len
+}
+
+#[test]
+fn test_vm_1() {
+    let n = 6;
+    /*
+    Countdown loop: word[0]_4 starts at n and is decremented until it reaches 0.
+    Instruction 0 assigns word[0]_4 to n.
+    Instruction 1 checks if word[0]_4 is 0 yet, and if so jumps to instruction 4 to terminate.
+    Instruction 2 decrements word[0]_4 (subtracting word[1]_0).
+    Instruction 3 uses JAL as a simple jump to go back to instruction 1 (repeating the loop).
+    Instruction 4 terminates.
+     */
+    let instructions = vec![
+        // word[0]_4 <- word[n]_0
+        Instruction::large_from_isize(ADD.global_opcode(), 0, n, 0, 4, 0, 0, 0),
+        // if word[0]_4 == 0 then pc += 3 * DEFAULT_PC_STEP
+        Instruction::from_isize(
+            NativeBranchEqualOpcode(BEQ).global_opcode(),
+            0,
+            0,
+            3 * DEFAULT_PC_STEP as isize,
+            4,
+            0,
+        ),
+        // word[0]_4 <- word[0]_4 - word[1]_0 (third operand uses address space 0, like n above)
+        Instruction::large_from_isize(SUB.global_opcode(), 0, 0, 1, 4, 4, 0, 0),
+        // word[2]_4 <- pc + DEFAULT_PC_STEP, pc -= 2 * DEFAULT_PC_STEP
+        Instruction::from_isize(
+            JAL.global_opcode(),
+            2,
+            -2 * DEFAULT_PC_STEP as isize,
+            0,
+            4,
+            0,
+        ),
+        // terminate
+        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
+    ];
+
+    let program = Program::from_instructions(&instructions);
+
+    // Run the program through `air_test` with the non-continuation native test config.
+    air_test(NativeCpuBuilder, test_native_config(), program);
+}
+
+// Checks that the proving context ends up with exactly the AIR heights listed in
+// `fixed_air_heights` once those heights are forced onto the record arenas and the system.
+// See crates/sdk/src/prover/root.rs for intended usage
+#[test]
+fn test_vm_override_trace_heights() -> eyre::Result<()> {
+    let e = BabyBearPoseidon2Engine::new(FriParameters::standard_fast());
+    // Minimal program: a single ADD followed by TERMINATE.
+    let program = Program::<BabyBear>::from_instructions(&[
+        Instruction::large_from_isize(ADD.global_opcode(), 0, 4, 0, 4, 0, 0, 0),
+        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
+    ]);
+    let committed_exe = Arc::new(VmCommittedExe::<BabyBearPoseidon2Config>::commit(
+        program.into(),
+        e.config().pcs(),
+    ));
+    // It's hard to define the mapping semantically. Please recompute the following magic AIR
+    // heights by hand whenever something changes.
+    let fixed_air_heights = vec![
+        2, 2, 16, 1, 8, 4, 2, 8192, 4096, 2048, 1024, 512, 256, 128, 64, 32, 262144,
+    ];
+
+    // Test getting heights.
+    let vm_config = NativeConfig::aggregation(8, 3);
+    let (mut vm, pk) = VirtualMachine::new_with_keygen(e, NativeCpuBuilder, vm_config)?;
+    let vk = pk.get_vk();
+
+    // Set up initial state, memory and the committed program, then run preflight execution
+    // with `fixed_air_heights` passed in.
+    let state = vm.create_initial_state(&committed_exe.exe, vec![]);
+    vm.transport_init_memory_to_device(&state.memory);
+    let cached_program_trace = vm.transport_committed_exe_to_device(&committed_exe);
+    vm.load_program(cached_program_trace);
+    let mut preflight_interpreter = vm.preflight_interpreter(&committed_exe.exe)?;
+    let PreflightExecutionOutput {
+        system_records,
+        mut record_arenas,
+        ..
+    } = vm.execute_preflight(&mut preflight_interpreter, state, None, &fixed_air_heights)?;
+
+    // Before forcing dimensions, only one arena is expected to hold a row: every entry is 0
+    // except the AIR mapped from executor index 6.
+    let mut expected_actual_heights = vec![0; vk.inner.per_air.len()];
+    let executor_idx_to_air_idx = vm.executor_idx_to_air_idx();
+    expected_actual_heights[executor_idx_to_air_idx[6]] = 1; // corresponds to FieldArithmeticChip
+    assert_eq!(
+        record_arenas
+            .iter()
+            // Rows used so far = trace offset divided by the arena width.
+            .map(|ra| ra.trace_offset() / ra.width())
+            .collect_vec(),
+        expected_actual_heights
+    );
+    for ra in &mut record_arenas {
+        ra.force_matrix_dimensions();
+    }
+    vm.override_system_trace_heights(&fixed_air_heights);
+
+    // The generated proving context must report exactly the fixed heights, in order.
+    let ctx = vm.generate_proving_ctx(system_records, record_arenas)?;
+    let air_heights: Vec<_> = ctx
+        .per_air
+        .iter()
+        .map(|(_, air_ctx)| air_ctx.main_trace_height() as u32)
+        .collect();
+    assert_eq!(air_heights, fixed_air_heights);
+    Ok(())
+}
+
+#[test]
+fn test_vm_1_optional_air() -> eyre::Result<()> {
+    // The aggregation VmConfig carries Core/Poseidon2/FieldArithmetic/FieldExtension chips, but
+    // the program below only uses Core and FieldArithmetic. The remaining chips should not
+    // contribute AIR proof inputs, so the proof must contain fewer AIRs than the proving key.
+    let config = NativeConfig::aggregation(4, 3);
+    let fri_params = standard_fri_params_with_100_bits_conjectured_security(3);
+    let engine = BabyBearPoseidon2Engine::new(fri_params);
+    let (vm, pk) = VirtualMachine::new_with_keygen(engine, NativeCpuBuilder, config)?;
+    let num_airs = pk.per_air.len();
+
+    let n = 6;
+    let program = Program::from_instructions(&[
+        Instruction::large_from_isize(ADD.global_opcode(), 0, n, 0, 4, 0, 0, 0),
+        Instruction::large_from_isize(SUB.global_opcode(), 0, 0, 1, 4, 4, 0, 0),
+        Instruction::from_isize(
+            NativeBranchEqualOpcode(BNE).global_opcode(),
+            0,
+            0,
+            -(DEFAULT_PC_STEP as isize),
+            4,
+            0,
+        ),
+        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
+    ]);
+
+    // Commit the program before it is moved into the executable.
+    let cached_program_trace = vm.commit_program_on_device(&program);
+    let exe = Arc::new(VmExe::new(program));
+    let mut prover = VmInstance::new(vm, exe, cached_program_trace)?;
+
+    let proof = SingleSegmentVmProver::prove(&mut prover, vec![], &vec![256; num_airs])?;
+    assert!(proof.per_air.len() < num_airs, "Expect less used AIRs");
+    verify_single(&prover.vm.engine, &pk.get_vk(), &proof)?;
+    Ok(())
+}
+
+#[test]
+fn test_vm_public_values() -> eyre::Result<()> {
+    setup_tracing();
+    let num_public_values = 100;
+    let config = test_system_config_without_continuations().with_public_values(num_public_values);
+    assert!(!config.continuation_enabled);
+    let fri_params = standard_fri_params_with_100_bits_conjectured_security(3);
+    let engine = BabyBearPoseidon2Engine::new(fri_params);
+    let (vm, pk) = VirtualMachine::new_with_keygen(engine, SystemCpuBuilder, config)?;
+
+    // One PUBLISH followed by TERMINATE.
+    let program = Program::from_instructions(&[
+        Instruction::from_usize(PUBLISH.global_opcode(), [0, 12, 2, 0, 0, 0]),
+        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
+    ]);
+    let cached_program_trace = vm.commit_program_on_device(&program);
+    let exe = Arc::new(VmExe::new(program));
+    let mut prover = VmInstance::new(vm, exe, cached_program_trace)?;
+    let proof = SingleSegmentVmProver::prove(&mut prover, vec![], &vec![256; pk.per_air.len()])?;
+
+    let public_values_air = &proof.per_air[PUBLIC_VALUES_AIR_ID];
+    assert_eq!(public_values_air.air_id, PUBLIC_VALUES_AIR_ID);
+
+    // Expected public values: 12 at index 2, zero everywhere else.
+    let mut expected = vec![BabyBear::ZERO; num_public_values];
+    expected[2] = BabyBear::from_canonical_u32(12);
+    assert_eq!(public_values_air.public_values, expected);
+
+    verify_single(&prover.vm.engine, &pk.get_vk(), &proof)?;
+    Ok(())
+}
+
+#[test]
+fn test_vm_initial_memory() {
+    // Program that fails if mem[(4, 7)] != 101.
+    let program = Program::from_instructions(&[
+        // if [7]_4 == 101 then pc += 2 * DEFAULT_PC_STEP (skips the panic, lands on TERMINATE)
+        Instruction::<BabyBear>::from_isize(
+            NativeBranchEqualOpcode(BEQ).global_opcode(),
+            7,
+            101,
+            2 * DEFAULT_PC_STEP as isize,
+            4,
+            0,
+        ),
+        // Reached only when the branch above is not taken: DebugPanic phantom.
+        Instruction::<BabyBear>::from_isize(
+            PHANTOM.global_opcode(),
+            0,
+            0,
+            SysPhantom::DebugPanic as isize,
+            0,
+            0,
+        ),
+        Instruction::<BabyBear>::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
+    ]);
+
+    // Reinterpret the field element for 101 as the 4 raw bytes of its in-memory representation.
+    let raw = unsafe { transmute::<BabyBear, [u8; 4]>(BabyBear::from_canonical_u32(101)) };
+    // One entry per byte, keyed by (4, 7 * 4 + i); presumably (address space, byte offset), so
+    // this places the element at cell 7 of address space 4. TODO confirm against `VmExe`.
+    let init_memory = BTreeMap::from_iter((0..4).map(|i| ((4u32, 7u32 * 4 + i), raw[i as usize])));
+
+    let config = test_native_continuations_config();
+    let exe = VmExe {
+        program,
+        pc_start: 0,
+        init_memory,
+        fn_bounds: Default::default(),
+    };
+    air_test(NativeCpuBuilder, config, exe);
+}
+
+// Proves a small countdown program with continuations enabled and checks the public values
+// of the AIR at `merkle_air_idx` against a digest recomputed from an all-zero chunk.
+#[test]
+fn test_vm_1_persistent() -> eyre::Result<()> {
+    let engine = BabyBearPoseidon2Engine::new(FriParameters::standard_fast());
+    let config = test_native_continuations_config();
+    // Read these before `config` is moved into the VM below.
+    // NOTE(review): assumes the Merkle AIR directly follows the memory boundary AIR.
+    let merkle_air_idx = config.system.memory_boundary_air_id() + 1;
+    let ptr_max_bits = config.system.memory_config.pointer_max_bits;
+    let addr_space_height = config.system.memory_config.addr_space_height;
+
+    let (vm, pk) = VirtualMachine::new_with_keygen(engine, NativeCpuBuilder, config)?;
+
+    let n = 6;
+    let instructions = vec![
+        Instruction::large_from_isize(ADD.global_opcode(), 0, n, 0, 4, 0, 0, 0),
+        Instruction::large_from_isize(SUB.global_opcode(), 0, 0, 1, 4, 4, 0, 0),
+        // Loop back one instruction (to the SUB) while the branch condition holds.
+        Instruction::from_isize(
+            NativeBranchEqualOpcode(BNE).global_opcode(),
+            0,
+            0,
+            -(DEFAULT_PC_STEP as isize),
+            4,
+            0,
+        ),
+        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
+    ];
+
+    let program = Program::from_instructions(&instructions);
+    let cached_program_trace = vm.commit_program_on_device(&program);
+    let exe = Arc::new(VmExe::new(program));
+    let mut prover = VmInstance::new(vm, exe, cached_program_trace)?;
+    let proof = ContinuationVmProver::prove(&mut prover, vec![])?;
+
+    {
+        // The program is expected to fit in a single segment.
+        assert_eq!(proof.per_segment.len(), 1);
+        let public_values = proof.per_segment[0].per_air[merkle_air_idx]
+            .public_values
+            .clone();
+        // 16 public values whose two halves must match; presumably the initial and final
+        // memory roots. TODO confirm.
+        assert_eq!(public_values.len(), 16);
+        assert_eq!(public_values[..8], public_values[8..]);
+        // Recompute the expected digest by hashing a zero chunk with itself repeatedly.
+        let mut digest = [BabyBear::ZERO; CHUNK];
+        let compression = vm_poseidon2_hasher();
+        for _ in 0..ptr_max_bits + addr_space_height - 2 {
+            digest = compression.compress(&digest, &digest);
+        }
+        assert_eq!(
+            public_values[..8],
+            // The value when you start with zeros and repeatedly hash the value with itself
+            // ptr_max_bits + addr_space_height - 2 times.
+            // The height of the tree is ptr_max_bits + addr_space_height - log2(8). The leaf also
+            // must be hashed once with padding for security.
+            digest
+        );
+    }
+    verify_segments(&prover.vm.engine, &pk.get_vk(), &proof.per_segment)?;
+    Ok(())
+}
+
+#[test]
+fn test_vm_without_field_arithmetic() {
+    /*
+    Instruction 0 assigns word[0]_4 to 5.
+    Instruction 1 checks if word[0]_4 is *not* 4, and if so jumps to instruction 4.
+    Instruction 2 is never run.
+    Instruction 3 terminates.
+    Instruction 4 checks if word[0]_4 is 5, and if so jumps to instruction 3 to terminate.
+     */
+    let instructions = vec![
+        // word[0]_4 <- word[5]_0
+        Instruction::large_from_isize(ADD.global_opcode(), 0, 5, 0, 4, 0, 0, 0),
+        // if word[0]_4 != 4 then pc += 3 * DEFAULT_PC_STEP
+        Instruction::from_isize(
+            NativeBranchEqualOpcode(BNE).global_opcode(),
+            0,
+            4,
+            3 * DEFAULT_PC_STEP as isize,
+            4,
+            0,
+        ),
+        // word[2]_4 <- pc + DEFAULT_PC_STEP, pc -= 2 * DEFAULT_PC_STEP
+        Instruction::from_isize(
+            JAL.global_opcode(),
+            2,
+            -2 * DEFAULT_PC_STEP as isize,
+            0,
+            4,
+            0,
+        ),
+        // terminate
+        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
+        // if word[0]_4 == 5 then pc -= DEFAULT_PC_STEP
+        Instruction::from_isize(
+            NativeBranchEqualOpcode(BEQ).global_opcode(),
+            0,
+            5,
+            -(DEFAULT_PC_STEP as isize),
+            4,
+            0,
+        ),
+    ];
+
+    let program = Program::from_instructions(&instructions);
+
+    // Run the program through `air_test` with the non-continuation native test config.
+    air_test(NativeCpuBuilder, test_native_config(), program);
+}
+
+// Hand-written Fibonacci-style loop over address space 4, run through `air_test`.
+// Notation: [x]_4 is memory cell x of address space 4; [x]_0 is the immediate value x.
+#[test]
+fn test_vm_fibonacci_old() {
+    let instructions = vec![
+        // [0]_4 <- [19]_0
+        Instruction::large_from_isize(ADD.global_opcode(), 0, 19, 0, 4, 0, 0, 0),
+        // [2]_4 <- [11]_0
+        Instruction::large_from_isize(ADD.global_opcode(), 2, 11, 0, 4, 0, 0, 0),
+        // [3]_4 <- [1]_0
+        Instruction::large_from_isize(ADD.global_opcode(), 3, 1, 0, 4, 0, 0, 0),
+        // [10]_4 <- [0]_0
+        Instruction::large_from_isize(ADD.global_opcode(), 10, 0, 0, 4, 0, 0, 0),
+        // [11]_4 <- [1]_0
+        Instruction::large_from_isize(ADD.global_opcode(), 11, 1, 0, 4, 0, 0, 0),
+        // if [2]_4 == [0]_4 then pc += 7 * DEFAULT_PC_STEP (exits the loop to TERMINATE)
+        Instruction::from_isize(
+            NativeBranchEqualOpcode(BEQ).global_opcode(),
+            2,
+            0,
+            7 * DEFAULT_PC_STEP as isize,
+            4,
+            4,
+        ),
+        // [2]_4 <- [2]_4 + [3]_4
+        Instruction::large_from_isize(ADD.global_opcode(), 2, 2, 3, 4, 4, 4, 0),
+        // [4]_4 <- [[2]_4 - 2]_4
+        Instruction::from_isize(LOADW.global_opcode(), 4, -2, 2, 4, 4),
+        // [5]_4 <- [[2]_4 - 1]_4
+        Instruction::from_isize(LOADW.global_opcode(), 5, -1, 2, 4, 4),
+        // [6]_4 <- [4]_4 + [5]_4
+        Instruction::large_from_isize(ADD.global_opcode(), 6, 4, 5, 4, 4, 4, 0),
+        // [[2]_4]_4 <- [6]_4
+        Instruction::from_isize(STOREW.global_opcode(), 6, 0, 2, 4, 4),
+        // [7]_4 <- pc + DEFAULT_PC_STEP, pc -= 6 * DEFAULT_PC_STEP (back to the BEQ)
+        Instruction::from_isize(
+            JAL.global_opcode(),
+            7,
+            -6 * DEFAULT_PC_STEP as isize,
+            0,
+            4,
+            0,
+        ),
+        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
+    ];
+
+    let program = Program::from_instructions(&instructions);
+
+    air_test(NativeCpuBuilder, test_native_config(), program);
+}
+
+// Same loop as `test_vm_fibonacci_old`, interleaved with CtStart/CtEnd debug phantoms.
+#[test]
+fn test_vm_fibonacci_old_cycle_tracker() {
+    // NOTE: Instructions commented until cycle tracker instructions are not counted as additional
+    // assembly Instructions
+    // NOTE(review): no instruction below is actually commented out, and the jump offsets (9 and
+    // -8) do count the phantom instructions; the note above may be stale. Confirm.
+    let instructions = vec![
+        Instruction::debug(PhantomDiscriminant(SysPhantom::CtStart as u16)),
+        Instruction::debug(PhantomDiscriminant(SysPhantom::CtStart as u16)),
+        // [0]_4 <- [19]_0
+        Instruction::large_from_isize(ADD.global_opcode(), 0, 19, 0, 4, 0, 0, 0),
+        // [2]_4 <- [11]_0
+        Instruction::large_from_isize(ADD.global_opcode(), 2, 11, 0, 4, 0, 0, 0),
+        // [3]_4 <- [1]_0
+        Instruction::large_from_isize(ADD.global_opcode(), 3, 1, 0, 4, 0, 0, 0),
+        // [10]_4 <- [0]_0
+        Instruction::large_from_isize(ADD.global_opcode(), 10, 0, 0, 4, 0, 0, 0),
+        // [11]_4 <- [1]_0
+        Instruction::large_from_isize(ADD.global_opcode(), 11, 1, 0, 4, 0, 0, 0),
+        Instruction::debug(PhantomDiscriminant(SysPhantom::CtEnd as u16)),
+        Instruction::debug(PhantomDiscriminant(SysPhantom::CtStart as u16)),
+        // if [2]_4 == [0]_4 then pc += 9 * DEFAULT_PC_STEP
+        Instruction::from_isize(
+            NativeBranchEqualOpcode(BEQ).global_opcode(),
+            2,
+            0,
+            9 * DEFAULT_PC_STEP as isize,
+            4,
+            4,
+        ),
+        // [2]_4 <- [2]_4 + [3]_4
+        Instruction::large_from_isize(ADD.global_opcode(), 2, 2, 3, 4, 4, 4, 0),
+        Instruction::debug(PhantomDiscriminant(SysPhantom::CtStart as u16)),
+        // [4]_4 <- [[2]_4 - 2]_4
+        Instruction::from_isize(LOADW.global_opcode(), 4, -2, 2, 4, 4),
+        // [5]_4 <- [[2]_4 - 1]_4
+        Instruction::from_isize(LOADW.global_opcode(), 5, -1, 2, 4, 4),
+        // [6]_4 <- [4]_4 + [5]_4
+        Instruction::large_from_isize(ADD.global_opcode(), 6, 4, 5, 4, 4, 4, 0),
+        // [[2]_4]_4 <- [6]_4
+        Instruction::from_isize(STOREW.global_opcode(), 6, 0, 2, 4, 4),
+        Instruction::debug(PhantomDiscriminant(SysPhantom::CtEnd as u16)),
+        // [7]_4 <- pc + DEFAULT_PC_STEP, pc -= 8 * DEFAULT_PC_STEP (back to the BEQ)
+        Instruction::from_isize(
+            JAL.global_opcode(),
+            7,
+            -8 * DEFAULT_PC_STEP as isize,
+            0,
+            4,
+            0,
+        ),
+        Instruction::debug(PhantomDiscriminant(SysPhantom::CtEnd as u16)),
+        Instruction::debug(PhantomDiscriminant(SysPhantom::CtEnd as u16)),
+        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
+    ];
+
+    let program = Program::from_instructions(&instructions);
+
+    air_test(NativeCpuBuilder, test_native_config(), program);
+}
+
+/// Program shared by the field-extension tests below.
+///
+/// Eight `ADD`s write cells 0..=7 of address space 4, then `FE4ADD` (twice), `FE4SUB`,
+/// `BBE4MUL` and `BBE4DIV` run on those cells, followed by `TERMINATE`.
+fn fe4_test_program() -> Program<BabyBear> {
+    Program::from_instructions(&[
+        Instruction::large_from_isize(ADD.global_opcode(), 0, 0, 1, 4, 0, 0, 0),
+        Instruction::large_from_isize(ADD.global_opcode(), 1, 0, 2, 4, 0, 0, 0),
+        Instruction::large_from_isize(ADD.global_opcode(), 2, 0, 1, 4, 0, 0, 0),
+        Instruction::large_from_isize(ADD.global_opcode(), 3, 0, 2, 4, 0, 0, 0),
+        Instruction::large_from_isize(ADD.global_opcode(), 4, 0, 2, 4, 0, 0, 0),
+        Instruction::large_from_isize(ADD.global_opcode(), 5, 0, 1, 4, 0, 0, 0),
+        Instruction::large_from_isize(ADD.global_opcode(), 6, 0, 1, 4, 0, 0, 0),
+        Instruction::large_from_isize(ADD.global_opcode(), 7, 0, 2, 4, 0, 0, 0),
+        Instruction::from_isize(FE4ADD.global_opcode(), 8, 0, 4, 4, 4),
+        Instruction::from_isize(FE4ADD.global_opcode(), 8, 0, 4, 4, 4),
+        Instruction::from_isize(FE4SUB.global_opcode(), 12, 0, 4, 4, 4),
+        Instruction::from_isize(BBE4MUL.global_opcode(), 12, 0, 4, 4, 4),
+        Instruction::from_isize(BBE4DIV.global_opcode(), 12, 0, 4, 4, 4),
+        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
+    ])
+}
+
+// Field-extension program under the non-continuation native test config.
+#[test]
+fn test_vm_field_extension_arithmetic() {
+    let program = fe4_test_program();
+
+    air_test(NativeCpuBuilder, test_native_config(), program);
+}
+
+// Field-extension program with `max_access_adapter_n` lowered to 8.
+#[test]
+fn test_vm_max_access_adapter_8() {
+    let program = fe4_test_program();
+
+    let mut config = test_native_config();
+    {
+        // Compare the AIR inventory before and after lowering `max_access_adapter_n`.
+        let num_sys_airs1 = config.system.num_airs();
+        let inventory1: AirInventory<BabyBearPoseidon2Config> = config.create_airs().unwrap();
+        let num_ext_airs = inventory1.ext_airs().len();
+        let mem_inv1 = &inventory1.system().memory;
+        config.system.memory_config.max_access_adapter_n = 8;
+        let num_sys_airs2 = config.system.num_airs();
+        let inventory2: AirInventory<BabyBearPoseidon2Config> = config.create_airs().unwrap();
+        let mem_inv2 = &inventory2.system().memory;
+        // AccessAdapterAir with N=16/32 are disabled.
+        assert_eq!(
+            mem_inv1.access_adapters.len(),
+            mem_inv2.access_adapters.len() + 2
+        );
+        assert_eq!(num_sys_airs1, num_sys_airs2 + 2);
+        assert_eq!(
+            inventory1.into_airs().collect_vec().len(),
+            num_sys_airs1 + num_ext_airs
+        );
+        assert_eq!(
+            inventory2.into_airs().collect_vec().len(),
+            num_sys_airs2 + num_ext_airs
+        );
+    }
+    // Run with the modified config (max_access_adapter_n = 8) rather than a fresh default
+    // one, so the restricted adapter set is exercised end to end.
+    air_test(NativeCpuBuilder, config, program);
+}
+
+// Field-extension program with continuations enabled.
+#[test]
+fn test_vm_field_extension_arithmetic_persistent() {
+    let program = fe4_test_program();
+    let config = test_native_continuations_config();
+    air_test(NativeCpuBuilder, config, program);
+}
+
+// Runs a hint-driven program (HintInput phantom plus HINT_STOREW) with the single input
+// vector `[2]`, requiring at least 1 segment.
+#[test]
+fn test_vm_hint() {
+    let instructions = vec![
+        Instruction::large_from_isize(ADD.global_opcode(), 16, 0, 0, 4, 0, 0, 0),
+        Instruction::large_from_isize(ADD.global_opcode(), 20, 16, 16777220, 4, 4, 0, 0),
+        Instruction::large_from_isize(ADD.global_opcode(), 32, 20, 0, 4, 4, 0, 0),
+        Instruction::large_from_isize(ADD.global_opcode(), 20, 20, 1, 4, 4, 0, 0),
+        // Instruction 4: HintInput phantom.
+        Instruction::from_isize(
+            PHANTOM.global_opcode(),
+            0,
+            0,
+            NativePhantom::HintInput as isize,
+            0,
+            0,
+        ),
+        Instruction::from_isize(HINT_STOREW.global_opcode(), 32, 0, 0, 4, 4),
+        Instruction::from_isize(LOADW.global_opcode(), 38, 0, 32, 4, 4),
+        Instruction::large_from_isize(ADD.global_opcode(), 44, 20, 0, 4, 4, 0, 0),
+        Instruction::from_isize(MUL.global_opcode(), 24, 38, 1, 4, 4),
+        Instruction::large_from_isize(ADD.global_opcode(), 20, 20, 24, 4, 4, 4, 0),
+        Instruction::large_from_isize(ADD.global_opcode(), 50, 16, 0, 4, 4, 0, 0),
+        // Instruction 11: jump forward 6 instructions, to the second BNE (instruction 17).
+        Instruction::from_isize(
+            JAL.global_opcode(),
+            24,
+            6 * DEFAULT_PC_STEP as isize,
+            0,
+            4,
+            0,
+        ),
+        Instruction::from_isize(MUL.global_opcode(), 0, 50, 1, 4, 4),
+        Instruction::large_from_isize(ADD.global_opcode(), 0, 44, 0, 4, 4, 4, 0),
+        Instruction::from_isize(HINT_STOREW.global_opcode(), 0, 0, 0, 4, 4),
+        Instruction::large_from_isize(ADD.global_opcode(), 50, 50, 1, 4, 4, 0, 0),
+        // Instruction 16: if [50]_4 != [38]_4, jump back 4 instructions (to instruction 12).
+        Instruction::from_isize(
+            NativeBranchEqualOpcode(BNE).global_opcode(),
+            50,
+            38,
+            -4 * (DEFAULT_PC_STEP as isize),
+            4,
+            4,
+        ),
+        // Instruction 17: if [50]_4 != [38]_4, jump back 5 instructions (to instruction 12).
+        Instruction::from_isize(
+            NativeBranchEqualOpcode(BNE).global_opcode(),
+            50,
+            38,
+            -5 * (DEFAULT_PC_STEP as isize),
+            4,
+            4,
+        ),
+        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
+    ];
+
+    let program = Program::from_instructions(&instructions);
+
+    type F = BabyBear;
+
+    let input_stream: Vec<Vec<F>> = vec![vec![F::TWO]];
+    let config = test_native_config();
+    air_test_with_min_segments(NativeCpuBuilder, config, program, input_stream, 1);
+}
+
+// A single HintLoad phantom: the one input vector must be consumed and appear in the hint
+// space, and the hint stream must hold a single zero.
+#[test]
+fn test_hint_load_1() {
+    type F = BabyBear;
+    let program = Program::from_instructions(&[
+        Instruction::phantom(
+            PhantomDiscriminant(NativePhantom::HintLoad as u16),
+            F::ZERO,
+            F::ZERO,
+            0,
+        ),
+        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
+    ]);
+
+    let final_state = execute_program(program, vec![vec![F::ONE, F::TWO]]);
+    let streams = final_state.streams;
+    assert!(streams.input_stream.is_empty());
+    let expected_hint_stream = VecDeque::from(vec![F::ZERO]);
+    assert_eq!(streams.hint_stream, expected_hint_stream);
+    let expected_hint_space = vec![vec![F::ONE, F::TWO]];
+    assert_eq!(streams.hint_space, expected_hint_space);
+}
+
+// Two HintLoad phantoms with a HINT_STOREW in between: the stored cell must read back as
+// zero, both input vectors must land in the hint space, and the hint stream must end with a
+// single one.
+#[test]
+fn test_hint_load_2() {
+    type F = BabyBear;
+    let hint_load = || {
+        Instruction::phantom(
+            PhantomDiscriminant(NativePhantom::HintLoad as u16),
+            F::ZERO,
+            F::ZERO,
+            0,
+        )
+    };
+    let program = Program::from_instructions(&[
+        hint_load(),
+        Instruction::from_isize(HINT_STOREW.global_opcode(), 32, 0, 0, 4, 4),
+        hint_load(),
+        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
+    ]);
+    let input = vec![vec![F::ONE, F::TWO], vec![F::TWO, F::ONE]];
+
+    let final_state = execute_program(program, input);
+    // Cell 32 of address space 4 is the HINT_STOREW destination.
+    let [stored] = unsafe { final_state.memory.read::<F, 1>(4, 32) };
+    assert_eq!(stored, F::ZERO);
+    let streams = final_state.streams;
+    assert!(streams.input_stream.is_empty());
+    assert_eq!(streams.hint_stream, VecDeque::from(vec![F::ONE]));
+    let expected_hint_space = vec![vec![F::ONE, F::TWO], vec![F::TWO, F::ONE]];
+    assert_eq!(streams.hint_space, expected_hint_space);
+}
+
+#[test]
+fn test_vm_pure_execution_non_continuation() {
+    type F = BabyBear;
+    let n = 6;
+    /*
+    Instruction 0 assigns word[0]_4 to n.
+    Instruction 4 terminates
+    The remainder is a loop that decrements word[0]_4 until it reaches 0, then terminates.
+    Instruction 1 checks if word[0]_4 is 0 yet, and if so jumps to instruction 4 to terminate
+    Instruction 2 decrements word[0]_4 (using word[1]_0, i.e. the immediate 1)
+    Instruction 3 uses JAL as a simple jump to go back to instruction 1 (repeating the loop).
+     */
+    let instructions: Vec<Instruction<F>> = vec![
+        // word[0]_4 <- word[n]_0
+        Instruction::large_from_isize(ADD.global_opcode(), 0, n, 0, 4, 0, 0, 0),
+        // if word[0]_4 == 0 then pc += 3 * DEFAULT_PC_STEP
+        Instruction::from_isize(
+            NativeBranchEqualOpcode(BEQ).global_opcode(),
+            0,
+            0,
+            3 * DEFAULT_PC_STEP as isize,
+            4,
+            0,
+        ),
+        // word[0]_4 <- word[0]_4 - word[1]_0
+        Instruction::large_from_isize(SUB.global_opcode(), 0, 0, 1, 4, 4, 0, 0),
+        // word[2]_4 <- pc + DEFAULT_PC_STEP, pc -= 2 * DEFAULT_PC_STEP
+        Instruction::from_isize(
+            JAL.global_opcode(),
+            2,
+            -2 * DEFAULT_PC_STEP as isize,
+            0,
+            4,
+            0,
+        ),
+        // terminate
+        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
+    ];
+
+    let exe = VmExe::new(Program::from_instructions(&instructions));
+    let executor = VmExecutor::new(test_native_config()).unwrap();
+    let instance = executor.instance(&exe).unwrap();
+    instance.execute(vec![], None).expect("Failed to execute");
+}
+
+#[test]
+fn test_vm_pure_execution_continuation() {
+    type F = BabyBear;
+    let instructions: Vec<Instruction<F>> = vec![
+        Instruction::large_from_isize(ADD.global_opcode(), 0, 0, 1, 4, 0, 0, 0),
+        Instruction::large_from_isize(ADD.global_opcode(), 1, 0, 2, 4, 0, 0, 0),
+        Instruction::large_from_isize(ADD.global_opcode(), 2, 0, 1, 4, 0, 0, 0),
+        Instruction::large_from_isize(ADD.global_opcode(), 3, 0, 2, 4, 0, 0, 0),
+        Instruction::large_from_isize(ADD.global_opcode(), 4, 0, 2, 4, 0, 0, 0),
+        Instruction::large_from_isize(ADD.global_opcode(), 5, 0, 1, 4, 0, 0, 0),
+        Instruction::large_from_isize(ADD.global_opcode(), 6, 0, 1, 4, 0, 0, 0),
+        Instruction::large_from_isize(ADD.global_opcode(), 7, 0, 2, 4, 0, 0, 0),
+        Instruction::from_isize(FE4ADD.global_opcode(), 8, 0, 4, 4, 4),
+        Instruction::from_isize(FE4ADD.global_opcode(), 8, 0, 4, 4, 4),
+        Instruction::from_isize(FE4SUB.global_opcode(), 12, 0, 4, 4, 4),
+        Instruction::from_isize(BBE4MUL.global_opcode(), 12, 0, 4, 4, 4),
+        Instruction::from_isize(BBE4DIV.global_opcode(), 12, 0, 4, 4, 4),
+        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
+    ];
+
+    let exe = VmExe::new(Program::from_instructions(&instructions));
+    let executor = VmExecutor::new(test_native_continuations_config()).unwrap();
+    let instance = executor.instance(&exe).unwrap();
+    instance.execute(vec![], None).expect("Failed to execute");
+}
+
+#[test]
+fn test_vm_execute_native_chips() {
+    type F = BabyBear;
+
+    let instructions = vec![
+        // Field Arithmetic operations (FieldArithmeticChip)
+        Instruction::large_from_isize(ADD.global_opcode(), 0, 0, 1, 4, 0, 0, 0),
+        Instruction::large_from_isize(SUB.global_opcode(), 1, 10, 2, 4, 0, 0, 0),
+        Instruction::large_from_isize(MUL.global_opcode(), 2, 3, 4, 4, 0, 0, 0),
+        Instruction::large_from_isize(DIV.global_opcode(), 3, 20, 5, 4, 0, 0, 0),
+        // Field Extension operations (FieldExtensionChip)
+        Instruction::from_isize(FE4ADD.global_opcode(), 8, 0, 4, 4, 4),
+        Instruction::from_isize(FE4SUB.global_opcode(), 12, 8, 4, 4, 4),
+        Instruction::from_isize(BBE4MUL.global_opcode(), 16, 12, 8, 4, 4),
+        Instruction::from_isize(BBE4DIV.global_opcode(), 20, 16, 12, 4, 4),
+        // Branch operations (NativeBranchEqChip)
+        Instruction::from_isize(
+            NativeBranchEqualOpcode(BEQ).global_opcode(),
+            0,
+            0,
+            DEFAULT_PC_STEP as isize,
+            4,
+            4,
+        ),
+        Instruction::from_isize(
+            NativeBranchEqualOpcode(BNE).global_opcode(),
+            1,
+            2,
+            DEFAULT_PC_STEP as isize,
+            4,
+            4,
+        ),
+        // JAL operation (JalRangeCheckChip)
+        Instruction::from_isize(
+            NativeJalOpcode::JAL.global_opcode(),
+            24,
+            DEFAULT_PC_STEP as isize,
+            0,
+            4,
+            0,
+        ),
+        // Range check operation (JalRangeCheckChip)
+        Instruction::from_isize(
+            NativeRangeCheckOpcode::RANGE_CHECK.global_opcode(),
+            0,
+            10,
+            8,
+            4,
+            0,
+        ),
+        // Load/Store operations (NativeLoadStoreChip)
+        Instruction::from_isize(STOREW.global_opcode(), 0, 0, 28, 4, 4),
+        Instruction::from_isize(LOADW.global_opcode(), 32, 0, 28, 4, 4),
+        Instruction::from_isize(
+            PHANTOM.global_opcode(),
+            0,
+            0,
+            NativePhantom::HintInput as isize,
+            0,
+            0,
+        ),
+        Instruction::from_isize(HINT_STOREW.global_opcode(), 32, 0, 0, 4, 4),
+        // Cast to field operation (CastFChip)
+        Instruction::from_usize(CastfOpcode::CASTF.global_opcode(), [36, 40, 0, 2, 4]),
+        // Poseidon2 operations (Poseidon2Chip)
+        Instruction::new(
+            Poseidon2Opcode::PERM_POS2.global_opcode(),
+            F::from_canonical_usize(44),
+            F::from_canonical_usize(48),
+            F::ZERO,
+            F::from_canonical_usize(4),
+            F::from_canonical_usize(4),
+            F::ZERO,
+            F::ZERO,
+        ),
+        Instruction::new(
+            Poseidon2Opcode::COMP_POS2.global_opcode(),
+            F::from_canonical_usize(52),
+            F::from_canonical_usize(44),
+            F::from_canonical_usize(48),
+            F::from_canonical_usize(4),
+            F::from_canonical_usize(4),
+            F::ZERO,
+            F::ZERO,
+        ),
+        // FRI operation (FriReducedOpeningChip)
+        Instruction::large_from_isize(ADD.global_opcode(), 60, 64, 0, 4, 4, 0, 0), /* a_pointer_pointer, */
+        Instruction::large_from_isize(ADD.global_opcode(), 64, 68, 0, 4, 4, 0, 0), /* b_pointer_pointer, */
+        Instruction::large_from_isize(ADD.global_opcode(), 68, 2, 0, 4, 0, 0, 0), /* length_pointer (value 2), */
+        Instruction::large_from_isize(ADD.global_opcode(), 72, 1, 0, 4, 0, 0, 0), //alpha_pointer
+        Instruction::large_from_isize(ADD.global_opcode(), 76, 80, 0, 4, 4, 0, 0), /* result_pointer, */
+        Instruction::large_from_isize(ADD.global_opcode(), 80, 1, 0, 4, 0, 0, 0), /* is_init (value 1) , */
+        Instruction::from_usize(
+            FriOpcode::FRI_REDUCED_OPENING.global_opcode(),
+            [60, 64, 68, 72, 76, 0, 80],
+        ),
+        // Terminate
+        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
+    ];
+
+    let exe = VmExe::new(Program::from_instructions(&instructions));
+    let input_stream: Vec<Vec<F>> = vec![vec![]];
+
+    let executor = VmExecutor::new(test_rv32_with_kernels_config()).unwrap();
+    let instance = executor.instance(&exe).unwrap();
+    instance
+        .execute(input_stream, None)
+        .expect("Failed to execute");
+}
+
+// This test ensures that metered execution never segments when continuations are disabled
+#[test]
+fn test_single_segment_executor_no_segmentation() {
+    setup_tracing();
+
+    let mut config = test_native_config();
+    config
+        .system
+        .set_segmentation_limits(SegmentationLimits::default().with_max_trace_height(1));
+
+    let engine = BabyBearPoseidon2Engine::new(FriParameters::new_for_testing(3));
+    let (vm, _) = VirtualMachine::new_with_keygen(engine, NativeCpuBuilder, config).unwrap();
+    let instructions: Vec<_> = (0..2 * DEFAULT_SEGMENT_CHECK_INSNS)
+        .map(|_| Instruction::large_from_isize(ADD.global_opcode(), 0, 0, 1, 4, 0, 0, 0))
+        .chain(std::iter::once(Instruction::from_isize(
+            TERMINATE.global_opcode(),
+            0,
+            0,
+            0,
+            0,
+            0,
+        )))
+        .collect();
+
+    let exe = VmExe::new(Program::from_instructions(&instructions));
+    let executor_idx_to_air_idx = vm.executor_idx_to_air_idx();
+    let metered_ctx = vm.build_metered_ctx();
+    vm.executor()
+        .metered_instance(&exe, &executor_idx_to_air_idx)
+        .unwrap()
+        .execute_metered(vec![], metered_ctx)
+        .unwrap();
+}
+
+#[test]
+fn test_vm_execute_metered_cost_native_chips() {
+    type F = BabyBear;
+
+    setup_tracing();
+    let config = test_native_config();
+
+    let engine = BabyBearPoseidon2Engine::new(FriParameters::new_for_testing(3));
+    let (vm, _) = VirtualMachine::new_with_keygen(engine, NativeCpuBuilder, config).unwrap();
+
+    let instructions = vec![
+        // Field Arithmetic operations (FieldArithmeticChip)
+        Instruction::large_from_isize(ADD.global_opcode(), 0, 0, 1, 4, 0, 0, 0),
+        Instruction::large_from_isize(SUB.global_opcode(), 1, 10, 2, 4, 0, 0, 0),
+        Instruction::large_from_isize(MUL.global_opcode(), 2, 3, 4, 4, 0, 0, 0),
+        Instruction::large_from_isize(DIV.global_opcode(), 3, 20, 5, 4, 0, 0, 0),
+        // Terminate
+        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
+    ];
+
+    let exe = VmExe::new(Program::<F>::from_instructions(&instructions));
+
+    let executor_idx_to_air_idx = vm.executor_idx_to_air_idx();
+    let instance = vm
+        .executor()
+        .metered_cost_instance(&exe, &executor_idx_to_air_idx)
+        .unwrap();
+    let ctx = vm.build_metered_cost_ctx();
+    let output = instance
+        .execute_metered_cost(vec![], ctx)
+        .expect("Failed to execute");
+
+    assert_eq!(output.instret, instructions.len() as u64);
+    assert!(output.cost > 0);
+}
+
+#[test]
+fn test_vm_execute_metered_cost_halt() {
+    type F = BabyBear;
+
+    setup_tracing();
+    let config = test_native_config();
+
+    let engine = BabyBearPoseidon2Engine::new(FriParameters::new_for_testing(3));
+    let (vm, _) =
+        VirtualMachine::new_with_keygen(engine, NativeCpuBuilder, config.clone()).unwrap();
+
+    let instructions = vec![
+        // Field Arithmetic operations (FieldArithmeticChip)
+        Instruction::large_from_isize(ADD.global_opcode(), 0, 0, 1, 4, 0, 0, 0),
+        Instruction::large_from_isize(SUB.global_opcode(), 1, 10, 2, 4, 0, 0, 0),
+        Instruction::large_from_isize(MUL.global_opcode(), 2, 3, 4, 4, 0, 0, 0),
+        Instruction::large_from_isize(DIV.global_opcode(), 3, 20, 5, 4, 0, 0, 0),
+        // Terminate
+        Instruction::from_isize(TERMINATE.global_opcode(), 0, 0, 0, 0, 0),
+    ];
+
+    let exe = VmExe::new(Program::<F>::from_instructions(&instructions));
+
+    let executor_idx_to_air_idx = vm.executor_idx_to_air_idx();
+    let instance = vm
+        .executor()
+        .metered_cost_instance(&exe, &executor_idx_to_air_idx)
+        .unwrap();
+    let ctx = vm.build_metered_cost_ctx();
+    let output1 = instance
+        .execute_metered_cost(vec![], ctx)
+        .expect("Failed to execute");
+
+    assert_eq!(output1.instret, instructions.len() as u64);
+
+    let executor_idx_to_air_idx2 = vm.executor_idx_to_air_idx();
+    let instance2 = vm
+        .executor()
+        .metered_cost_instance(&exe, &executor_idx_to_air_idx2)
+        .unwrap();
+    let ctx2 = vm.build_metered_cost_ctx().with_max_execution_cost(0);
+    let output2 = instance2
+        .execute_metered_cost(vec![], ctx2)
+        .expect("Failed to execute");
+
+    assert_eq!(output2.instret, 1);
+
+    assert!(output2.cost < output1.cost);
+}
diff --git a/extensions/native/compiler/tests/io.rs b/extensions/native/circuit/tests/io.rs
similarity index 100%
rename from extensions/native/compiler/tests/io.rs
rename to extensions/native/circuit/tests/io.rs
diff --git a/extensions/native/compiler/tests/poseidon2.rs b/extensions/native/circuit/tests/poseidon2.rs
similarity index 100%
rename from extensions/native/compiler/tests/poseidon2.rs
rename to extensions/native/circuit/tests/poseidon2.rs
diff --git a/extensions/native/compiler/tests/ptr_struct.rs b/extensions/native/circuit/tests/ptr_struct.rs
similarity index 100%
rename from extensions/native/compiler/tests/ptr_struct.rs
rename to extensions/native/circuit/tests/ptr_struct.rs
diff --git a/extensions/native/compiler/tests/public_values.rs b/extensions/native/circuit/tests/public_values.rs
similarity index 55%
rename from extensions/native/compiler/tests/public_values.rs
rename to extensions/native/circuit/tests/public_values.rs
index 7c7abe3bc6..f3ad166d77 100644
--- a/extensions/native/compiler/tests/public_values.rs
+++ b/extensions/native/circuit/tests/public_values.rs
@@ -1,8 +1,11 @@
-use openvm_circuit::arch::{SingleSegmentVmExecutor, SystemConfig};
-use openvm_native_circuit::{execute_program, Native, NativeConfig};
+use openvm_circuit::{arch::PUBLIC_VALUES_AIR_ID, utils::air_test_impl};
+use openvm_native_circuit::{execute_program_with_config, test_native_config, NativeCpuBuilder};
 use openvm_native_compiler::{asm::AsmBuilder, prelude::*};
 use openvm_stark_backend::p3_field::{extension::BinomialExtensionField, FieldAlgebra};
-use openvm_stark_sdk::p3_baby_bear::BabyBear;
+use openvm_stark_sdk::{
+    config::{baby_bear_poseidon2::BabyBearPoseidon2Engine, FriParameters},
+    p3_baby_bear::BabyBear,
+};
 
 type F = BabyBear;
 type EF = BinomialExtensionField<BabyBear, 4>;
@@ -28,21 +31,26 @@ fn test_compiler_public_values() {
     }
 
     let program = builder.compile_isa();
-    let executor = SingleSegmentVmExecutor::new(NativeConfig::new(
-        SystemConfig::default().with_public_values(2),
-        Native,
-    ));
-
-    let exe_result = executor
-        .execute_and_compute_heights(program, vec![])
-        .unwrap();
+    let mut config = test_native_config();
+    config.system.num_public_values = 2;
+    // This is to justify using log_blowup=1
+    assert!(config.as_ref().max_constraint_degree <= 3);
+    let fri_params = FriParameters::new_for_testing(1);
+    let (_, mut vdata) = air_test_impl::<BabyBearPoseidon2Engine, _>(
+        fri_params,
+        NativeCpuBuilder,
+        config,
+        program,
+        vec![],
+        1,
+        true,
+    )
+    .unwrap();
+    assert_eq!(vdata.len(), 1);
+    let proof = vdata.pop().unwrap().data.proof;
     assert_eq!(
-        exe_result
-            .public_values
-            .into_iter()
-            .flatten()
-            .collect::<Vec<_>>(),
-        vec![public_value_0, public_value_1]
+        &proof.get_public_values()[PUBLIC_VALUES_AIR_ID],
+        &[public_value_0, public_value_1]
     );
 }
 
@@ -66,5 +74,13 @@ fn test_compiler_public_values_no_initial() {
     builder.halt();
 
     let program = builder.compile_isa();
-    execute_program(program, vec![]);
+    let (output, _) = execute_program_with_config::<BabyBearPoseidon2Engine, _>(
+        program,
+        vec![],
+        NativeCpuBuilder,
+        test_native_config(),
+    )
+    .unwrap();
+    assert_eq!(output.system_records.public_values[0], public_value_0);
+    assert_eq!(output.system_records.public_values[1], public_value_1);
 }
diff --git a/extensions/native/compiler/tests/range_check.rs b/extensions/native/circuit/tests/range_check.rs
similarity index 100%
rename from extensions/native/compiler/tests/range_check.rs
rename to extensions/native/circuit/tests/range_check.rs
diff --git a/extensions/native/compiler/Cargo.toml b/extensions/native/compiler/Cargo.toml
index cb41c17f63..5f855abca4 100644
--- a/extensions/native/compiler/Cargo.toml
+++ b/extensions/native/compiler/Cargo.toml
@@ -32,14 +32,11 @@ metrics = { workspace = true, optional = true }
 strum = { workspace = true }
 
 [dev-dependencies]
-p3-symmetric = { workspace = true }
 openvm-circuit = { workspace = true, features = ["test-utils"] }
-openvm-native-circuit = { workspace = true }
 openvm-stark-sdk = { workspace = true }
-rand.workspace = true
 
 [features]
 default = ["parallel", "halo2-compiler"]
 halo2-compiler = ["dep:snark-verifier-sdk"]
 parallel = ["openvm-circuit/parallel"]
-bench-metrics = ["dep:metrics", "openvm-circuit/bench-metrics"]
+metrics = ["dep:metrics", "openvm-circuit/metrics"]
diff --git a/extensions/native/compiler/src/constraints/halo2/compiler.rs b/extensions/native/compiler/src/constraints/halo2/compiler.rs
index ce108addaa..fd75d526d2 100644
--- a/extensions/native/compiler/src/constraints/halo2/compiler.rs
+++ b/extensions/native/compiler/src/constraints/halo2/compiler.rs
@@ -7,7 +7,7 @@ use std::{
 };
 
 use itertools::Itertools;
-#[cfg(feature = "bench-metrics")]
+#[cfg(feature = "metrics")]
 use openvm_circuit::metrics::cycle_tracker::CycleTracker;
 use openvm_stark_backend::p3_field::{ExtensionField, Field, FieldAlgebra, PrimeField};
 use openvm_stark_sdk::{p3_baby_bear::BabyBear, p3_bn254_fr::Bn254Fr};
@@ -135,7 +135,7 @@ impl<C: Config + Debug> Halo2ConstraintCompiler<C> {
     where
         C: Config<N = Bn254Fr, F = BabyBear, EF = BabyBearExt4>,
     {
-        #[cfg(feature = "bench-metrics")]
+        #[cfg(feature = "metrics")]
         let mut cell_tracker = CycleTracker::new();
         let range = Arc::new(halo2_state.builder.range_chip());
         let f_chip = Arc::new(BabyBearChip::new(range.clone()));
@@ -149,10 +149,10 @@ impl<C: Config + Debug> Halo2ConstraintCompiler<C> {
         let mut felts = HashMap::<u32, AssignedBabyBear>::new();
         let mut exts = HashMap::<u32, AssignedBabyBearExt4>::new();
 
-        #[cfg(feature = "bench-metrics")]
+        #[cfg(feature = "metrics")]
         let mut old_stats = stats_snapshot(ctx, range.clone());
         for (instruction, backtrace) in operations {
-            #[cfg(feature = "bench-metrics")]
+            #[cfg(feature = "metrics")]
             if self.profiling {
                 old_stats = stats_snapshot(ctx, range.clone());
             }
@@ -492,11 +492,11 @@ impl<C: Config + Debug> Halo2ConstraintCompiler<C> {
                         range.check_less_than(ctx, vars[&a.0], vars[&b.0], C::F::bits());
                     }
                     DslIr::CycleTrackerStart(_name) => {
-                        #[cfg(feature = "bench-metrics")]
+                        #[cfg(feature = "metrics")]
                         cell_tracker.start(_name);
                     }
                     DslIr::CycleTrackerEnd(_name) => {
-                        #[cfg(feature = "bench-metrics")]
+                        #[cfg(feature = "metrics")]
                         cell_tracker.end(_name);
                     }
                     DslIr::CircuitPublish(val, index) => {
@@ -512,7 +512,7 @@ impl<C: Config + Debug> Halo2ConstraintCompiler<C> {
                 }
                 res.unwrap();
             }
-            #[cfg(feature = "bench-metrics")]
+            #[cfg(feature = "metrics")]
             if self.profiling {
                 let mut new_stats = stats_snapshot(ctx, range.clone());
                 new_stats.diff(&old_stats);
@@ -538,7 +538,7 @@ pub fn convert_efr<F: PrimeField, EF: ExtensionField<F>>(a: &EF) -> Vec<Fr> {
 }
 
 // Unfortunately `builder.statistics()` cannot be called when `ctx` exists.
-#[allow(dead_code)] // used only in bench-metrics
+#[allow(dead_code)] // used only when the `metrics` feature is enabled
 fn stats_snapshot(ctx: &Context<Fr>, range_chip: Arc<RangeChip<Fr>>) -> Halo2Stats {
     Halo2Stats {
         total_gate_cell: ctx.advice.len(),
diff --git a/extensions/native/compiler/src/constraints/halo2/stats.rs b/extensions/native/compiler/src/constraints/halo2/stats.rs
index 0d5192ec82..c18f64d5cb 100644
--- a/extensions/native/compiler/src/constraints/halo2/stats.rs
+++ b/extensions/native/compiler/src/constraints/halo2/stats.rs
@@ -14,7 +14,7 @@ impl Halo2Stats {
     }
 }
 
-#[cfg(feature = "bench-metrics")]
+#[cfg(feature = "metrics")]
 mod emit {
     use metrics::counter;
 
diff --git a/extensions/native/compiler/src/conversion/mod.rs b/extensions/native/compiler/src/conversion/mod.rs
index 9c3fc8d752..af4e5080fb 100644
--- a/extensions/native/compiler/src/conversion/mod.rs
+++ b/extensions/native/compiler/src/conversion/mod.rs
@@ -565,7 +565,7 @@ pub fn convert_program<F: PrimeField32, EF: ExtensionField<F>>(
         }
     }
 
-    let mut result = Program::new_empty(DEFAULT_PC_STEP, 0);
+    let mut result = Program::new_empty(0);
     result.push_instruction_and_debug_info(init_register_0, init_debug_info);
     for block in program.blocks.iter() {
         for (instruction, debug_info) in block.0.iter().zip(block.1.iter()) {
diff --git a/extensions/native/recursion/Cargo.toml b/extensions/native/recursion/Cargo.toml
index c799671a55..4a2600915f 100644
--- a/extensions/native/recursion/Cargo.toml
+++ b/extensions/native/recursion/Cargo.toml
@@ -8,7 +8,7 @@ repository.workspace = true
 
 [dependencies]
 openvm-stark-backend = { workspace = true }
-openvm-native-circuit = { workspace = true }
+openvm-native-circuit = { workspace = true, features = ["test-utils"] }
 openvm-native-compiler = { workspace = true }
 openvm-native-compiler-derive = { workspace = true }
 openvm-stark-sdk = { workspace = true }
@@ -49,10 +49,10 @@ evm-verify = [
     "snark-verifier-sdk/revm",
 ] # evm-verify needs REVM to simulate EVM contract verification
 test-utils = ["openvm-circuit/test-utils"]
-bench-metrics = [
+metrics = [
     "dep:metrics",
-    "openvm-circuit/bench-metrics",
-    "openvm-native-compiler/bench-metrics",
+    "openvm-circuit/metrics",
+    "openvm-native-compiler/metrics",
 ]
 parallel = ["openvm-stark-backend/parallel"]
 mimalloc = ["openvm-stark-backend/mimalloc"]
diff --git a/extensions/native/recursion/src/fri/two_adic_pcs.rs b/extensions/native/recursion/src/fri/two_adic_pcs.rs
index 676da7493f..3e66e05e61 100644
--- a/extensions/native/recursion/src/fri/two_adic_pcs.rs
+++ b/extensions/native/recursion/src/fri/two_adic_pcs.rs
@@ -627,6 +627,7 @@ pub mod tests {
     };
     use openvm_stark_backend::{
         config::{StarkGenericConfig, Val},
+        engine::StarkEngine,
         p3_challenger::{CanObserve, FieldChallenger},
         p3_commit::{Pcs, TwoAdicMultiplicativeCoset},
         p3_matrix::dense::RowMajorMatrix,
@@ -662,8 +663,8 @@ pub mod tests {
         let mut rng = &mut OsRng;
         let log_degrees = &[nb_log2_rows];
         let engine = default_engine();
-        let pcs = engine.config.pcs();
-        let perm = engine.perm;
+        let pcs = engine.config().pcs();
+        let perm = engine.perm.clone();
 
         // Generate proof.
         let domains_and_polys = log_degrees
diff --git a/extensions/native/recursion/src/halo2/mod.rs b/extensions/native/recursion/src/halo2/mod.rs
index b53a298eb4..2046af0993 100644
--- a/extensions/native/recursion/src/halo2/mod.rs
+++ b/extensions/native/recursion/src/halo2/mod.rs
@@ -116,7 +116,7 @@ impl Halo2Prover {
         state.load_witness(witness);
 
         let backend = Halo2ConstraintCompiler::<C>::new(dsl_operations.num_public_values);
-        #[cfg(feature = "bench-metrics")]
+        #[cfg(feature = "metrics")]
         let backend = if profiling {
             backend.with_profiling()
         } else {
@@ -174,10 +174,10 @@ impl Halo2Prover {
         //
         //     pk
         // };
-        #[cfg(feature = "bench-metrics")]
+        #[cfg(feature = "metrics")]
         let start = std::time::Instant::now();
         let pk = keygen_pk2(params, &builder, false).unwrap();
-        #[cfg(feature = "bench-metrics")]
+        #[cfg(feature = "metrics")]
         metrics::gauge!("halo2_keygen_time_ms").set(start.elapsed().as_millis() as f64);
         let break_points = builder.break_points();
 
@@ -212,13 +212,13 @@ impl Halo2Prover {
         profiling: bool,
     ) -> Snark {
         let k = config_params.k;
-        #[cfg(feature = "bench-metrics")]
+        #[cfg(feature = "metrics")]
         let start = std::time::Instant::now();
         let builder = Self::builder(CircuitBuilderStage::Prover, k)
             .use_params(config_params)
             .use_break_points(break_points);
         let builder = Self::populate(builder, dsl_operations, witness, profiling);
-        #[cfg(feature = "bench-metrics")]
+        #[cfg(feature = "metrics")]
         {
             let stats = builder.statistics();
             let total_advices: usize = stats.gate.total_advice_per_phase.into_iter().sum();
@@ -228,7 +228,7 @@ impl Halo2Prover {
         }
         let snark = gen_snark_shplonk(params, pk, builder, None::<&str>);
 
-        #[cfg(feature = "bench-metrics")]
+        #[cfg(feature = "metrics")]
         metrics::gauge!("total_proof_time_ms").set(start.elapsed().as_millis() as f64);
 
         snark
diff --git a/extensions/native/recursion/src/halo2/utils.rs b/extensions/native/recursion/src/halo2/utils.rs
index 365ef460c7..78f66fdce0 100644
--- a/extensions/native/recursion/src/halo2/utils.rs
+++ b/extensions/native/recursion/src/halo2/utils.rs
@@ -1,5 +1,6 @@
 use std::{
     collections::HashMap,
+    env::var,
     io::BufReader,
     path::{Path, PathBuf},
     sync::{Arc, Mutex},
@@ -28,7 +29,6 @@ use snark_verifier_sdk::{
 };
 
 use crate::halo2::Halo2Params;
-pub const DEFAULT_PARAMS_DIR: &str = "./params";
 static TESTING_KZG_PARAMS_23: Lazy<Halo2Params> = Lazy::new(|| gen_kzg_params(23));
 
 pub(crate) fn gen_kzg_params(k: u32) -> Halo2Params {
@@ -91,6 +91,7 @@ pub trait Halo2ParamsReader {
     fn read_params(&self, k: usize) -> Arc<Halo2Params>;
 }
 
+#[derive(Clone)]
 pub struct CacheHalo2ParamsReader {
     params_dir: PathBuf,
     cached_params: Arc<Mutex<HashMap<usize, Arc<Halo2Params>>>>,
@@ -114,10 +115,10 @@ impl CacheHalo2ParamsReader {
         }
     }
     pub fn new_with_default_params_dir() -> Self {
-        Self {
-            params_dir: PathBuf::from(DEFAULT_PARAMS_DIR),
-            cached_params: Default::default(),
-        }
+        let default_params_dir = PathBuf::from(var("HOME").unwrap())
+            .join(".openvm")
+            .join("params");
+        CacheHalo2ParamsReader::new(default_params_dir)
     }
     fn read_params_from_folder(&self, k: usize) -> Halo2Params {
         let file_path = self.params_dir.as_path().join(format!("kzg_bn254_{k}.srs"));
diff --git a/extensions/native/recursion/src/halo2/verifier.rs b/extensions/native/recursion/src/halo2/verifier.rs
index fa2a829c88..93af642d2a 100644
--- a/extensions/native/recursion/src/halo2/verifier.rs
+++ b/extensions/native/recursion/src/halo2/verifier.rs
@@ -62,3 +62,9 @@ impl Halo2VerifierProvingKey {
         self.pinning.generate_dummy_snark(reader)
     }
 }
+
+// SAFETY: these aren't auto-implemented because DslOperations contains TracedVec, which holds a
+// backtrace. The backtrace is only used for debugging purposes, and the rest of the proving key
+// is Send and Sync.
+unsafe impl Send for Halo2VerifierProvingKey {}
+unsafe impl Sync for Halo2VerifierProvingKey {}
diff --git a/extensions/native/recursion/src/halo2/wrapper.rs b/extensions/native/recursion/src/halo2/wrapper.rs
index 958c502a86..77d9978c38 100644
--- a/extensions/native/recursion/src/halo2/wrapper.rs
+++ b/extensions/native/recursion/src/halo2/wrapper.rs
@@ -57,7 +57,7 @@ impl Halo2WrapperProvingKey {
     }
     pub fn keygen(params: &Halo2Params, dummy_snark: Snark) -> Self {
         let k = params.k();
-        #[cfg(feature = "bench-metrics")]
+        #[cfg(feature = "metrics")]
         let start = std::time::Instant::now();
         let mut circuit =
             generate_wrapper_circuit_object(CircuitBuilderStage::Keygen, k as usize, dummy_snark);
@@ -67,11 +67,11 @@ impl Halo2WrapperProvingKey {
             "Wrapper circuit num advice: {:?}",
             config_params.num_advice_per_phase
         );
-        #[cfg(feature = "bench-metrics")]
+        #[cfg(feature = "metrics")]
         emit_wrapper_circuit_metrics(&circuit);
         let pk = keygen_pk2(params, &circuit, false).unwrap();
         let num_pvs = circuit.instances().iter().map(|x| x.len()).collect_vec();
-        #[cfg(feature = "bench-metrics")]
+        #[cfg(feature = "metrics")]
         metrics::gauge!("halo2_keygen_time_ms").set(start.elapsed().as_millis() as f64);
         Self {
             pinning: Halo2ProvingPinning {
@@ -112,7 +112,7 @@ impl Halo2WrapperProvingKey {
     }
     #[cfg(feature = "evm-prove")]
     pub fn prove_for_evm(&self, params: &Halo2Params, snark_to_verify: Snark) -> RawEvmProof {
-        #[cfg(feature = "bench-metrics")]
+        #[cfg(feature = "metrics")]
         let start = std::time::Instant::now();
         let k = self.pinning.metadata.config_params.k;
         let prover_circuit = self.generate_circuit_object_for_proving(k, snark_to_verify);
@@ -124,7 +124,7 @@ impl Halo2WrapperProvingKey {
             prover_circuit,
             pvs.clone(),
         );
-        #[cfg(feature = "bench-metrics")]
+        #[cfg(feature = "metrics")]
         metrics::gauge!("total_proof_time_ms").set(start.elapsed().as_millis() as f64);
 
         RawEvmProof {
@@ -212,7 +212,7 @@ fn generate_wrapper_circuit_object(
     circuit
 }
 
-#[cfg(feature = "bench-metrics")]
+#[cfg(feature = "metrics")]
 fn emit_wrapper_circuit_metrics(agg_circuit: &AggregationCircuit) {
     let stats = agg_circuit.builder.statistics();
     let total_advices: usize = stats.gate.total_advice_per_phase.into_iter().sum();
diff --git a/extensions/native/recursion/src/testing_utils.rs b/extensions/native/recursion/src/testing_utils.rs
index 380b2aa9a3..62a8a25b27 100644
--- a/extensions/native/recursion/src/testing_utils.rs
+++ b/extensions/native/recursion/src/testing_utils.rs
@@ -1,17 +1,7 @@
-use inner::build_verification_program;
-use openvm_circuit::{arch::instructions::program::Program, utils::execute_and_prove_program};
-use openvm_native_circuit::NativeConfig;
-use openvm_native_compiler::conversion::CompilerOptions;
-use openvm_stark_backend::{
-    config::{Com, Domain, PcsProof, PcsProverData, StarkGenericConfig},
-    engine::VerificationData,
-    p3_commit::PolynomialSpace,
-    verifier::VerificationError,
-};
+use openvm_circuit::{arch::instructions::program::Program, utils::air_test_impl};
+use openvm_stark_backend::engine::VerificationData;
 use openvm_stark_sdk::{
-    config::baby_bear_poseidon2::BabyBearPoseidon2Config,
-    engine::{StarkFriEngine, VerificationDataWithFriParams},
-    p3_baby_bear::BabyBear,
+    config::baby_bear_poseidon2::BabyBearPoseidon2Config, p3_baby_bear::BabyBear,
     utils::ProofInputForTest,
 };
 
@@ -20,7 +10,7 @@ use crate::hints::InnerVal;
 type InnerSC = BabyBearPoseidon2Config;
 
 pub mod inner {
-    use openvm_native_circuit::NativeConfig;
+    use openvm_native_circuit::{test_native_config, NativeCpuBuilder};
     use openvm_native_compiler::conversion::CompilerOptions;
     use openvm_stark_sdk::{
         config::{
@@ -42,12 +32,12 @@ pub mod inner {
 
         let advice = new_from_inner_multi_vk(&vk);
         cfg_if::cfg_if! {
-            if #[cfg(feature = "bench-metrics")] {
+            if #[cfg(feature = "metrics")] {
                 let start = std::time::Instant::now();
             }
         }
         let program = VerifierProgram::build_with_options(advice, &fri_params, compiler_options);
-        #[cfg(feature = "bench-metrics")]
+        #[cfg(feature = "metrics")]
         metrics::gauge!("verify_program_compile_ms").set(start.elapsed().as_millis() as f64);
 
         let mut input_stream = Vec::new();
@@ -72,36 +62,17 @@ pub mod inner {
             ))
             .unwrap();
 
-        recursive_stark_test(
-            vparams,
-            CompilerOptions::default(),
-            NativeConfig::aggregation(4, 7),
-            &BabyBearPoseidon2Engine::new(fri_params),
+        let compiler_options = CompilerOptions::default();
+        let (program, witness_stream) = build_verification_program(vparams, compiler_options);
+        air_test_impl::<BabyBearPoseidon2Engine, _>(
+            fri_params,
+            NativeCpuBuilder,
+            test_native_config(),
+            program,
+            witness_stream,
+            1,
+            true,
         )
         .unwrap();
     }
 }
-
-/// 1. Builds the recursive verification program to verify `vparams`
-/// 2. Execute and proves the program in VM with `AggSC` config using `engine`.
-///
-/// The `vparams` must be from the BabyBearPoseidon2 stark config for the recursion
-/// program to work at the moment.
-#[allow(clippy::type_complexity)]
-pub fn recursive_stark_test<AggSC: StarkGenericConfig, E: StarkFriEngine<AggSC>>(
-    vparams: VerificationDataWithFriParams<InnerSC>,
-    compiler_options: CompilerOptions,
-    vm_config: NativeConfig,
-    engine: &E,
-) -> Result<VerificationDataWithFriParams<AggSC>, VerificationError>
-where
-    Domain<AggSC>: PolynomialSpace<Val = BabyBear>,
-    Domain<AggSC>: Send + Sync,
-    PcsProverData<AggSC>: Send + Sync,
-    Com<AggSC>: Send + Sync,
-    PcsProof<AggSC>: Send + Sync,
-{
-    let (program, witness_stream) = build_verification_program(vparams, compiler_options);
-
-    execute_and_prove_program(program, witness_stream, vm_config, engine)
-}
diff --git a/extensions/native/recursion/src/tests.rs b/extensions/native/recursion/src/tests.rs
index 4077ee6f1d..627304e866 100644
--- a/extensions/native/recursion/src/tests.rs
+++ b/extensions/native/recursion/src/tests.rs
@@ -1,18 +1,19 @@
-use std::{panic::catch_unwind, sync::Arc};
+use std::sync::Arc;
 
-use openvm_circuit::utils::gen_vm_program_test_proof_input;
-use openvm_native_circuit::NativeConfig;
+use openvm_native_circuit::{execute_program_with_config, test_native_config, NativeCpuBuilder};
 use openvm_stark_backend::{
     config::{StarkGenericConfig, Val},
     interaction::BusIndex,
     p3_field::PrimeField32,
     p3_matrix::dense::RowMajorMatrix,
-    prover::types::AirProofInput,
+    prover::{
+        hal::DeviceDataTransporter,
+        types::{AirProvingContext, ProvingContext},
+    },
     utils::disable_debug_builder,
     Chip,
 };
 use openvm_stark_sdk::{
-    collect_airs_and_inputs,
     config::{
         baby_bear_poseidon2::{BabyBearPoseidon2Config, BabyBearPoseidon2Engine},
         FriParameters,
@@ -37,8 +38,12 @@ where
     Val<SC>: PrimeField32,
 {
     let fib_chip = FibonacciChip::new(0, 1, n);
-    let (airs, per_air) = collect_airs_and_inputs!(fib_chip);
-    ProofInputForTest { airs, per_air }
+    let airs = vec![fib_chip.air()];
+    let air_ctx = fib_chip.generate_proving_ctx(());
+    ProofInputForTest {
+        airs,
+        per_air: vec![air_ctx],
+    }
 }
 
 pub fn interaction_test_proof_input<SC: StarkGenericConfig>() -> ProofInputForTest<SC>
@@ -62,7 +67,12 @@ where
         fields: vec![vec![1, 1], vec![1, 2], vec![3, 4], vec![9999, 0]],
     });
 
-    let (airs, per_air) = collect_airs_and_inputs!(send_chip1, send_chip2, recv_chip);
+    let airs = vec![send_chip1.air(), send_chip2.air(), recv_chip.air()];
+    let per_air = vec![
+        send_chip1.generate_proving_ctx(()),
+        send_chip2.generate_proving_ctx(()),
+        recv_chip.generate_proving_ctx(()),
+    ];
     ProofInputForTest { airs, per_air }
 }
 
@@ -84,12 +94,12 @@ where
         receiver_air.field_width() + 1,
     );
 
-    let sender_air_proof_input = AirProofInput::simple_no_pis(sender_trace);
-    let receiver_air_proof_input = AirProofInput::simple_no_pis(receiver_trace);
+    let sender_ctx = AirProvingContext::simple_no_pis(Arc::new(sender_trace));
+    let receiver_ctx = AirProvingContext::simple_no_pis(Arc::new(receiver_trace));
 
     ProofInputForTest {
         airs: vec![Arc::new(sender_air), Arc::new(receiver_air)],
-        per_air: vec![sender_air_proof_input, receiver_air_proof_input],
+        per_air: vec![sender_ctx, receiver_ctx],
     }
 }
 
@@ -133,12 +143,12 @@ fn test_unordered() {
 
 #[test]
 fn test_optional_air() {
-    use openvm_stark_backend::{engine::StarkEngine, prover::types::ProofInput, Chip};
+    use openvm_stark_backend::engine::StarkEngine;
     let fri_params = FriParameters::new_for_testing(3);
     let engine = BabyBearPoseidon2Engine::new(fri_params);
     let fib_chip = FibonacciChip::new(0, 1, 8);
     let send_chip1 = DummyInteractionChip::new_without_partition(1, true, 0);
-    let send_chip2 = DummyInteractionChip::new_with_partition(engine.config(), 1, true, 0);
+    let send_chip2 = DummyInteractionChip::new_with_partition(engine.device().clone(), 1, true, 0);
     let recv_chip1 = DummyInteractionChip::new_without_partition(1, false, 0);
     let mut keygen_builder = engine.keygen_builder();
     let fib_chip_id = keygen_builder.add_air(fib_chip.air());
@@ -148,7 +158,7 @@ fn test_optional_air() {
     let pk = keygen_builder.generate_pk();
 
     let m_advice = new_from_inner_multi_vk(&pk.get_vk());
-    let vm_config = NativeConfig::aggregation(4, 7);
+    let config = test_native_config();
     let program = VerifierProgram::build(m_advice, &fri_params);
 
     // Case 1: All AIRs are present.
@@ -169,26 +179,27 @@ fn test_optional_air() {
             count: vec![2, 4, 12],
             fields: vec![vec![1], vec![2], vec![3]],
         });
-        let proof = engine.prove(
-            &pk,
-            ProofInput {
-                per_air: vec![
-                    fib_chip.generate_air_proof_input_with_id(fib_chip_id),
-                    send_chip1.generate_air_proof_input_with_id(send_chip1_id),
-                    send_chip2.generate_air_proof_input_with_id(send_chip2_id),
-                    recv_chip1.generate_air_proof_input_with_id(recv_chip1_id),
-                ],
-            },
-        );
-        engine
-            .verify(&pk.get_vk(), &proof)
-            .expect("Verification failed");
+        let proof = engine
+            .prove_then_verify(
+                &pk,
+                ProvingContext {
+                    per_air: vec![
+                        (fib_chip_id, fib_chip.generate_proving_ctx(())),
+                        (send_chip1_id, send_chip1.generate_proving_ctx(())),
+                        (send_chip2_id, send_chip2.generate_proving_ctx(())),
+                        (recv_chip1_id, recv_chip1.generate_proving_ctx(())),
+                    ],
+                },
+            )
+            .unwrap();
         // The VM program will panic when the program cannot verify the proof.
-        gen_vm_program_test_proof_input::<BabyBearPoseidon2Config, NativeConfig>(
+        assert!(execute_program_with_config::<BabyBearPoseidon2Engine, _>(
             program.clone(),
             proof.write(),
-            vm_config.clone(),
-        );
+            NativeCpuBuilder,
+            config.clone()
+        )
+        .is_ok());
     }
     // Case 2: The second AIR is not presented.
     {
@@ -202,24 +213,25 @@ fn test_optional_air() {
             count: vec![1, 2, 4],
             fields: vec![vec![1], vec![2], vec![3]],
         });
-        let proof = engine.prove(
-            &pk,
-            ProofInput {
-                per_air: vec![
-                    send_chip1.generate_air_proof_input_with_id(send_chip1_id),
-                    recv_chip1.generate_air_proof_input_with_id(recv_chip1_id),
-                ],
-            },
-        );
-        engine
-            .verify(&pk.get_vk(), &proof)
-            .expect("Verification failed");
+        let proof = engine
+            .prove_then_verify(
+                &pk,
+                ProvingContext {
+                    per_air: vec![
+                        (send_chip1_id, send_chip1.generate_proving_ctx(())),
+                        (recv_chip1_id, recv_chip1.generate_proving_ctx(())),
+                    ],
+                },
+            )
+            .unwrap();
         // The VM program will panic when the program cannot verify the proof.
-        gen_vm_program_test_proof_input::<BabyBearPoseidon2Config, NativeConfig>(
+        assert!(execute_program_with_config::<BabyBearPoseidon2Engine, _>(
             program.clone(),
             proof.write(),
-            vm_config.clone(),
-        );
+            NativeCpuBuilder,
+            config.clone()
+        )
+        .is_ok());
     }
     // Case 3: Negative - unbalanced interactions.
     {
@@ -229,21 +241,21 @@ fn test_optional_air() {
             count: vec![1, 2, 4],
             fields: vec![vec![1], vec![2], vec![3]],
         });
+        let d_pk = engine.device().transport_pk_to_device(&pk);
         let proof = engine.prove(
-            &pk,
-            ProofInput {
-                per_air: vec![recv_chip1.generate_air_proof_input_with_id(recv_chip1_id)],
+            &d_pk,
+            ProvingContext {
+                per_air: vec![(recv_chip1_id, recv_chip1.generate_proving_ctx(()))],
             },
         );
         assert!(engine.verify(&pk.get_vk(), &proof).is_err());
         // The VM program should panic when the proof cannot be verified.
-        let unwind_res = catch_unwind(|| {
-            gen_vm_program_test_proof_input::<BabyBearPoseidon2Config, NativeConfig>(
-                program.clone(),
-                proof.write(),
-                vm_config,
-            )
-        });
-        assert!(unwind_res.is_err());
+        assert!(execute_program_with_config::<BabyBearPoseidon2Engine, _>(
+            program.clone(),
+            proof.write(),
+            NativeCpuBuilder,
+            config.clone()
+        )
+        .is_err());
     }
 }
diff --git a/extensions/native/recursion/tests/recursion.rs b/extensions/native/recursion/tests/recursion.rs
index 8f354f3316..d5820d393c 100644
--- a/extensions/native/recursion/tests/recursion.rs
+++ b/extensions/native/recursion/tests/recursion.rs
@@ -1,13 +1,28 @@
-use openvm_circuit::arch::{instructions::program::Program, SystemConfig, VmConfig, VmExecutor};
-use openvm_native_circuit::{Native, NativeConfig};
+use itertools::Itertools;
+use openvm_circuit::arch::{
+    instructions::program::Program, MatrixRecordArena, PreflightExecutionOutput, VmBuilder,
+    VmCircuitConfig,
+};
+use openvm_native_circuit::{
+    execute_program_with_config, test_native_config, NativeConfig, NativeCpuBuilder,
+};
 use openvm_native_compiler::{asm::AsmBuilder, ir::Felt};
 use openvm_native_recursion::testing_utils::inner::run_recursive_test;
 use openvm_stark_backend::{
     config::{Domain, StarkGenericConfig},
     p3_commit::PolynomialSpace,
     p3_field::{extension::BinomialExtensionField, FieldAlgebra},
+    prover::cpu::{CpuBackend, CpuDevice},
+};
+use openvm_stark_sdk::{
+    config::{
+        baby_bear_poseidon2::{BabyBearPoseidon2Config, BabyBearPoseidon2Engine},
+        FriParameters,
+    },
+    engine::StarkFriEngine,
+    p3_baby_bear::BabyBear,
+    utils::ProofInputForTest,
 };
-use openvm_stark_sdk::{config::FriParameters, p3_baby_bear::BabyBear, utils::ProofInputForTest};
 
 fn fibonacci_program(a: u32, b: u32, n: u32) -> Program<BabyBear> {
     type F = BabyBear;
@@ -35,28 +50,46 @@ fn fibonacci_program(a: u32, b: u32, n: u32) -> Program<BabyBear> {
     builder.compile_isa()
 }
 
-pub(crate) fn fibonacci_program_test_proof_input<SC: StarkGenericConfig>(
+// We need this for both BabyBearPoseidon2Config and BabyBearPoseidon2RootConfig
+pub(crate) fn fibonacci_program_test_proof_input<SC, E>(
     a: u32,
     b: u32,
     n: u32,
-) -> ProofInputForTest<SC>
+) -> ProofInputForTest<E::SC>
 where
+    SC: StarkGenericConfig,
+    E: StarkFriEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
     Domain<SC>: PolynomialSpace<Val = BabyBear>,
+    NativeCpuBuilder:
+        VmBuilder<E, VmConfig = NativeConfig, RecordArena = MatrixRecordArena<BabyBear>>,
 {
     let fib_program = fibonacci_program(a, b, n);
-    let vm_config = NativeConfig::new(SystemConfig::default().with_public_values(3), Native);
-    let airs = vm_config.create_chip_complex().unwrap().airs();
+    let mut config = test_native_config();
+    config.as_mut().num_public_values = 3;
 
-    let executor = VmExecutor::<BabyBear, NativeConfig>::new(vm_config);
+    let (output, mut vm) = execute_program_with_config::<E, _>(
+        fib_program.clone(),
+        vec![],
+        NativeCpuBuilder,
+        config.clone(),
+    )
+    .unwrap();
+    let PreflightExecutionOutput {
+        system_records,
+        record_arenas,
+        ..
+    } = output;
+    let cached_program_trace = vm.commit_program_on_device(&fib_program);
+    vm.load_program(cached_program_trace);
+    let ctx = vm
+        .generate_proving_ctx(system_records, record_arenas)
+        .unwrap();
 
-    let mut result = executor.execute_and_generate(fib_program, vec![]).unwrap();
-    assert_eq!(result.per_segment.len(), 1, "unexpected continuation");
-    let proof_input = result.per_segment.remove(0);
-    // Filter out unused AIRS (where trace is empty)
-    let (used_airs, per_air) = proof_input
+    let airs = config.create_airs().unwrap().into_airs().collect_vec();
+    let (used_airs, per_air): (Vec<_>, Vec<_>) = ctx
         .per_air
         .into_iter()
-        .map(|(air_id, x)| (airs[air_id].clone(), x))
+        .map(|(air_id, air_ctx)| (airs[air_id].clone(), air_ctx))
         .unzip();
     ProofInputForTest {
         airs: used_airs,
@@ -66,7 +99,10 @@ where
 
 #[test]
 fn test_fibonacci_program_verify() {
-    let fib_program_stark = fibonacci_program_test_proof_input(0, 1, 32);
+    let fib_program_stark = fibonacci_program_test_proof_input::<
+        BabyBearPoseidon2Config,
+        BabyBearPoseidon2Engine,
+    >(0, 1, 32);
     run_recursive_test(fib_program_stark, FriParameters::new_for_testing(3));
 }
 
@@ -75,7 +111,13 @@ fn test_fibonacci_program_verify() {
 #[ignore = "slow"]
 fn test_fibonacci_program_halo2_verify() {
     use openvm_native_recursion::halo2::testing_utils::run_static_verifier_test;
+    use openvm_stark_sdk::config::baby_bear_poseidon2_root::{
+        BabyBearPoseidon2RootConfig, BabyBearPoseidon2RootEngine,
+    };
 
-    let fib_program_stark = fibonacci_program_test_proof_input(0, 1, 32);
+    let fib_program_stark = fibonacci_program_test_proof_input::<
+        BabyBearPoseidon2RootConfig,
+        BabyBearPoseidon2RootEngine,
+    >(0, 1, 32);
     run_static_verifier_test(fib_program_stark, FriParameters::new_for_testing(3));
 }
diff --git a/extensions/native/recursion/trusted_setup_s3.sh b/extensions/native/recursion/trusted_setup_s3.sh
index a249217b0b..a5ddc0cd66 100644
--- a/extensions/native/recursion/trusted_setup_s3.sh
+++ b/extensions/native/recursion/trusted_setup_s3.sh
@@ -7,8 +7,8 @@ else
 fi
 echo "maxk=$maxk"
 
-mkdir -p params/
-cd params
+PARAMS_DIR="$HOME/.openvm/params/"
+mkdir -p $PARAMS_DIR
 for k in $(seq 10 $maxk)
 do
     pkey_file="kzg_bn254_${k}.srs"
@@ -16,7 +16,6 @@ do
         echo "$pkey_file already exists"
     else
         echo "downloading $pkey_file"
-        s5cmd --no-sign-request cp --concurrency 10 "s3://axiom-crypto/challenge_0085/${pkey_file}" .
+        s5cmd --no-sign-request cp --concurrency 10 "s3://axiom-crypto/challenge_0085/${pkey_file}" $PARAMS_DIR
     fi
 done
-cd ..
diff --git a/extensions/pairing/circuit/Cargo.toml b/extensions/pairing/circuit/Cargo.toml
index af16f7eeab..a44afff0f8 100644
--- a/extensions/pairing/circuit/Cargo.toml
+++ b/extensions/pairing/circuit/Cargo.toml
@@ -8,7 +8,6 @@ homepage.workspace = true
 repository.workspace = true
 
 [dependencies]
-openvm-circuit-primitives-derive = { workspace = true }
 openvm-circuit-primitives = { workspace = true }
 openvm-circuit-derive = { workspace = true }
 openvm-circuit = { workspace = true }
@@ -23,7 +22,6 @@ openvm-mod-circuit-builder = { workspace = true }
 openvm-stark-backend = { workspace = true }
 openvm-rv32im-circuit = { workspace = true }
 openvm-algebra-circuit = { workspace = true }
-openvm-rv32-adapters = { workspace = true }
 openvm-ecc-circuit = { workspace = true }
 openvm-pairing-transpiler = { workspace = true }
 
@@ -33,7 +31,6 @@ strum = { workspace = true }
 derive_more = { workspace = true }
 derive-new = { workspace = true }
 rand = { workspace = true }
-itertools = { workspace = true }
 eyre = { workspace = true }
 serde = { workspace = true, features = ["derive", "std"] }
 halo2curves-axiom = { workspace = true }
@@ -45,7 +42,6 @@ openvm-pairing-guest = { workspace = true }
 openvm-stark-sdk = { workspace = true }
 openvm-mod-circuit-builder = { workspace = true, features = ["test-utils"] }
 openvm-circuit = { workspace = true, features = ["test-utils"] }
-openvm-rv32-adapters = { workspace = true, features = ["test-utils"] }
 halo2curves-axiom = { workspace = true }
 openvm-ecc-guest = { workspace = true }
 openvm-pairing-guest = { workspace = true, features = [
diff --git a/extensions/pairing/circuit/src/config.rs b/extensions/pairing/circuit/src/config.rs
index d63bac664e..20ea07186a 100644
--- a/extensions/pairing/circuit/src/config.rs
+++ b/extensions/pairing/circuit/src/config.rs
@@ -1,30 +1,37 @@
-use openvm_algebra_circuit::*;
-use openvm_circuit::arch::{InitFileGenerator, SystemConfig};
+use std::result::Result;
+
+use openvm_algebra_circuit::{
+    AlgebraCpuProverExt, Fp2Extension, Fp2ExtensionExecutor, Rv32ModularConfig,
+    Rv32ModularConfigExecutor, Rv32ModularCpuBuilder,
+};
+use openvm_circuit::{
+    arch::{
+        AirInventory, ChipInventoryError, InitFileGenerator, MatrixRecordArena, SystemConfig,
+        VmBuilder, VmChipComplex, VmProverExtension,
+    },
+    system::SystemChipInventory,
+};
 use openvm_circuit_derive::VmConfig;
-use openvm_ecc_circuit::*;
-use openvm_rv32im_circuit::*;
-use openvm_stark_backend::p3_field::PrimeField32;
+use openvm_ecc_circuit::{EccCpuProverExt, WeierstrassExtension, WeierstrassExtensionExecutor};
+use openvm_stark_backend::{
+    config::{StarkGenericConfig, Val},
+    engine::StarkEngine,
+    p3_field::PrimeField32,
+    prover::cpu::{CpuBackend, CpuDevice},
+};
 use serde::{Deserialize, Serialize};
 
 use super::*;
 
 #[derive(Clone, Debug, VmConfig, Serialize, Deserialize)]
 pub struct Rv32PairingConfig {
-    #[system]
-    pub system: SystemConfig,
-    #[extension]
-    pub base: Rv32I,
-    #[extension]
-    pub mul: Rv32M,
-    #[extension]
-    pub io: Rv32Io,
-    #[extension]
-    pub modular: ModularExtension,
+    #[config(generics = true)]
+    pub modular: Rv32ModularConfig,
     #[extension]
     pub fp2: Fp2Extension,
     #[extension]
     pub weierstrass: WeierstrassExtension,
-    #[extension]
+    #[extension(generics = true)]
     pub pairing: PairingExtension,
 }
 
@@ -37,11 +44,7 @@ impl Rv32PairingConfig {
         let mut modulus_and_scalar_primes = modulus_primes.clone();
         modulus_and_scalar_primes.extend(curves.iter().map(|c| c.curve_config().scalar.clone()));
         Self {
-            system: SystemConfig::default().with_continuations(),
-            base: Default::default(),
-            mul: Default::default(),
-            io: Default::default(),
-            modular: ModularExtension::new(modulus_and_scalar_primes),
+            modular: Rv32ModularConfig::new(modulus_and_scalar_primes),
             fp2: Fp2Extension::new(
                 complex_struct_names
                     .into_iter()
@@ -60,9 +63,44 @@ impl InitFileGenerator for Rv32PairingConfig {
     fn generate_init_file_contents(&self) -> Option<String> {
         Some(format!(
             "// This file is automatically generated by cargo openvm. Do not rename or edit.\n{}\n{}\n{}\n",
-            self.modular.generate_moduli_init(),
-            self.fp2.generate_complex_init(&self.modular),
+            self.modular.modular.generate_moduli_init(),
+            self.fp2.generate_complex_init(&self.modular.modular),
             self.weierstrass.generate_sw_init()
         ))
     }
 }
+
+#[derive(Clone)]
+pub struct Rv32PairingCpuBuilder;
+
+impl<E, SC> VmBuilder<E> for Rv32PairingCpuBuilder
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    Val<SC>: PrimeField32,
+{
+    type VmConfig = Rv32PairingConfig;
+    type SystemChipInventory = SystemChipInventory<SC>;
+    type RecordArena = MatrixRecordArena<Val<SC>>;
+
+    fn create_chip_complex(
+        &self,
+        config: &Rv32PairingConfig,
+        circuit: AirInventory<SC>,
+    ) -> Result<
+        VmChipComplex<SC, Self::RecordArena, E::PB, Self::SystemChipInventory>,
+        ChipInventoryError,
+    > {
+        let mut chip_complex =
+            VmBuilder::<E>::create_chip_complex(&Rv32ModularCpuBuilder, &config.modular, circuit)?;
+        let inventory = &mut chip_complex.inventory;
+        VmProverExtension::<E, _, _>::extend_prover(&AlgebraCpuProverExt, &config.fp2, inventory)?;
+        VmProverExtension::<E, _, _>::extend_prover(
+            &EccCpuProverExt,
+            &config.weierstrass,
+            inventory,
+        )?;
+        VmProverExtension::<E, _, _>::extend_prover(&PairingProverExt, &config.pairing, inventory)?;
+        Ok(chip_complex)
+    }
+}
diff --git a/extensions/pairing/circuit/src/fp12_chip/add.rs b/extensions/pairing/circuit/src/fp12_chip/add.rs
deleted file mode 100644
index 643c68ef27..0000000000
--- a/extensions/pairing/circuit/src/fp12_chip/add.rs
+++ /dev/null
@@ -1,20 +0,0 @@
-use std::{cell::RefCell, rc::Rc};
-
-use openvm_circuit_primitives::var_range::VariableRangeCheckerBus;
-use openvm_mod_circuit_builder::{ExprBuilder, ExprBuilderConfig, FieldExpr};
-
-use crate::Fp12;
-
-pub fn fp12_add_expr(config: ExprBuilderConfig, range_bus: VariableRangeCheckerBus) -> FieldExpr {
-    config.check_valid();
-    let builder = ExprBuilder::new(config, range_bus.range_max_bits);
-    let builder = Rc::new(RefCell::new(builder));
-
-    let mut x = Fp12::new(builder.clone());
-    let mut y = Fp12::new(builder.clone());
-    let mut res = x.add(&mut y);
-    res.save_output();
-
-    let builder = builder.borrow().clone();
-    FieldExpr::new(builder, range_bus, false)
-}
diff --git a/extensions/pairing/circuit/src/fp12_chip/mod.rs b/extensions/pairing/circuit/src/fp12_chip/mod.rs
deleted file mode 100644
index c6894d0d27..0000000000
--- a/extensions/pairing/circuit/src/fp12_chip/mod.rs
+++ /dev/null
@@ -1,10 +0,0 @@
-mod add;
-mod mul;
-mod sub;
-
-pub use add::*;
-pub use mul::*;
-pub use sub::*;
-
-#[cfg(test)]
-mod tests;
diff --git a/extensions/pairing/circuit/src/fp12_chip/mul.rs b/extensions/pairing/circuit/src/fp12_chip/mul.rs
deleted file mode 100644
index 0736981de7..0000000000
--- a/extensions/pairing/circuit/src/fp12_chip/mul.rs
+++ /dev/null
@@ -1,175 +0,0 @@
-use std::{
-    cell::RefCell,
-    rc::Rc,
-    sync::{Arc, Mutex},
-};
-
-use openvm_circuit::{arch::VmChipWrapper, system::memory::OfflineMemory};
-use openvm_circuit_derive::InstructionExecutor;
-use openvm_circuit_primitives::var_range::{
-    SharedVariableRangeCheckerChip, VariableRangeCheckerBus,
-};
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
-use openvm_mod_circuit_builder::{
-    ExprBuilder, ExprBuilderConfig, FieldExpr, FieldExpressionCoreChip,
-};
-use openvm_pairing_transpiler::Fp12Opcode;
-use openvm_rv32_adapters::Rv32VecHeapAdapterChip;
-use openvm_stark_backend::p3_field::PrimeField32;
-
-use crate::Fp12;
-// Input: Fp12 * 2
-// Output: Fp12
-#[derive(Chip, ChipUsageGetter, InstructionExecutor)]
-pub struct Fp12MulChip<F: PrimeField32, const BLOCKS: usize, const BLOCK_SIZE: usize>(
-    pub  VmChipWrapper<
-        F,
-        Rv32VecHeapAdapterChip<F, 2, BLOCKS, BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
-        FieldExpressionCoreChip,
-    >,
-);
-
-impl<F: PrimeField32, const BLOCKS: usize, const BLOCK_SIZE: usize>
-    Fp12MulChip<F, BLOCKS, BLOCK_SIZE>
-{
-    pub fn new(
-        adapter: Rv32VecHeapAdapterChip<F, 2, BLOCKS, BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
-        config: ExprBuilderConfig,
-        xi: [isize; 2],
-        offset: usize,
-        range_checker: SharedVariableRangeCheckerChip,
-        offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-    ) -> Self {
-        let expr = fp12_mul_expr(config, range_checker.bus(), xi);
-        let core = FieldExpressionCoreChip::new(
-            expr,
-            offset,
-            vec![Fp12Opcode::MUL as usize],
-            vec![],
-            range_checker,
-            "Fp12Mul",
-            false,
-        );
-        Self(VmChipWrapper::new(adapter, core, offline_memory))
-    }
-}
-
-pub fn fp12_mul_expr(
-    config: ExprBuilderConfig,
-    range_bus: VariableRangeCheckerBus,
-    xi: [isize; 2],
-) -> FieldExpr {
-    config.check_valid();
-    let builder = ExprBuilder::new(config, range_bus.range_max_bits);
-    let builder = Rc::new(RefCell::new(builder));
-
-    let mut x = Fp12::new(builder.clone());
-    let mut y = Fp12::new(builder.clone());
-    let mut res = x.mul(&mut y, xi);
-    res.save_output();
-
-    let builder = builder.borrow().clone();
-    FieldExpr::new(builder, range_bus, false)
-}
-
-#[cfg(test)]
-mod tests {
-    use halo2curves_axiom::{bn256::Fq12, ff::Field};
-    use itertools::Itertools;
-    use openvm_circuit::arch::testing::{VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS};
-    use openvm_circuit_primitives::bitwise_op_lookup::{
-        BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
-    };
-    use openvm_ecc_guest::algebra::field::FieldExtension;
-    use openvm_instructions::{riscv::RV32_CELL_BITS, LocalOpcode};
-    use openvm_mod_circuit_builder::{
-        test_utils::{biguint_to_limbs, bn254_fq12_to_biguint_vec, bn254_fq2_to_biguint_vec},
-        ExprBuilderConfig,
-    };
-    use openvm_pairing_guest::bn254::{BN254_MODULUS, BN254_XI_ISIZE};
-    use openvm_rv32_adapters::rv32_write_heap_default_with_increment;
-    use openvm_stark_backend::p3_field::FieldAlgebra;
-    use openvm_stark_sdk::p3_baby_bear::BabyBear;
-    use rand::{rngs::StdRng, SeedableRng};
-
-    use super::*;
-
-    const LIMB_BITS: usize = 8;
-    type F = BabyBear;
-
-    #[test]
-    fn test_fp12_mul_bn254() {
-        const NUM_LIMBS: usize = 32;
-        const BLOCK_SIZE: usize = 32;
-
-        let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-        let config = ExprBuilderConfig {
-            modulus: BN254_MODULUS.clone(),
-            num_limbs: NUM_LIMBS,
-            limb_bits: LIMB_BITS,
-        };
-        let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-        let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-        let adapter = Rv32VecHeapAdapterChip::<F, 2, 12, 12, BLOCK_SIZE, BLOCK_SIZE>::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-            tester.address_bits(),
-            bitwise_chip.clone(),
-        );
-
-        let mut chip = Fp12MulChip::new(
-            adapter,
-            config,
-            BN254_XI_ISIZE,
-            Fp12Opcode::CLASS_OFFSET,
-            tester.range_checker(),
-            tester.offline_memory_mutex_arc(),
-        );
-
-        let mut rng = StdRng::seed_from_u64(64);
-        let x = Fq12::random(&mut rng);
-        let y = Fq12::random(&mut rng);
-        let inputs = [x.to_coeffs(), y.to_coeffs()]
-            .concat()
-            .iter()
-            .flat_map(|&x| bn254_fq2_to_biguint_vec(x))
-            .collect::<Vec<_>>();
-
-        let cmp = bn254_fq12_to_biguint_vec(x * y);
-        let res = chip
-            .0
-            .core
-            .expr()
-            .execute_with_output(inputs.clone(), vec![true]);
-        assert_eq!(res.len(), cmp.len());
-        for i in 0..res.len() {
-            assert_eq!(res[i], cmp[i]);
-        }
-
-        let x_limbs = inputs[..12]
-            .iter()
-            .map(|x| {
-                biguint_to_limbs::<NUM_LIMBS>(x.clone(), LIMB_BITS)
-                    .map(BabyBear::from_canonical_u32)
-            })
-            .collect_vec();
-        let y_limbs = inputs[12..]
-            .iter()
-            .map(|y| {
-                biguint_to_limbs::<NUM_LIMBS>(y.clone(), LIMB_BITS)
-                    .map(BabyBear::from_canonical_u32)
-            })
-            .collect_vec();
-        let instruction = rv32_write_heap_default_with_increment(
-            &mut tester,
-            x_limbs,
-            y_limbs,
-            512,
-            chip.0.core.air.offset + Fp12Opcode::MUL as usize,
-        );
-        tester.execute(&mut chip, &instruction);
-        let tester = tester.build().load(chip).load(bitwise_chip).finalize();
-        tester.simple_test().expect("Verification failed");
-    }
-}
diff --git a/extensions/pairing/circuit/src/fp12_chip/sub.rs b/extensions/pairing/circuit/src/fp12_chip/sub.rs
deleted file mode 100644
index 470e700910..0000000000
--- a/extensions/pairing/circuit/src/fp12_chip/sub.rs
+++ /dev/null
@@ -1,20 +0,0 @@
-use std::{cell::RefCell, rc::Rc};
-
-use openvm_circuit_primitives::var_range::VariableRangeCheckerBus;
-use openvm_mod_circuit_builder::{ExprBuilder, ExprBuilderConfig, FieldExpr};
-
-use crate::Fp12;
-
-pub fn fp12_sub_expr(config: ExprBuilderConfig, range_bus: VariableRangeCheckerBus) -> FieldExpr {
-    config.check_valid();
-    let builder = ExprBuilder::new(config, range_bus.range_max_bits);
-    let builder = Rc::new(RefCell::new(builder));
-
-    let mut x = Fp12::new(builder.clone());
-    let mut y = Fp12::new(builder.clone());
-    let mut res = x.sub(&mut y);
-    res.save_output();
-
-    let builder = builder.borrow().clone();
-    FieldExpr::new(builder, range_bus, false)
-}
diff --git a/extensions/pairing/circuit/src/fp12_chip/tests.rs b/extensions/pairing/circuit/src/fp12_chip/tests.rs
deleted file mode 100644
index a9f6b235d5..0000000000
--- a/extensions/pairing/circuit/src/fp12_chip/tests.rs
+++ /dev/null
@@ -1,271 +0,0 @@
-use num_bigint::BigUint;
-use openvm_circuit::arch::{
-    testing::{VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS},
-    VmChipWrapper,
-};
-use openvm_circuit_primitives::bitwise_op_lookup::{
-    BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
-};
-use openvm_instructions::{riscv::RV32_CELL_BITS, LocalOpcode};
-use openvm_mod_circuit_builder::{
-    test_utils::{
-        biguint_to_limbs, bls12381_fq12_random, bn254_fq12_random, bn254_fq12_to_biguint_vec,
-    },
-    ExprBuilderConfig, FieldExpr, FieldExpressionCoreChip,
-};
-use openvm_pairing_guest::{
-    bls12_381::{
-        BLS12_381_BLOCK_SIZE, BLS12_381_LIMB_BITS, BLS12_381_MODULUS, BLS12_381_NUM_LIMBS,
-        BLS12_381_XI_ISIZE,
-    },
-    bn254::{BN254_BLOCK_SIZE, BN254_LIMB_BITS, BN254_MODULUS, BN254_NUM_LIMBS, BN254_XI_ISIZE},
-};
-use openvm_pairing_transpiler::{Bls12381Fp12Opcode, Bn254Fp12Opcode, Fp12Opcode};
-use openvm_rv32_adapters::{rv32_write_heap_default, Rv32VecHeapAdapterChip};
-use openvm_stark_backend::p3_field::FieldAlgebra;
-use openvm_stark_sdk::p3_baby_bear::BabyBear;
-
-use super::{fp12_add_expr, fp12_mul_expr, fp12_sub_expr};
-
-type F = BabyBear;
-
-#[allow(clippy::too_many_arguments)]
-fn test_fp12_fn<
-    const INPUT_SIZE: usize,
-    const NUM_LIMBS: usize,
-    const LIMB_BITS: usize,
-    const BLOCK_SIZE: usize,
->(
-    mut tester: VmChipTestBuilder<F>,
-    expr: FieldExpr,
-    offset: usize,
-    local_opcode_idx: usize,
-    name: &str,
-    x: Vec<BigUint>,
-    y: Vec<BigUint>,
-    var_len: usize,
-) {
-    let core = FieldExpressionCoreChip::new(
-        expr,
-        offset,
-        vec![local_opcode_idx],
-        vec![],
-        tester.memory_controller().borrow().range_checker.clone(),
-        name,
-        false,
-    );
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-
-    let adapter =
-        Rv32VecHeapAdapterChip::<F, 2, INPUT_SIZE, INPUT_SIZE, BLOCK_SIZE, BLOCK_SIZE>::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-            tester.address_bits(),
-            bitwise_chip.clone(),
-        );
-
-    let x_limbs = x
-        .iter()
-        .map(|x| {
-            biguint_to_limbs::<NUM_LIMBS>(x.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32)
-        })
-        .collect::<Vec<[BabyBear; NUM_LIMBS]>>();
-    let y_limbs = y
-        .iter()
-        .map(|y| {
-            biguint_to_limbs::<NUM_LIMBS>(y.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32)
-        })
-        .collect::<Vec<[BabyBear; NUM_LIMBS]>>();
-    let mut chip = VmChipWrapper::new(adapter, core, tester.offline_memory_mutex_arc());
-
-    let res = chip.core.air.expr.execute([x, y].concat(), vec![]);
-    assert_eq!(res.len(), var_len);
-
-    let instruction = rv32_write_heap_default(
-        &mut tester,
-        x_limbs,
-        y_limbs,
-        chip.core.air.offset + local_opcode_idx,
-    );
-    tester.execute(&mut chip, &instruction);
-
-    let run_tester = tester.build().load(chip).load(bitwise_chip).finalize();
-    run_tester.simple_test().expect("Verification failed");
-}
-
-#[test]
-fn test_fp12_add_bn254() {
-    let tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-    let config = ExprBuilderConfig {
-        modulus: BN254_MODULUS.clone(),
-        num_limbs: BN254_NUM_LIMBS,
-        limb_bits: BN254_LIMB_BITS,
-    };
-    let expr = fp12_add_expr(
-        config,
-        tester.memory_controller().borrow().range_checker.bus(),
-    );
-
-    let x = bn254_fq12_to_biguint_vec(bn254_fq12_random(1));
-    let y = bn254_fq12_to_biguint_vec(bn254_fq12_random(2));
-
-    test_fp12_fn::<12, BN254_NUM_LIMBS, BN254_LIMB_BITS, BN254_BLOCK_SIZE>(
-        tester,
-        expr,
-        Bn254Fp12Opcode::CLASS_OFFSET,
-        Fp12Opcode::ADD as usize,
-        "Bn254Fp12Add",
-        x,
-        y,
-        12,
-    );
-}
-
-#[test]
-fn test_fp12_sub_bn254() {
-    let tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-    let config = ExprBuilderConfig {
-        modulus: BN254_MODULUS.clone(),
-        num_limbs: BN254_NUM_LIMBS,
-        limb_bits: BN254_LIMB_BITS,
-    };
-    let expr = fp12_sub_expr(
-        config,
-        tester.memory_controller().borrow().range_checker.bus(),
-    );
-
-    let x = bn254_fq12_to_biguint_vec(bn254_fq12_random(59));
-    let y = bn254_fq12_to_biguint_vec(bn254_fq12_random(3));
-
-    test_fp12_fn::<12, BN254_NUM_LIMBS, BN254_LIMB_BITS, BN254_BLOCK_SIZE>(
-        tester,
-        expr,
-        Bn254Fp12Opcode::CLASS_OFFSET,
-        Fp12Opcode::SUB as usize,
-        "Bn254Fp12Sub",
-        x,
-        y,
-        12,
-    );
-}
-
-#[test]
-fn test_fp12_mul_bn254() {
-    let tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-    let config = ExprBuilderConfig {
-        modulus: BN254_MODULUS.clone(),
-        num_limbs: BN254_NUM_LIMBS,
-        limb_bits: BN254_LIMB_BITS,
-    };
-    let xi = BN254_XI_ISIZE;
-    let expr = fp12_mul_expr(
-        config,
-        tester.memory_controller().borrow().range_checker.bus(),
-        xi,
-    );
-
-    let x = bn254_fq12_to_biguint_vec(bn254_fq12_random(5));
-    let y = bn254_fq12_to_biguint_vec(bn254_fq12_random(25));
-
-    test_fp12_fn::<12, BN254_NUM_LIMBS, BN254_LIMB_BITS, BN254_BLOCK_SIZE>(
-        tester,
-        expr,
-        Bn254Fp12Opcode::CLASS_OFFSET,
-        Fp12Opcode::MUL as usize,
-        "Bn254Fp12Mul",
-        x,
-        y,
-        33,
-    );
-}
-
-#[test]
-fn test_fp12_add_bls12381() {
-    let tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-    let config = ExprBuilderConfig {
-        modulus: BLS12_381_MODULUS.clone(),
-        num_limbs: BLS12_381_NUM_LIMBS,
-        limb_bits: BLS12_381_LIMB_BITS,
-    };
-    let expr = fp12_add_expr(
-        config,
-        tester.memory_controller().borrow().range_checker.bus(),
-    );
-
-    let x = bls12381_fq12_random(3);
-    let y = bls12381_fq12_random(99);
-
-    test_fp12_fn::<36, BLS12_381_NUM_LIMBS, BLS12_381_LIMB_BITS, BLS12_381_BLOCK_SIZE>(
-        tester,
-        expr,
-        Bls12381Fp12Opcode::CLASS_OFFSET,
-        Fp12Opcode::ADD as usize,
-        "Bls12381Fp12Add",
-        x,
-        y,
-        12,
-    );
-}
-
-#[test]
-fn test_fp12_sub_bls12381() {
-    let tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-    let config = ExprBuilderConfig {
-        modulus: BLS12_381_MODULUS.clone(),
-        num_limbs: BLS12_381_NUM_LIMBS,
-        limb_bits: BLS12_381_LIMB_BITS,
-    };
-    let expr = fp12_sub_expr(
-        config,
-        tester.memory_controller().borrow().range_checker.bus(),
-    );
-
-    let x = bls12381_fq12_random(8);
-    let y = bls12381_fq12_random(9);
-
-    test_fp12_fn::<36, BLS12_381_NUM_LIMBS, BLS12_381_LIMB_BITS, BLS12_381_BLOCK_SIZE>(
-        tester,
-        expr,
-        Bls12381Fp12Opcode::CLASS_OFFSET,
-        Fp12Opcode::SUB as usize,
-        "Bls12381Fp12Sub",
-        x,
-        y,
-        12,
-    );
-}
-
-// NOTE[yj]: This test requires RUST_MIN_STACK=8388608 to run without overflowing the stack, so it
-// is ignored by the test runner for now
-#[test]
-#[ignore]
-fn test_fp12_mul_bls12381() {
-    let tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-    let config = ExprBuilderConfig {
-        modulus: BLS12_381_MODULUS.clone(),
-        num_limbs: BLS12_381_NUM_LIMBS,
-        limb_bits: BLS12_381_LIMB_BITS,
-    };
-    let xi = BLS12_381_XI_ISIZE;
-    let expr = fp12_mul_expr(
-        config,
-        tester.memory_controller().borrow().range_checker.bus(),
-        xi,
-    );
-
-    let x = bls12381_fq12_random(5);
-    let y = bls12381_fq12_random(25);
-
-    test_fp12_fn::<36, BLS12_381_NUM_LIMBS, BLS12_381_LIMB_BITS, BLS12_381_BLOCK_SIZE>(
-        tester,
-        expr,
-        Bls12381Fp12Opcode::CLASS_OFFSET,
-        Fp12Opcode::MUL as usize,
-        "Bls12381Fp12Mul",
-        x,
-        y,
-        46,
-    );
-}
diff --git a/extensions/pairing/circuit/src/lib.rs b/extensions/pairing/circuit/src/lib.rs
index b2b962b7f7..7edefa5490 100644
--- a/extensions/pairing/circuit/src/lib.rs
+++ b/extensions/pairing/circuit/src/lib.rs
@@ -1,11 +1,12 @@
+pub use openvm_pairing_guest::{
+    bls12_381::{BLS12_381_COMPLEX_STRUCT_NAME, BLS12_381_ECC_STRUCT_NAME},
+    bn254::BN254_COMPLEX_STRUCT_NAME,
+};
+
 mod config;
 mod fp12;
-mod fp12_chip;
-mod pairing_chip;
 mod pairing_extension;
 
 pub use config::*;
 pub use fp12::*;
-pub use fp12_chip::*;
-pub use pairing_chip::*;
 pub use pairing_extension::*;
diff --git a/extensions/pairing/circuit/src/pairing_chip/line/d_type/mod.rs b/extensions/pairing/circuit/src/pairing_chip/line/d_type/mod.rs
deleted file mode 100644
index 08857995f3..0000000000
--- a/extensions/pairing/circuit/src/pairing_chip/line/d_type/mod.rs
+++ /dev/null
@@ -1,8 +0,0 @@
-mod mul_013_by_013;
-mod mul_by_01234;
-
-pub use mul_013_by_013::*;
-pub use mul_by_01234::*;
-
-#[cfg(test)]
-mod tests;
diff --git a/extensions/pairing/circuit/src/pairing_chip/line/d_type/mul_013_by_013.rs b/extensions/pairing/circuit/src/pairing_chip/line/d_type/mul_013_by_013.rs
deleted file mode 100644
index 36d1012e9b..0000000000
--- a/extensions/pairing/circuit/src/pairing_chip/line/d_type/mul_013_by_013.rs
+++ /dev/null
@@ -1,101 +0,0 @@
-use std::{
-    cell::RefCell,
-    rc::Rc,
-    sync::{Arc, Mutex},
-};
-
-use openvm_algebra_circuit::Fp2;
-use openvm_circuit::{arch::VmChipWrapper, system::memory::OfflineMemory};
-use openvm_circuit_derive::InstructionExecutor;
-use openvm_circuit_primitives::var_range::{
-    SharedVariableRangeCheckerChip, VariableRangeCheckerBus,
-};
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
-use openvm_mod_circuit_builder::{
-    ExprBuilder, ExprBuilderConfig, FieldExpr, FieldExpressionCoreChip,
-};
-use openvm_pairing_transpiler::PairingOpcode;
-use openvm_rv32_adapters::Rv32VecHeapAdapterChip;
-use openvm_stark_backend::p3_field::PrimeField32;
-
-// Input: line0.b, line0.c, line1.b, line1.c <Fp2>: 2 x 4 field elements
-// Output: 5 Fp2 coefficients -> 10 field elements
-#[derive(Chip, ChipUsageGetter, InstructionExecutor)]
-pub struct EcLineMul013By013Chip<
-    F: PrimeField32,
-    const INPUT_BLOCKS: usize,
-    const OUTPUT_BLOCKS: usize,
-    const BLOCK_SIZE: usize,
->(
-    pub  VmChipWrapper<
-        F,
-        Rv32VecHeapAdapterChip<F, 2, INPUT_BLOCKS, OUTPUT_BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
-        FieldExpressionCoreChip,
-    >,
-);
-
-impl<
-        F: PrimeField32,
-        const INPUT_BLOCKS: usize,
-        const OUTPUT_BLOCKS: usize,
-        const BLOCK_SIZE: usize,
-    > EcLineMul013By013Chip<F, INPUT_BLOCKS, OUTPUT_BLOCKS, BLOCK_SIZE>
-{
-    pub fn new(
-        adapter: Rv32VecHeapAdapterChip<F, 2, INPUT_BLOCKS, OUTPUT_BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
-        range_checker: SharedVariableRangeCheckerChip,
-        config: ExprBuilderConfig,
-        xi: [isize; 2],
-        offset: usize,
-        offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-    ) -> Self {
-        assert!(
-            xi[0].unsigned_abs() < 1 << config.limb_bits,
-            "expect xi to be small"
-        ); // not a hard rule, but we expect xi to be small
-        assert!(
-            xi[1].unsigned_abs() < 1 << config.limb_bits,
-            "expect xi to be small"
-        );
-        let expr = mul_013_by_013_expr(config, range_checker.bus(), xi);
-        let core = FieldExpressionCoreChip::new(
-            expr,
-            offset,
-            vec![PairingOpcode::MUL_013_BY_013 as usize],
-            vec![],
-            range_checker,
-            "Mul013By013",
-            true,
-        );
-        Self(VmChipWrapper::new(adapter, core, offline_memory))
-    }
-}
-
-pub fn mul_013_by_013_expr(
-    config: ExprBuilderConfig,
-    range_bus: VariableRangeCheckerBus,
-    xi: [isize; 2],
-) -> FieldExpr {
-    config.check_valid();
-    let builder = ExprBuilder::new(config.clone(), range_bus.range_max_bits);
-    let builder = Rc::new(RefCell::new(builder));
-
-    let mut b0 = Fp2::new(builder.clone());
-    let mut c0 = Fp2::new(builder.clone());
-    let mut b1 = Fp2::new(builder.clone());
-    let mut c1 = Fp2::new(builder.clone());
-
-    // where w⁶ = xi
-    // l0 * l1 = 1 + (b0 + b1)w + (b0b1)w² + (c0 + c1)w³ + (b0c1 + b1c0)w⁴ + (c0c1)w⁶
-    //         = (1 + c0c1 * xi) + (b0 + b1)w + (b0b1)w² + (c0 + c1)w³ + (b0c1 + b1c0)w⁴
-    let l0 = c0.mul(&mut c1).int_mul(xi).int_add([1, 0]);
-    let l1 = b0.add(&mut b1);
-    let l2 = b0.mul(&mut b1);
-    let l3 = c0.add(&mut c1);
-    let l4 = b0.mul(&mut c1).add(&mut b1.mul(&mut c0));
-
-    [l0, l1, l2, l3, l4].map(|mut l| l.save_output());
-
-    let builder = builder.borrow().clone();
-    FieldExpr::new(builder, range_bus, false)
-}
diff --git a/extensions/pairing/circuit/src/pairing_chip/line/d_type/mul_by_01234.rs b/extensions/pairing/circuit/src/pairing_chip/line/d_type/mul_by_01234.rs
deleted file mode 100644
index 996372e994..0000000000
--- a/extensions/pairing/circuit/src/pairing_chip/line/d_type/mul_by_01234.rs
+++ /dev/null
@@ -1,113 +0,0 @@
-use std::{
-    cell::RefCell,
-    rc::Rc,
-    sync::{Arc, Mutex},
-};
-
-use openvm_algebra_circuit::Fp2;
-use openvm_circuit::{arch::VmChipWrapper, system::memory::OfflineMemory};
-use openvm_circuit_derive::InstructionExecutor;
-use openvm_circuit_primitives::var_range::{
-    SharedVariableRangeCheckerChip, VariableRangeCheckerBus,
-};
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
-use openvm_mod_circuit_builder::{
-    ExprBuilder, ExprBuilderConfig, FieldExpr, FieldExpressionCoreChip,
-};
-use openvm_pairing_transpiler::PairingOpcode;
-use openvm_rv32_adapters::Rv32VecHeapTwoReadsAdapterChip;
-use openvm_stark_backend::p3_field::PrimeField32;
-
-use crate::Fp12;
-
-// Input: Fp12 (12 field elements), [Fp2; 5] (5 x 2 field elements)
-// Output: Fp12 (12 field elements)
-#[derive(Chip, ChipUsageGetter, InstructionExecutor)]
-pub struct EcLineMulBy01234Chip<
-    F: PrimeField32,
-    const INPUT_BLOCKS1: usize,
-    const INPUT_BLOCKS2: usize,
-    const OUTPUT_BLOCKS: usize,
-    const BLOCK_SIZE: usize,
->(
-    pub  VmChipWrapper<
-        F,
-        Rv32VecHeapTwoReadsAdapterChip<
-            F,
-            INPUT_BLOCKS1,
-            INPUT_BLOCKS2,
-            OUTPUT_BLOCKS,
-            BLOCK_SIZE,
-            BLOCK_SIZE,
-        >,
-        FieldExpressionCoreChip,
-    >,
-);
-
-impl<
-        F: PrimeField32,
-        const INPUT_BLOCKS1: usize,
-        const INPUT_BLOCKS2: usize,
-        const OUTPUT_BLOCKS: usize,
-        const BLOCK_SIZE: usize,
-    > EcLineMulBy01234Chip<F, INPUT_BLOCKS1, INPUT_BLOCKS2, OUTPUT_BLOCKS, BLOCK_SIZE>
-{
-    pub fn new(
-        adapter: Rv32VecHeapTwoReadsAdapterChip<
-            F,
-            INPUT_BLOCKS1,
-            INPUT_BLOCKS2,
-            OUTPUT_BLOCKS,
-            BLOCK_SIZE,
-            BLOCK_SIZE,
-        >,
-        config: ExprBuilderConfig,
-        xi: [isize; 2],
-        offset: usize,
-        range_checker: SharedVariableRangeCheckerChip,
-        offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-    ) -> Self {
-        assert!(
-            xi[0].unsigned_abs() < 1 << config.limb_bits,
-            "expect xi to be small"
-        ); // not a hard rule, but we expect xi to be small
-        assert!(
-            xi[1].unsigned_abs() < 1 << config.limb_bits,
-            "expect xi to be small"
-        );
-        let expr = mul_by_01234_expr(config, range_checker.bus(), xi);
-        let core = FieldExpressionCoreChip::new(
-            expr,
-            offset,
-            vec![PairingOpcode::MUL_BY_01234 as usize],
-            vec![],
-            range_checker.clone(),
-            "MulBy01234",
-            false,
-        );
-        Self(VmChipWrapper::new(adapter, core, offline_memory))
-    }
-}
-
-pub fn mul_by_01234_expr(
-    config: ExprBuilderConfig,
-    range_bus: VariableRangeCheckerBus,
-    xi: [isize; 2],
-) -> FieldExpr {
-    config.check_valid();
-    let builder = ExprBuilder::new(config.clone(), range_bus.range_max_bits);
-    let builder = Rc::new(RefCell::new(builder));
-
-    let mut f = Fp12::new(builder.clone());
-    let mut x0 = Fp2::new(builder.clone());
-    let mut x1 = Fp2::new(builder.clone());
-    let mut x2 = Fp2::new(builder.clone());
-    let mut x3 = Fp2::new(builder.clone());
-    let mut x4 = Fp2::new(builder.clone());
-
-    let mut r = f.mul_by_01234(&mut x0, &mut x1, &mut x2, &mut x3, &mut x4, xi);
-    r.save_output();
-
-    let builder = builder.borrow().clone();
-    FieldExpr::new(builder, range_bus, false)
-}
diff --git a/extensions/pairing/circuit/src/pairing_chip/line/d_type/tests.rs b/extensions/pairing/circuit/src/pairing_chip/line/d_type/tests.rs
deleted file mode 100644
index 81da3169fa..0000000000
--- a/extensions/pairing/circuit/src/pairing_chip/line/d_type/tests.rs
+++ /dev/null
@@ -1,287 +0,0 @@
-use halo2curves_axiom::{
-    bn256::{Fq, Fq12, Fq2, G1Affine},
-    ff::Field,
-};
-use openvm_circuit::arch::testing::{VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS};
-use openvm_circuit_primitives::bitwise_op_lookup::{
-    BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
-};
-use openvm_ecc_guest::AffinePoint;
-use openvm_instructions::{riscv::RV32_CELL_BITS, LocalOpcode};
-use openvm_mod_circuit_builder::{
-    test_utils::{
-        biguint_to_limbs, bn254_fq12_to_biguint_vec, bn254_fq2_to_biguint_vec, bn254_fq_to_biguint,
-    },
-    ExprBuilderConfig,
-};
-use openvm_pairing_guest::{
-    bn254::{BN254_LIMB_BITS, BN254_MODULUS, BN254_NUM_LIMBS, BN254_XI_ISIZE},
-    halo2curves_shims::bn254::{tangent_line_013, Bn254},
-    pairing::{Evaluatable, LineMulDType, UnevaluatedLine},
-};
-use openvm_pairing_transpiler::PairingOpcode;
-use openvm_rv32_adapters::{
-    rv32_write_heap_default, rv32_write_heap_default_with_increment, Rv32VecHeapAdapterChip,
-    Rv32VecHeapTwoReadsAdapterChip,
-};
-use openvm_stark_backend::p3_field::FieldAlgebra;
-use openvm_stark_sdk::p3_baby_bear::BabyBear;
-use rand::{rngs::StdRng, SeedableRng};
-
-use super::{super::EvaluateLineChip, *};
-
-type F = BabyBear;
-const NUM_LIMBS: usize = 32;
-const LIMB_BITS: usize = 8;
-const BLOCK_SIZE: usize = 32;
-
-#[test]
-fn test_mul_013_by_013() {
-    let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-    let adapter = Rv32VecHeapAdapterChip::<F, 2, 4, 10, BLOCK_SIZE, BLOCK_SIZE>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-        tester.address_bits(),
-        bitwise_chip.clone(),
-    );
-    let mut chip = EcLineMul013By013Chip::new(
-        adapter,
-        tester.memory_controller().borrow().range_checker.clone(),
-        ExprBuilderConfig {
-            modulus: BN254_MODULUS.clone(),
-            num_limbs: NUM_LIMBS,
-            limb_bits: LIMB_BITS,
-        },
-        BN254_XI_ISIZE,
-        PairingOpcode::CLASS_OFFSET,
-        tester.offline_memory_mutex_arc(),
-    );
-
-    let mut rng0 = StdRng::seed_from_u64(8);
-    let mut rng1 = StdRng::seed_from_u64(95);
-    let rnd_pt_0 = G1Affine::random(&mut rng0);
-    let rnd_pt_1 = G1Affine::random(&mut rng1);
-    let ec_pt_0 = AffinePoint::<Fq> {
-        x: rnd_pt_0.x,
-        y: rnd_pt_0.y,
-    };
-    let ec_pt_1 = AffinePoint::<Fq> {
-        x: rnd_pt_1.x,
-        y: rnd_pt_1.y,
-    };
-    let line0 = tangent_line_013::<Fq, Fq2>(ec_pt_0);
-    let line1 = tangent_line_013::<Fq, Fq2>(ec_pt_1);
-    let input_line0 = [
-        bn254_fq2_to_biguint_vec(line0.b),
-        bn254_fq2_to_biguint_vec(line0.c),
-    ]
-    .concat();
-    let input_line1 = [
-        bn254_fq2_to_biguint_vec(line1.b),
-        bn254_fq2_to_biguint_vec(line1.c),
-    ]
-    .concat();
-
-    let vars = chip
-        .0
-        .core
-        .expr()
-        .execute([input_line0.clone(), input_line1.clone()].concat(), vec![]);
-    let output_indices = chip.0.core.expr().builder.output_indices.clone();
-    let output = output_indices
-        .iter()
-        .map(|i| vars[*i].clone())
-        .collect::<Vec<_>>();
-    assert_eq!(output.len(), 10);
-
-    let r_cmp = Bn254::mul_013_by_013(&line0, &line1);
-    let r_cmp_bigint = r_cmp
-        .map(|x| [bn254_fq_to_biguint(x.c0), bn254_fq_to_biguint(x.c1)])
-        .concat();
-
-    for i in 0..10 {
-        assert_eq!(output[i], r_cmp_bigint[i]);
-    }
-
-    let input_line0_limbs = input_line0
-        .iter()
-        .map(|x| {
-            biguint_to_limbs::<NUM_LIMBS>(x.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32)
-        })
-        .collect::<Vec<_>>();
-    let input_line1_limbs = input_line1
-        .iter()
-        .map(|x| {
-            biguint_to_limbs::<NUM_LIMBS>(x.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32)
-        })
-        .collect::<Vec<_>>();
-
-    let instruction = rv32_write_heap_default(
-        &mut tester,
-        input_line0_limbs,
-        input_line1_limbs,
-        chip.0.core.air.offset + PairingOpcode::MUL_013_BY_013 as usize,
-    );
-
-    tester.execute(&mut chip, &instruction);
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
-    tester.simple_test().expect("Verification failed");
-}
-
-#[test]
-fn test_mul_by_01234() {
-    let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-    let adapter = Rv32VecHeapTwoReadsAdapterChip::<F, 12, 10, 12, BLOCK_SIZE, BLOCK_SIZE>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-        tester.address_bits(),
-        bitwise_chip.clone(),
-    );
-    let mut chip = EcLineMulBy01234Chip::new(
-        adapter,
-        ExprBuilderConfig {
-            modulus: BN254_MODULUS.clone(),
-            num_limbs: NUM_LIMBS,
-            limb_bits: LIMB_BITS,
-        },
-        BN254_XI_ISIZE,
-        PairingOpcode::CLASS_OFFSET,
-        tester.range_checker(),
-        tester.offline_memory_mutex_arc(),
-    );
-
-    let mut rng = StdRng::seed_from_u64(8);
-    let f = Fq12::random(&mut rng);
-    let x0 = Fq2::random(&mut rng);
-    let x1 = Fq2::random(&mut rng);
-    let x2 = Fq2::random(&mut rng);
-    let x3 = Fq2::random(&mut rng);
-    let x4 = Fq2::random(&mut rng);
-
-    let input_f = bn254_fq12_to_biguint_vec(f);
-    let input_x = [
-        bn254_fq2_to_biguint_vec(x0),
-        bn254_fq2_to_biguint_vec(x1),
-        bn254_fq2_to_biguint_vec(x2),
-        bn254_fq2_to_biguint_vec(x3),
-        bn254_fq2_to_biguint_vec(x4),
-    ]
-    .concat();
-
-    let vars = chip
-        .0
-        .core
-        .expr()
-        .execute([input_f.clone(), input_x.clone()].concat(), vec![]);
-    let output_indices = chip.0.core.expr().builder.output_indices.clone();
-    let output = output_indices
-        .iter()
-        .map(|i| vars[*i].clone())
-        .collect::<Vec<_>>();
-    assert_eq!(output.len(), 12);
-
-    let r_cmp = Bn254::mul_by_01234(&f, &[x0, x1, x2, x3, x4]);
-    let r_cmp_bigint = bn254_fq12_to_biguint_vec(r_cmp);
-
-    for i in 0..12 {
-        assert_eq!(output[i], r_cmp_bigint[i]);
-    }
-
-    let input_f_limbs = input_f
-        .iter()
-        .map(|x| {
-            biguint_to_limbs::<NUM_LIMBS>(x.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32)
-        })
-        .collect::<Vec<_>>();
-    let input_x_limbs = input_x
-        .iter()
-        .map(|x| {
-            biguint_to_limbs::<NUM_LIMBS>(x.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32)
-        })
-        .collect::<Vec<_>>();
-
-    let instruction = rv32_write_heap_default_with_increment(
-        &mut tester,
-        input_f_limbs,
-        input_x_limbs,
-        512,
-        chip.0.core.air.offset + PairingOpcode::MUL_BY_01234 as usize,
-    );
-
-    tester.execute(&mut chip, &instruction);
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
-    tester.simple_test().expect("Verification failed");
-}
-
-#[test]
-fn test_evaluate_line() {
-    let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-    let config = ExprBuilderConfig {
-        modulus: BN254_MODULUS.clone(),
-        limb_bits: BN254_LIMB_BITS,
-        num_limbs: BN254_NUM_LIMBS,
-    };
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-    let adapter = Rv32VecHeapTwoReadsAdapterChip::<F, 4, 2, 4, BLOCK_SIZE, BLOCK_SIZE>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-        tester.address_bits(),
-        bitwise_chip.clone(),
-    );
-    let mut chip = EvaluateLineChip::new(
-        adapter,
-        config,
-        PairingOpcode::CLASS_OFFSET,
-        tester.range_checker(),
-        tester.offline_memory_mutex_arc(),
-    );
-
-    let mut rng = StdRng::seed_from_u64(42);
-    let uneval_b = Fq2::random(&mut rng);
-    let uneval_c = Fq2::random(&mut rng);
-    let x_over_y = Fq::random(&mut rng);
-    let y_inv = Fq::random(&mut rng);
-    let mut inputs = vec![];
-    inputs.extend(bn254_fq2_to_biguint_vec(uneval_b));
-    inputs.extend(bn254_fq2_to_biguint_vec(uneval_c));
-    inputs.push(bn254_fq_to_biguint(x_over_y));
-    inputs.push(bn254_fq_to_biguint(y_inv));
-    let input_limbs = inputs
-        .iter()
-        .map(|x| {
-            biguint_to_limbs::<NUM_LIMBS>(x.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32)
-        })
-        .collect();
-
-    let uneval: UnevaluatedLine<Fq2> = UnevaluatedLine {
-        b: uneval_b,
-        c: uneval_c,
-    };
-    let evaluated = uneval.evaluate(&(x_over_y, y_inv));
-
-    let result = chip.0.core.expr().execute_with_output(inputs, vec![]);
-    assert_eq!(result.len(), 4);
-    assert_eq!(result[0], bn254_fq_to_biguint(evaluated.b.c0));
-    assert_eq!(result[1], bn254_fq_to_biguint(evaluated.b.c1));
-    assert_eq!(result[2], bn254_fq_to_biguint(evaluated.c.c0));
-    assert_eq!(result[3], bn254_fq_to_biguint(evaluated.c.c1));
-
-    let instruction = rv32_write_heap_default(
-        &mut tester,
-        input_limbs,
-        vec![],
-        chip.0.core.air.offset + PairingOpcode::EVALUATE_LINE as usize,
-    );
-
-    tester.execute(&mut chip, &instruction);
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
-    tester.simple_test().expect("Verification failed");
-}
diff --git a/extensions/pairing/circuit/src/pairing_chip/line/evaluate_line.rs b/extensions/pairing/circuit/src/pairing_chip/line/evaluate_line.rs
deleted file mode 100644
index dc0a8cdfe1..0000000000
--- a/extensions/pairing/circuit/src/pairing_chip/line/evaluate_line.rs
+++ /dev/null
@@ -1,102 +0,0 @@
-use std::{
-    cell::RefCell,
-    rc::Rc,
-    sync::{Arc, Mutex},
-};
-
-use openvm_algebra_circuit::Fp2;
-use openvm_circuit::{arch::VmChipWrapper, system::memory::OfflineMemory};
-use openvm_circuit_derive::InstructionExecutor;
-use openvm_circuit_primitives::var_range::{
-    SharedVariableRangeCheckerChip, VariableRangeCheckerBus,
-};
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
-use openvm_mod_circuit_builder::{
-    ExprBuilder, ExprBuilderConfig, FieldExpr, FieldExpressionCoreChip,
-};
-use openvm_pairing_transpiler::PairingOpcode;
-use openvm_rv32_adapters::Rv32VecHeapTwoReadsAdapterChip;
-use openvm_stark_backend::p3_field::PrimeField32;
-
-// Input: UnevaluatedLine<Fp2>, (Fp, Fp)
-// Output: EvaluatedLine<Fp2>
-#[derive(Chip, ChipUsageGetter, InstructionExecutor)]
-pub struct EvaluateLineChip<
-    F: PrimeField32,
-    const INPUT_BLOCKS1: usize,
-    const INPUT_BLOCKS2: usize,
-    const OUTPUT_BLOCKS: usize,
-    const BLOCK_SIZE: usize,
->(
-    pub  VmChipWrapper<
-        F,
-        Rv32VecHeapTwoReadsAdapterChip<
-            F,
-            INPUT_BLOCKS1,
-            INPUT_BLOCKS2,
-            OUTPUT_BLOCKS,
-            BLOCK_SIZE,
-            BLOCK_SIZE,
-        >,
-        FieldExpressionCoreChip,
-    >,
-);
-
-impl<
-        F: PrimeField32,
-        const INPUT_BLOCKS1: usize,
-        const INPUT_BLOCKS2: usize,
-        const OUTPUT_BLOCKS: usize,
-        const BLOCK_SIZE: usize,
-    > EvaluateLineChip<F, INPUT_BLOCKS1, INPUT_BLOCKS2, OUTPUT_BLOCKS, BLOCK_SIZE>
-{
-    pub fn new(
-        adapter: Rv32VecHeapTwoReadsAdapterChip<
-            F,
-            INPUT_BLOCKS1,
-            INPUT_BLOCKS2,
-            OUTPUT_BLOCKS,
-            BLOCK_SIZE,
-            BLOCK_SIZE,
-        >,
-        config: ExprBuilderConfig,
-        offset: usize,
-        range_checker: SharedVariableRangeCheckerChip,
-        offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-    ) -> Self {
-        let expr = evaluate_line_expr(config, range_checker.bus());
-        let core = FieldExpressionCoreChip::new(
-            expr,
-            offset,
-            vec![PairingOpcode::EVALUATE_LINE as usize],
-            vec![],
-            range_checker,
-            "EvaluateLine",
-            false,
-        );
-        Self(VmChipWrapper::new(adapter, core, offline_memory))
-    }
-}
-
-pub fn evaluate_line_expr(
-    config: ExprBuilderConfig,
-    range_bus: VariableRangeCheckerBus,
-) -> FieldExpr {
-    config.check_valid();
-    let builder = ExprBuilder::new(config, range_bus.range_max_bits);
-    let builder = Rc::new(RefCell::new(builder));
-
-    let mut uneval_b = Fp2::new(builder.clone());
-    let mut uneval_c = Fp2::new(builder.clone());
-
-    let mut x_over_y = ExprBuilder::new_input(builder.clone());
-    let mut y_inv = ExprBuilder::new_input(builder.clone());
-
-    let mut b = uneval_b.scalar_mul(&mut x_over_y);
-    let mut c = uneval_c.scalar_mul(&mut y_inv);
-    b.save_output();
-    c.save_output();
-
-    let builder = builder.borrow().clone();
-    FieldExpr::new(builder, range_bus, false)
-}
diff --git a/extensions/pairing/circuit/src/pairing_chip/line/m_type/mod.rs b/extensions/pairing/circuit/src/pairing_chip/line/m_type/mod.rs
deleted file mode 100644
index b454d260ce..0000000000
--- a/extensions/pairing/circuit/src/pairing_chip/line/m_type/mod.rs
+++ /dev/null
@@ -1,8 +0,0 @@
-mod mul_023_by_023;
-mod mul_by_02345;
-
-pub use mul_023_by_023::*;
-pub use mul_by_02345::*;
-
-#[cfg(test)]
-mod tests;
diff --git a/extensions/pairing/circuit/src/pairing_chip/line/m_type/mul_023_by_023.rs b/extensions/pairing/circuit/src/pairing_chip/line/m_type/mul_023_by_023.rs
deleted file mode 100644
index 0d760b886e..0000000000
--- a/extensions/pairing/circuit/src/pairing_chip/line/m_type/mul_023_by_023.rs
+++ /dev/null
@@ -1,101 +0,0 @@
-use std::{
-    cell::RefCell,
-    rc::Rc,
-    sync::{Arc, Mutex},
-};
-
-use openvm_algebra_circuit::Fp2;
-use openvm_circuit::{arch::VmChipWrapper, system::memory::OfflineMemory};
-use openvm_circuit_derive::InstructionExecutor;
-use openvm_circuit_primitives::var_range::{
-    SharedVariableRangeCheckerChip, VariableRangeCheckerBus,
-};
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
-use openvm_mod_circuit_builder::{
-    ExprBuilder, ExprBuilderConfig, FieldExpr, FieldExpressionCoreChip,
-};
-use openvm_pairing_transpiler::PairingOpcode;
-use openvm_rv32_adapters::Rv32VecHeapAdapterChip;
-use openvm_stark_backend::p3_field::PrimeField32;
-
-// Input: line0.b, line0.c, line1.b, line1.c <Fp2>: 2 x 4 field elements
-// Output: 5 Fp2 coefficients -> 10 field elements
-#[derive(Chip, ChipUsageGetter, InstructionExecutor)]
-pub struct EcLineMul023By023Chip<
-    F: PrimeField32,
-    const INPUT_BLOCKS: usize,
-    const OUTPUT_BLOCKS: usize,
-    const BLOCK_SIZE: usize,
->(
-    pub  VmChipWrapper<
-        F,
-        Rv32VecHeapAdapterChip<F, 2, INPUT_BLOCKS, OUTPUT_BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
-        FieldExpressionCoreChip,
-    >,
-);
-
-impl<
-        F: PrimeField32,
-        const INPUT_BLOCKS: usize,
-        const OUTPUT_BLOCKS: usize,
-        const BLOCK_SIZE: usize,
-    > EcLineMul023By023Chip<F, INPUT_BLOCKS, OUTPUT_BLOCKS, BLOCK_SIZE>
-{
-    pub fn new(
-        adapter: Rv32VecHeapAdapterChip<F, 2, INPUT_BLOCKS, OUTPUT_BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
-        range_checker: SharedVariableRangeCheckerChip,
-        config: ExprBuilderConfig,
-        xi: [isize; 2],
-        offset: usize,
-        offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-    ) -> Self {
-        assert!(
-            xi[0].unsigned_abs() < 1 << config.limb_bits,
-            "expect xi to be small"
-        ); // not a hard rule, but we expect xi to be small
-        assert!(
-            xi[1].unsigned_abs() < 1 << config.limb_bits,
-            "expect xi to be small"
-        );
-        let expr = mul_023_by_023_expr(config, range_checker.bus(), xi);
-        let core = FieldExpressionCoreChip::new(
-            expr,
-            offset,
-            vec![PairingOpcode::MUL_023_BY_023 as usize],
-            vec![],
-            range_checker,
-            "Mul023By023",
-            true,
-        );
-        Self(VmChipWrapper::new(adapter, core, offline_memory))
-    }
-}
-
-pub fn mul_023_by_023_expr(
-    config: ExprBuilderConfig,
-    range_bus: VariableRangeCheckerBus,
-    xi: [isize; 2],
-) -> FieldExpr {
-    config.check_valid();
-    let builder = ExprBuilder::new(config.clone(), range_bus.range_max_bits);
-    let builder = Rc::new(RefCell::new(builder));
-
-    let mut b0 = Fp2::new(builder.clone()); // x2
-    let mut c0 = Fp2::new(builder.clone()); // x3
-    let mut b1 = Fp2::new(builder.clone()); // y2
-    let mut c1 = Fp2::new(builder.clone()); // y3
-
-    // where w⁶ = xi
-    // l0 * l1 = c0c1 + (c0b1 + c1b0)w² + (c0 + c1)w³ + (b0b1)w⁴ + (b0 +b1)w⁵ + w⁶
-    //         = (c0c1 + xi) + (c0b1 + c1b0)w² + (c0 + c1)w³ + (b0b1)w⁴ + (b0 + b1)w⁵
-    let l0 = c0.mul(&mut c1).int_add(xi);
-    let l2 = c0.mul(&mut b1).add(&mut c1.mul(&mut b0));
-    let l3 = c0.add(&mut c1);
-    let l4 = b0.mul(&mut b1);
-    let l5 = b0.add(&mut b1);
-
-    [l0, l2, l3, l4, l5].map(|mut l| l.save_output());
-
-    let builder = builder.borrow().clone();
-    FieldExpr::new(builder, range_bus, false)
-}
diff --git a/extensions/pairing/circuit/src/pairing_chip/line/m_type/mul_by_02345.rs b/extensions/pairing/circuit/src/pairing_chip/line/m_type/mul_by_02345.rs
deleted file mode 100644
index ad0e91e7bd..0000000000
--- a/extensions/pairing/circuit/src/pairing_chip/line/m_type/mul_by_02345.rs
+++ /dev/null
@@ -1,113 +0,0 @@
-use std::{
-    cell::RefCell,
-    rc::Rc,
-    sync::{Arc, Mutex},
-};
-
-use openvm_algebra_circuit::Fp2;
-use openvm_circuit::{arch::VmChipWrapper, system::memory::OfflineMemory};
-use openvm_circuit_derive::InstructionExecutor;
-use openvm_circuit_primitives::var_range::{
-    SharedVariableRangeCheckerChip, VariableRangeCheckerBus,
-};
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
-use openvm_mod_circuit_builder::{
-    ExprBuilder, ExprBuilderConfig, FieldExpr, FieldExpressionCoreChip,
-};
-use openvm_pairing_transpiler::PairingOpcode;
-use openvm_rv32_adapters::Rv32VecHeapTwoReadsAdapterChip;
-use openvm_stark_backend::p3_field::PrimeField32;
-
-use crate::Fp12;
-
-// Input: 2 Fp12: 2 x 12 field elements
-// Output: Fp12 -> 12 field elements
-#[derive(Chip, ChipUsageGetter, InstructionExecutor)]
-pub struct EcLineMulBy02345Chip<
-    F: PrimeField32,
-    const INPUT_BLOCKS1: usize,
-    const INPUT_BLOCKS2: usize,
-    const OUTPUT_BLOCKS: usize,
-    const BLOCK_SIZE: usize,
->(
-    pub  VmChipWrapper<
-        F,
-        Rv32VecHeapTwoReadsAdapterChip<
-            F,
-            INPUT_BLOCKS1,
-            INPUT_BLOCKS2,
-            OUTPUT_BLOCKS,
-            BLOCK_SIZE,
-            BLOCK_SIZE,
-        >,
-        FieldExpressionCoreChip,
-    >,
-);
-
-impl<
-        F: PrimeField32,
-        const INPUT_BLOCKS1: usize,
-        const INPUT_BLOCKS2: usize,
-        const OUTPUT_BLOCKS: usize,
-        const BLOCK_SIZE: usize,
-    > EcLineMulBy02345Chip<F, INPUT_BLOCKS1, INPUT_BLOCKS2, OUTPUT_BLOCKS, BLOCK_SIZE>
-{
-    pub fn new(
-        adapter: Rv32VecHeapTwoReadsAdapterChip<
-            F,
-            INPUT_BLOCKS1,
-            INPUT_BLOCKS2,
-            OUTPUT_BLOCKS,
-            BLOCK_SIZE,
-            BLOCK_SIZE,
-        >,
-        range_checker: SharedVariableRangeCheckerChip,
-        config: ExprBuilderConfig,
-        xi: [isize; 2],
-        offset: usize,
-        offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-    ) -> Self {
-        assert!(
-            xi[0].unsigned_abs() < 1 << config.limb_bits,
-            "expect xi to be small"
-        ); // not a hard rule, but we expect xi to be small
-        assert!(
-            xi[1].unsigned_abs() < 1 << config.limb_bits,
-            "expect xi to be small"
-        );
-        let expr = mul_by_02345_expr(config, range_checker.bus(), xi);
-        let core = FieldExpressionCoreChip::new(
-            expr,
-            offset,
-            vec![PairingOpcode::MUL_BY_02345 as usize],
-            vec![],
-            range_checker,
-            "MulBy02345",
-            false,
-        );
-        Self(VmChipWrapper::new(adapter, core, offline_memory))
-    }
-}
-
-pub fn mul_by_02345_expr(
-    config: ExprBuilderConfig,
-    range_bus: VariableRangeCheckerBus,
-    xi: [isize; 2],
-) -> FieldExpr {
-    config.check_valid();
-    let builder = ExprBuilder::new(config.clone(), range_bus.range_max_bits);
-    let builder = Rc::new(RefCell::new(builder));
-
-    let mut f = Fp12::new(builder.clone());
-    let mut x0 = Fp2::new(builder.clone());
-    let mut x2 = Fp2::new(builder.clone());
-    let mut x3 = Fp2::new(builder.clone());
-    let mut x4 = Fp2::new(builder.clone());
-    let mut x5 = Fp2::new(builder.clone());
-
-    let mut r = f.mul_by_02345(&mut x0, &mut x2, &mut x3, &mut x4, &mut x5, xi);
-    r.save_output();
-
-    let builder = builder.borrow().clone();
-    FieldExpr::new(builder, range_bus, false)
-}
diff --git a/extensions/pairing/circuit/src/pairing_chip/line/m_type/tests.rs b/extensions/pairing/circuit/src/pairing_chip/line/m_type/tests.rs
deleted file mode 100644
index 4331d2278e..0000000000
--- a/extensions/pairing/circuit/src/pairing_chip/line/m_type/tests.rs
+++ /dev/null
@@ -1,217 +0,0 @@
-use halo2curves_axiom::{
-    bls12_381::{Fq, Fq12, Fq2, G1Affine},
-    ff::Field,
-};
-use openvm_circuit::arch::testing::{VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS};
-use openvm_circuit_primitives::bitwise_op_lookup::{
-    BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
-};
-use openvm_ecc_guest::AffinePoint;
-use openvm_instructions::{riscv::RV32_CELL_BITS, LocalOpcode};
-use openvm_mod_circuit_builder::{test_utils::*, ExprBuilderConfig};
-use openvm_pairing_guest::{
-    bls12_381::{BLS12_381_LIMB_BITS, BLS12_381_MODULUS, BLS12_381_NUM_LIMBS, BLS12_381_XI_ISIZE},
-    halo2curves_shims::bls12_381::{tangent_line_023, Bls12_381},
-    pairing::LineMulMType,
-};
-use openvm_pairing_transpiler::PairingOpcode;
-use openvm_rv32_adapters::{
-    rv32_write_heap_default_with_increment, Rv32VecHeapAdapterChip, Rv32VecHeapTwoReadsAdapterChip,
-};
-use openvm_stark_backend::p3_field::FieldAlgebra;
-use openvm_stark_sdk::p3_baby_bear::BabyBear;
-use rand::{rngs::StdRng, SeedableRng};
-
-use super::*;
-
-type F = BabyBear;
-const NUM_LIMBS: usize = 48;
-const LIMB_BITS: usize = 8;
-const BLOCK_SIZE: usize = 16;
-
-#[test]
-fn test_mul_023_by_023() {
-    let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-    let adapter = Rv32VecHeapAdapterChip::<F, 2, 12, 30, BLOCK_SIZE, BLOCK_SIZE>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-        tester.address_bits(),
-        bitwise_chip.clone(),
-    );
-    let mut chip = EcLineMul023By023Chip::new(
-        adapter,
-        tester.memory_controller().borrow().range_checker.clone(),
-        ExprBuilderConfig {
-            modulus: BLS12_381_MODULUS.clone(),
-            num_limbs: BLS12_381_NUM_LIMBS,
-            limb_bits: BLS12_381_LIMB_BITS,
-        },
-        BLS12_381_XI_ISIZE,
-        PairingOpcode::CLASS_OFFSET,
-        tester.offline_memory_mutex_arc(),
-    );
-
-    let mut rng0 = StdRng::seed_from_u64(15);
-    let mut rng1 = StdRng::seed_from_u64(95);
-    let rnd_pt_0 = G1Affine::random(&mut rng0);
-    let rnd_pt_1 = G1Affine::random(&mut rng1);
-    let ec_pt_0 = AffinePoint::<Fq> {
-        x: rnd_pt_0.x,
-        y: rnd_pt_0.y,
-    };
-    let ec_pt_1 = AffinePoint::<Fq> {
-        x: rnd_pt_1.x,
-        y: rnd_pt_1.y,
-    };
-    let line0 = tangent_line_023::<Fq, Fq2>(ec_pt_0);
-    let line1 = tangent_line_023::<Fq, Fq2>(ec_pt_1);
-    let input_line0 = [
-        bls12381_fq2_to_biguint_vec(line0.b),
-        bls12381_fq2_to_biguint_vec(line0.c),
-    ]
-    .concat();
-    let input_line1 = [
-        bls12381_fq2_to_biguint_vec(line1.b),
-        bls12381_fq2_to_biguint_vec(line1.c),
-    ]
-    .concat();
-
-    let vars = chip
-        .0
-        .core
-        .expr()
-        .execute([input_line0.clone(), input_line1.clone()].concat(), vec![]);
-    let output_indices = chip.0.core.expr().builder.output_indices.clone();
-    let output = output_indices
-        .iter()
-        .map(|i| vars[*i].clone())
-        .collect::<Vec<_>>();
-    assert_eq!(output.len(), 10);
-
-    let r_cmp = Bls12_381::mul_023_by_023(&line0, &line1);
-    let r_cmp_bigint = r_cmp
-        .map(|x| [bls12381_fq_to_biguint(x.c0), bls12381_fq_to_biguint(x.c1)])
-        .concat();
-
-    for i in 0..10 {
-        assert_eq!(output[i], r_cmp_bigint[i]);
-    }
-
-    let input_line0_limbs = input_line0
-        .iter()
-        .map(|x| {
-            biguint_to_limbs::<NUM_LIMBS>(x.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32)
-        })
-        .collect::<Vec<_>>();
-    let input_line1_limbs = input_line1
-        .iter()
-        .map(|x| {
-            biguint_to_limbs::<NUM_LIMBS>(x.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32)
-        })
-        .collect::<Vec<_>>();
-
-    let instruction = rv32_write_heap_default_with_increment(
-        &mut tester,
-        input_line0_limbs,
-        input_line1_limbs,
-        512,
-        chip.0.core.air.offset + PairingOpcode::MUL_023_BY_023 as usize,
-    );
-
-    tester.execute(&mut chip, &instruction);
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
-    tester.simple_test().expect("Verification failed");
-}
-
-// NOTE[yj]: this test requires `RUST_MIN_STACK=8388608` to run otherwise it will overflow the stack
-#[test]
-#[ignore]
-fn test_mul_by_02345() {
-    let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-    let adapter = Rv32VecHeapTwoReadsAdapterChip::<F, 36, 30, 36, BLOCK_SIZE, BLOCK_SIZE>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-        tester.address_bits(),
-        bitwise_chip.clone(),
-    );
-    let mut chip = EcLineMulBy02345Chip::new(
-        adapter,
-        tester.memory_controller().borrow().range_checker.clone(),
-        ExprBuilderConfig {
-            modulus: BLS12_381_MODULUS.clone(),
-            num_limbs: BLS12_381_NUM_LIMBS,
-            limb_bits: BLS12_381_LIMB_BITS,
-        },
-        BLS12_381_XI_ISIZE,
-        PairingOpcode::CLASS_OFFSET,
-        tester.offline_memory_mutex_arc(),
-    );
-
-    let mut rng = StdRng::seed_from_u64(19);
-    let f = Fq12::random(&mut rng);
-    let x0 = Fq2::random(&mut rng);
-    let x2 = Fq2::random(&mut rng);
-    let x3 = Fq2::random(&mut rng);
-    let x4 = Fq2::random(&mut rng);
-    let x5 = Fq2::random(&mut rng);
-
-    let input_f = bls12381_fq12_to_biguint_vec(f);
-    let input_x = [
-        bls12381_fq2_to_biguint_vec(x0),
-        bls12381_fq2_to_biguint_vec(x2),
-        bls12381_fq2_to_biguint_vec(x3),
-        bls12381_fq2_to_biguint_vec(x4),
-        bls12381_fq2_to_biguint_vec(x5),
-    ]
-    .concat();
-
-    let vars = chip
-        .0
-        .core
-        .expr()
-        .execute([input_f.clone(), input_x.clone()].concat(), vec![]);
-    let output_indices = chip.0.core.expr().builder.output_indices.clone();
-    let output = output_indices
-        .iter()
-        .map(|i| vars[*i].clone())
-        .collect::<Vec<_>>();
-    assert_eq!(output.len(), 12);
-
-    let r_cmp = Bls12_381::mul_by_02345(&f, &[x0, x2, x3, x4, x5]);
-    let r_cmp_bigint = bls12381_fq12_to_biguint_vec(r_cmp);
-
-    for i in 0..12 {
-        assert_eq!(output[i], r_cmp_bigint[i]);
-    }
-
-    let input_f_limbs = input_f
-        .iter()
-        .map(|x| {
-            biguint_to_limbs::<NUM_LIMBS>(x.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32)
-        })
-        .collect::<Vec<_>>();
-    let input_x_limbs = input_x
-        .iter()
-        .map(|x| {
-            biguint_to_limbs::<NUM_LIMBS>(x.clone(), LIMB_BITS).map(BabyBear::from_canonical_u32)
-        })
-        .collect::<Vec<_>>();
-
-    let instruction = rv32_write_heap_default_with_increment(
-        &mut tester,
-        input_f_limbs,
-        input_x_limbs,
-        1024,
-        chip.0.core.air.offset + PairingOpcode::MUL_BY_02345 as usize,
-    );
-
-    tester.execute(&mut chip, &instruction);
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
-    tester.simple_test().expect("Verification failed");
-}
diff --git a/extensions/pairing/circuit/src/pairing_chip/line/mod.rs b/extensions/pairing/circuit/src/pairing_chip/line/mod.rs
deleted file mode 100644
index acf02c72be..0000000000
--- a/extensions/pairing/circuit/src/pairing_chip/line/mod.rs
+++ /dev/null
@@ -1,7 +0,0 @@
-mod d_type;
-mod evaluate_line;
-mod m_type;
-
-pub use d_type::*;
-pub use evaluate_line::*;
-pub use m_type::*;
diff --git a/extensions/pairing/circuit/src/pairing_chip/miller_double_and_add_step.rs b/extensions/pairing/circuit/src/pairing_chip/miller_double_and_add_step.rs
deleted file mode 100644
index 77084428c9..0000000000
--- a/extensions/pairing/circuit/src/pairing_chip/miller_double_and_add_step.rs
+++ /dev/null
@@ -1,215 +0,0 @@
-use std::{
-    cell::RefCell,
-    rc::Rc,
-    sync::{Arc, Mutex},
-};
-
-use openvm_algebra_circuit::Fp2;
-use openvm_circuit::{arch::VmChipWrapper, system::memory::OfflineMemory};
-use openvm_circuit_derive::InstructionExecutor;
-use openvm_circuit_primitives::var_range::{
-    SharedVariableRangeCheckerChip, VariableRangeCheckerBus,
-};
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
-use openvm_mod_circuit_builder::{
-    ExprBuilder, ExprBuilderConfig, FieldExpr, FieldExpressionCoreChip,
-};
-use openvm_pairing_transpiler::PairingOpcode;
-use openvm_rv32_adapters::Rv32VecHeapAdapterChip;
-use openvm_stark_backend::p3_field::PrimeField32;
-
-// Input: two AffinePoint<Fp2>: 4 field elements each
-// Output: (AffinePoint<Fp2>, UnevaluatedLine<Fp2>, UnevaluatedLine<Fp2>) -> 2*2 + 2*2 + 2*2 = 12
-// field elements
-#[derive(Chip, ChipUsageGetter, InstructionExecutor)]
-pub struct MillerDoubleAndAddStepChip<
-    F: PrimeField32,
-    const INPUT_BLOCKS: usize,
-    const OUTPUT_BLOCKS: usize,
-    const BLOCK_SIZE: usize,
->(
-    pub  VmChipWrapper<
-        F,
-        Rv32VecHeapAdapterChip<F, 2, INPUT_BLOCKS, OUTPUT_BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
-        FieldExpressionCoreChip,
-    >,
-);
-
-impl<
-        F: PrimeField32,
-        const INPUT_BLOCKS: usize,
-        const OUTPUT_BLOCKS: usize,
-        const BLOCK_SIZE: usize,
-    > MillerDoubleAndAddStepChip<F, INPUT_BLOCKS, OUTPUT_BLOCKS, BLOCK_SIZE>
-{
-    pub fn new(
-        adapter: Rv32VecHeapAdapterChip<F, 2, INPUT_BLOCKS, OUTPUT_BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
-        config: ExprBuilderConfig,
-        offset: usize,
-        range_checker: SharedVariableRangeCheckerChip,
-        offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-    ) -> Self {
-        let expr = miller_double_and_add_step_expr(config, range_checker.bus());
-        let core = FieldExpressionCoreChip::new(
-            expr,
-            offset,
-            vec![PairingOpcode::MILLER_DOUBLE_AND_ADD_STEP as usize],
-            vec![],
-            range_checker,
-            "MillerDoubleAndAddStep",
-            false,
-        );
-        Self(VmChipWrapper::new(adapter, core, offline_memory))
-    }
-}
-
-// Ref: openvm_pairing_guest::miller_step
-pub fn miller_double_and_add_step_expr(
-    config: ExprBuilderConfig,
-    range_bus: VariableRangeCheckerBus,
-) -> FieldExpr {
-    config.check_valid();
-    let builder = ExprBuilder::new(config, range_bus.range_max_bits);
-    let builder = Rc::new(RefCell::new(builder));
-
-    let mut x_s = Fp2::new(builder.clone());
-    let mut y_s = Fp2::new(builder.clone());
-    let mut x_q = Fp2::new(builder.clone());
-    let mut y_q = Fp2::new(builder.clone());
-
-    // λ1 = (y_s - y_q) / (x_s - x_q)
-    let mut lambda1 = y_s.sub(&mut y_q).div(&mut x_s.sub(&mut x_q));
-    let mut x_sq = lambda1.square().sub(&mut x_s).sub(&mut x_q);
-    // λ2 = -λ1 - 2y_s / (x_{s+q} - x_s)
-    let mut lambda2 = lambda1
-        .neg()
-        .sub(&mut y_s.int_mul([2, 0]).div(&mut x_sq.sub(&mut x_s)));
-    let mut x_sqs = lambda2.square().sub(&mut x_s).sub(&mut x_sq);
-    let mut y_sqs = lambda2.mul(&mut (x_s.sub(&mut x_sqs))).sub(&mut y_s);
-
-    x_sqs.save_output();
-    y_sqs.save_output();
-
-    let mut b0 = lambda1.neg();
-    let mut c0 = lambda1.mul(&mut x_s).sub(&mut y_s);
-    b0.save_output();
-    c0.save_output();
-
-    let mut b1 = lambda2.neg();
-    let mut c1 = lambda2.mul(&mut x_s).sub(&mut y_s);
-    b1.save_output();
-    c1.save_output();
-
-    let builder = builder.borrow().clone();
-    FieldExpr::new(builder, range_bus, false)
-}
-
-#[cfg(test)]
-mod tests {
-    use halo2curves_axiom::bn256::G2Affine;
-    use openvm_circuit::arch::testing::{VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS};
-    use openvm_circuit_primitives::bitwise_op_lookup::{
-        BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
-    };
-    use openvm_ecc_guest::AffinePoint;
-    use openvm_instructions::{riscv::RV32_CELL_BITS, LocalOpcode};
-    use openvm_mod_circuit_builder::test_utils::{biguint_to_limbs, bn254_fq_to_biguint};
-    use openvm_pairing_guest::{
-        bn254::BN254_MODULUS, halo2curves_shims::bn254::Bn254, pairing::MillerStep,
-    };
-    use openvm_pairing_transpiler::PairingOpcode;
-    use openvm_rv32_adapters::{rv32_write_heap_default, Rv32VecHeapAdapterChip};
-    use openvm_stark_backend::p3_field::FieldAlgebra;
-    use openvm_stark_sdk::p3_baby_bear::BabyBear;
-    use rand::{rngs::StdRng, SeedableRng};
-
-    use super::*;
-
-    type F = BabyBear;
-    const NUM_LIMBS: usize = 32;
-    const LIMB_BITS: usize = 8;
-    const BLOCK_SIZE: usize = 32;
-
-    #[test]
-    #[allow(non_snake_case)]
-    fn test_miller_double_and_add() {
-        let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-        let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-        let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-        let adapter = Rv32VecHeapAdapterChip::<F, 2, 4, 12, BLOCK_SIZE, BLOCK_SIZE>::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-            tester.address_bits(),
-            bitwise_chip.clone(),
-        );
-        let mut chip = MillerDoubleAndAddStepChip::new(
-            adapter,
-            ExprBuilderConfig {
-                modulus: BN254_MODULUS.clone(),
-                limb_bits: LIMB_BITS,
-                num_limbs: NUM_LIMBS,
-            },
-            PairingOpcode::CLASS_OFFSET,
-            tester.range_checker(),
-            tester.offline_memory_mutex_arc(),
-        );
-
-        let mut rng0 = StdRng::seed_from_u64(2);
-        let Q = G2Affine::random(&mut rng0);
-        let Q2 = G2Affine::random(&mut rng0);
-        let inputs = [
-            Q.x.c0, Q.x.c1, Q.y.c0, Q.y.c1, Q2.x.c0, Q2.x.c1, Q2.y.c0, Q2.y.c1,
-        ]
-        .map(bn254_fq_to_biguint);
-
-        let Q_ecpoint = AffinePoint { x: Q.x, y: Q.y };
-        let Q_ecpoint2 = AffinePoint { x: Q2.x, y: Q2.y };
-        let (Q_daa, l_qa, l_sqs) = Bn254::miller_double_and_add_step(&Q_ecpoint, &Q_ecpoint2);
-        let result = chip
-            .0
-            .core
-            .expr()
-            .execute_with_output(inputs.to_vec(), vec![]);
-        assert_eq!(result.len(), 12); // AffinePoint<Fp2> and 4 Fp2 coefficients
-        assert_eq!(result[0], bn254_fq_to_biguint(Q_daa.x.c0));
-        assert_eq!(result[1], bn254_fq_to_biguint(Q_daa.x.c1));
-        assert_eq!(result[2], bn254_fq_to_biguint(Q_daa.y.c0));
-        assert_eq!(result[3], bn254_fq_to_biguint(Q_daa.y.c1));
-        assert_eq!(result[4], bn254_fq_to_biguint(l_qa.b.c0));
-        assert_eq!(result[5], bn254_fq_to_biguint(l_qa.b.c1));
-        assert_eq!(result[6], bn254_fq_to_biguint(l_qa.c.c0));
-        assert_eq!(result[7], bn254_fq_to_biguint(l_qa.c.c1));
-        assert_eq!(result[8], bn254_fq_to_biguint(l_sqs.b.c0));
-        assert_eq!(result[9], bn254_fq_to_biguint(l_sqs.b.c1));
-        assert_eq!(result[10], bn254_fq_to_biguint(l_sqs.c.c0));
-        assert_eq!(result[11], bn254_fq_to_biguint(l_sqs.c.c1));
-
-        let input1_limbs = inputs[0..4]
-            .iter()
-            .map(|x| {
-                biguint_to_limbs::<NUM_LIMBS>(x.clone(), LIMB_BITS)
-                    .map(BabyBear::from_canonical_u32)
-            })
-            .collect::<Vec<_>>();
-
-        let input2_limbs = inputs[4..8]
-            .iter()
-            .map(|x| {
-                biguint_to_limbs::<NUM_LIMBS>(x.clone(), LIMB_BITS)
-                    .map(BabyBear::from_canonical_u32)
-            })
-            .collect::<Vec<_>>();
-
-        let instruction = rv32_write_heap_default(
-            &mut tester,
-            input1_limbs,
-            input2_limbs,
-            chip.0.core.air.offset + PairingOpcode::MILLER_DOUBLE_AND_ADD_STEP as usize,
-        );
-
-        tester.execute(&mut chip, &instruction);
-        let tester = tester.build().load(chip).load(bitwise_chip).finalize();
-        tester.simple_test().expect("Verification failed");
-    }
-}
diff --git a/extensions/pairing/circuit/src/pairing_chip/miller_double_step.rs b/extensions/pairing/circuit/src/pairing_chip/miller_double_step.rs
deleted file mode 100644
index 519eb473a5..0000000000
--- a/extensions/pairing/circuit/src/pairing_chip/miller_double_step.rs
+++ /dev/null
@@ -1,253 +0,0 @@
-use std::{
-    cell::RefCell,
-    rc::Rc,
-    sync::{Arc, Mutex},
-};
-
-use openvm_algebra_circuit::Fp2;
-use openvm_circuit::{arch::VmChipWrapper, system::memory::OfflineMemory};
-use openvm_circuit_derive::InstructionExecutor;
-use openvm_circuit_primitives::var_range::{
-    SharedVariableRangeCheckerChip, VariableRangeCheckerBus,
-};
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
-use openvm_mod_circuit_builder::{
-    ExprBuilder, ExprBuilderConfig, FieldExpr, FieldExpressionCoreChip,
-};
-use openvm_pairing_transpiler::PairingOpcode;
-use openvm_rv32_adapters::Rv32VecHeapAdapterChip;
-use openvm_stark_backend::p3_field::PrimeField32;
-
-// Input: AffinePoint<Fp2>: 4 field elements
-// Output: (AffinePoint<Fp2>, Fp2, Fp2) -> 8 field elements
-#[derive(Chip, ChipUsageGetter, InstructionExecutor)]
-pub struct MillerDoubleStepChip<
-    F: PrimeField32,
-    const INPUT_BLOCKS: usize,
-    const OUTPUT_BLOCKS: usize,
-    const BLOCK_SIZE: usize,
->(
-    VmChipWrapper<
-        F,
-        Rv32VecHeapAdapterChip<F, 1, INPUT_BLOCKS, OUTPUT_BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
-        FieldExpressionCoreChip,
-    >,
-);
-
-impl<
-        F: PrimeField32,
-        const INPUT_BLOCKS: usize,
-        const OUTPUT_BLOCKS: usize,
-        const BLOCK_SIZE: usize,
-    > MillerDoubleStepChip<F, INPUT_BLOCKS, OUTPUT_BLOCKS, BLOCK_SIZE>
-{
-    pub fn new(
-        adapter: Rv32VecHeapAdapterChip<F, 1, INPUT_BLOCKS, OUTPUT_BLOCKS, BLOCK_SIZE, BLOCK_SIZE>,
-        config: ExprBuilderConfig,
-        offset: usize,
-        range_checker: SharedVariableRangeCheckerChip,
-        offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-    ) -> Self {
-        let expr = miller_double_step_expr(config, range_checker.bus());
-        let core = FieldExpressionCoreChip::new(
-            expr,
-            offset,
-            vec![PairingOpcode::MILLER_DOUBLE_STEP as usize],
-            vec![],
-            range_checker,
-            "MillerDoubleStep",
-            false,
-        );
-        Self(VmChipWrapper::new(adapter, core, offline_memory))
-    }
-}
-
-// Ref: https://github.com/openvm-org/openvm/blob/f7d6fa7b8ef247e579740eb652fcdf5a04259c28/lib/ecc-execution/src/common/miller_step.rs#L7
-pub fn miller_double_step_expr(
-    config: ExprBuilderConfig,
-    range_bus: VariableRangeCheckerBus,
-) -> FieldExpr {
-    config.check_valid();
-    let builder = ExprBuilder::new(config, range_bus.range_max_bits);
-    let builder = Rc::new(RefCell::new(builder));
-
-    let mut x_s = Fp2::new(builder.clone());
-    let mut y_s = Fp2::new(builder.clone());
-
-    let mut three_x_square = x_s.square().int_mul([3, 0]);
-    let mut lambda = three_x_square.div(&mut y_s.int_mul([2, 0]));
-    let mut x_2s = lambda.square().sub(&mut x_s.int_mul([2, 0]));
-    let mut y_2s = lambda.mul(&mut (x_s.sub(&mut x_2s))).sub(&mut y_s);
-    x_2s.save_output();
-    y_2s.save_output();
-
-    let mut b = lambda.neg();
-    let mut c = lambda.mul(&mut x_s).sub(&mut y_s);
-    b.save_output();
-    c.save_output();
-
-    let builder = builder.borrow().clone();
-    FieldExpr::new(builder, range_bus, false)
-}
-
-#[cfg(test)]
-mod tests {
-    use openvm_circuit::arch::testing::{VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS};
-    use openvm_circuit_primitives::bitwise_op_lookup::{
-        BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
-    };
-    use openvm_ecc_guest::AffinePoint;
-    use openvm_instructions::{riscv::RV32_CELL_BITS, LocalOpcode};
-    use openvm_mod_circuit_builder::test_utils::{
-        biguint_to_limbs, bls12381_fq_to_biguint, bn254_fq_to_biguint,
-    };
-    use openvm_pairing_guest::{
-        bls12_381::{BLS12_381_LIMB_BITS, BLS12_381_MODULUS, BLS12_381_NUM_LIMBS},
-        bn254::{BN254_LIMB_BITS, BN254_MODULUS, BN254_NUM_LIMBS},
-        halo2curves_shims::{bls12_381::Bls12_381, bn254::Bn254},
-        pairing::MillerStep,
-    };
-    use openvm_pairing_transpiler::PairingOpcode;
-    use openvm_rv32_adapters::{rv32_write_heap_default, Rv32VecHeapAdapterChip};
-    use openvm_stark_backend::p3_field::FieldAlgebra;
-    use openvm_stark_sdk::p3_baby_bear::BabyBear;
-    use rand::{rngs::StdRng, SeedableRng};
-
-    use super::*;
-
-    type F = BabyBear;
-
-    #[test]
-    #[allow(non_snake_case)]
-    fn test_miller_double_bn254() {
-        use halo2curves_axiom::bn256::G2Affine;
-        const NUM_LIMBS: usize = 32;
-        const LIMB_BITS: usize = 8;
-        const BLOCK_SIZE: usize = 32;
-
-        let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-        let config = ExprBuilderConfig {
-            modulus: BN254_MODULUS.clone(),
-            limb_bits: BN254_LIMB_BITS,
-            num_limbs: BN254_NUM_LIMBS,
-        };
-        let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-        let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-        let adapter = Rv32VecHeapAdapterChip::<F, 1, 4, 8, BLOCK_SIZE, BLOCK_SIZE>::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-            tester.address_bits(),
-            bitwise_chip.clone(),
-        );
-        let mut chip = MillerDoubleStepChip::new(
-            adapter,
-            config,
-            PairingOpcode::CLASS_OFFSET,
-            tester.range_checker(),
-            tester.offline_memory_mutex_arc(),
-        );
-
-        let mut rng0 = StdRng::seed_from_u64(2);
-        let Q = G2Affine::random(&mut rng0);
-        let inputs = [Q.x.c0, Q.x.c1, Q.y.c0, Q.y.c1].map(bn254_fq_to_biguint);
-
-        let Q_ecpoint = AffinePoint { x: Q.x, y: Q.y };
-        let (Q_acc_init, l_init) = Bn254::miller_double_step(&Q_ecpoint);
-        let result = chip
-            .0
-            .core
-            .expr()
-            .execute_with_output(inputs.to_vec(), vec![]);
-        assert_eq!(result.len(), 8); // AffinePoint<Fp2> and two Fp2 coefficients
-        assert_eq!(result[0], bn254_fq_to_biguint(Q_acc_init.x.c0));
-        assert_eq!(result[1], bn254_fq_to_biguint(Q_acc_init.x.c1));
-        assert_eq!(result[2], bn254_fq_to_biguint(Q_acc_init.y.c0));
-        assert_eq!(result[3], bn254_fq_to_biguint(Q_acc_init.y.c1));
-        assert_eq!(result[4], bn254_fq_to_biguint(l_init.b.c0));
-        assert_eq!(result[5], bn254_fq_to_biguint(l_init.b.c1));
-        assert_eq!(result[6], bn254_fq_to_biguint(l_init.c.c0));
-        assert_eq!(result[7], bn254_fq_to_biguint(l_init.c.c1));
-
-        let input_limbs = inputs
-            .map(|x| biguint_to_limbs::<NUM_LIMBS>(x, LIMB_BITS).map(BabyBear::from_canonical_u32));
-
-        let instruction = rv32_write_heap_default(
-            &mut tester,
-            input_limbs.to_vec(),
-            vec![],
-            chip.0.core.air.offset + PairingOpcode::MILLER_DOUBLE_STEP as usize,
-        );
-
-        tester.execute(&mut chip, &instruction);
-        let tester = tester.build().load(chip).load(bitwise_chip).finalize();
-        tester.simple_test().expect("Verification failed");
-    }
-
-    #[test]
-    #[allow(non_snake_case)]
-    fn test_miller_double_bls12_381() {
-        use halo2curves_axiom::bls12_381::G2Affine;
-        const NUM_LIMBS: usize = 48;
-        const LIMB_BITS: usize = 8;
-        const BLOCK_SIZE: usize = 16;
-
-        let mut tester: VmChipTestBuilder<F> = VmChipTestBuilder::default();
-        let config = ExprBuilderConfig {
-            modulus: BLS12_381_MODULUS.clone(),
-            limb_bits: BLS12_381_LIMB_BITS,
-            num_limbs: BLS12_381_NUM_LIMBS,
-        };
-        let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-        let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-        let adapter = Rv32VecHeapAdapterChip::<F, 1, 12, 24, BLOCK_SIZE, BLOCK_SIZE>::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-            tester.address_bits(),
-            bitwise_chip.clone(),
-        );
-        let mut chip = MillerDoubleStepChip::new(
-            adapter,
-            config,
-            PairingOpcode::CLASS_OFFSET,
-            tester.range_checker(),
-            tester.offline_memory_mutex_arc(),
-        );
-
-        let mut rng0 = StdRng::seed_from_u64(12);
-        let Q = G2Affine::random(&mut rng0);
-        let inputs = [Q.x.c0, Q.x.c1, Q.y.c0, Q.y.c1].map(bls12381_fq_to_biguint);
-
-        let Q_ecpoint = AffinePoint { x: Q.x, y: Q.y };
-        let (Q_acc_init, l_init) = Bls12_381::miller_double_step(&Q_ecpoint);
-        let result = chip
-            .0
-            .core
-            .expr()
-            .execute_with_output(inputs.to_vec(), vec![]);
-        assert_eq!(result.len(), 8); // AffinePoint<Fp2> and two Fp2 coefficients
-        assert_eq!(result[0], bls12381_fq_to_biguint(Q_acc_init.x.c0));
-        assert_eq!(result[1], bls12381_fq_to_biguint(Q_acc_init.x.c1));
-        assert_eq!(result[2], bls12381_fq_to_biguint(Q_acc_init.y.c0));
-        assert_eq!(result[3], bls12381_fq_to_biguint(Q_acc_init.y.c1));
-        assert_eq!(result[4], bls12381_fq_to_biguint(l_init.b.c0));
-        assert_eq!(result[5], bls12381_fq_to_biguint(l_init.b.c1));
-        assert_eq!(result[6], bls12381_fq_to_biguint(l_init.c.c0));
-        assert_eq!(result[7], bls12381_fq_to_biguint(l_init.c.c1));
-
-        let input_limbs = inputs
-            .map(|x| biguint_to_limbs::<NUM_LIMBS>(x, LIMB_BITS).map(BabyBear::from_canonical_u32));
-
-        let instruction = rv32_write_heap_default(
-            &mut tester,
-            input_limbs.to_vec(),
-            vec![],
-            chip.0.core.air.offset + PairingOpcode::MILLER_DOUBLE_STEP as usize,
-        );
-
-        tester.execute(&mut chip, &instruction);
-        let tester = tester.build().load(chip).load(bitwise_chip).finalize();
-        tester.simple_test().expect("Verification failed");
-    }
-}
diff --git a/extensions/pairing/circuit/src/pairing_chip/mod.rs b/extensions/pairing/circuit/src/pairing_chip/mod.rs
deleted file mode 100644
index df00df16ce..0000000000
--- a/extensions/pairing/circuit/src/pairing_chip/mod.rs
+++ /dev/null
@@ -1,8 +0,0 @@
-mod line;
-mod miller_double_step;
-
-pub use line::*;
-pub use miller_double_step::*;
-
-mod miller_double_and_add_step;
-pub use miller_double_and_add_step::*;
diff --git a/extensions/pairing/circuit/src/pairing_extension.rs b/extensions/pairing/circuit/src/pairing_extension.rs
index c75687f404..b81499948e 100644
--- a/extensions/pairing/circuit/src/pairing_extension.rs
+++ b/extensions/pairing/circuit/src/pairing_extension.rs
@@ -2,12 +2,14 @@ use derive_more::derive::From;
 use num_bigint::BigUint;
 use num_traits::{FromPrimitive, Zero};
 use openvm_circuit::{
-    arch::{VmExtension, VmInventory, VmInventoryBuilder, VmInventoryError},
-    system::phantom::PhantomChip,
+    arch::{
+        AirInventory, AirInventoryError, ChipInventory, ChipInventoryError,
+        ExecutorInventoryBuilder, ExecutorInventoryError, VmCircuitExtension, VmExecutionExtension,
+        VmProverExtension,
+    },
+    system::phantom::PhantomExecutor,
 };
-use openvm_circuit_derive::{AnyEnum, InstructionExecutor};
-use openvm_circuit_primitives::bitwise_op_lookup::SharedBitwiseOperationLookupChip;
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
+use openvm_circuit_derive::{AnyEnum, Executor, MeteredExecutor, PreflightExecutor};
 use openvm_ecc_circuit::CurveConfig;
 use openvm_instructions::PhantomDiscriminant;
 use openvm_pairing_guest::{
@@ -17,12 +19,10 @@ use openvm_pairing_guest::{
     bn254::{BN254_ECC_STRUCT_NAME, BN254_MODULUS, BN254_ORDER, BN254_XI_ISIZE},
 };
 use openvm_pairing_transpiler::PairingPhantom;
-use openvm_stark_backend::p3_field::PrimeField32;
+use openvm_stark_backend::{config::StarkGenericConfig, engine::StarkEngine, p3_field::Field};
 use serde::{Deserialize, Serialize};
 use strum::FromRepr;
 
-use super::*;
-
 // All the supported pairing curves.
 #[derive(Clone, Copy, Debug, FromRepr, Serialize, Deserialize)]
 #[repr(usize)]
@@ -59,43 +59,48 @@ impl PairingCurve {
     }
 }
 
-#[derive(Clone, Debug, derive_new::new, Serialize, Deserialize)]
+#[derive(Clone, Debug, From, derive_new::new, Serialize, Deserialize)]
 pub struct PairingExtension {
     pub supported_curves: Vec<PairingCurve>,
 }
 
-#[derive(Chip, ChipUsageGetter, InstructionExecutor, AnyEnum)]
-pub enum PairingExtensionExecutor<F: PrimeField32> {
-    // bn254 (32 limbs)
-    MillerDoubleAndAddStepRv32_32(MillerDoubleAndAddStepChip<F, 4, 12, 32>),
-    EvaluateLineRv32_32(EvaluateLineChip<F, 4, 2, 4, 32>),
-    // bls12-381 (48 limbs)
-    MillerDoubleAndAddStepRv32_48(MillerDoubleAndAddStepChip<F, 12, 36, 16>),
-    EvaluateLineRv32_48(EvaluateLineChip<F, 12, 6, 12, 16>),
+#[derive(Clone, AnyEnum, Executor, MeteredExecutor, PreflightExecutor)]
+pub enum PairingExtensionExecutor<F: Field> {
+    Phantom(PhantomExecutor<F>),
 }
 
-#[derive(ChipUsageGetter, Chip, AnyEnum, From)]
-pub enum PairingExtensionPeriphery<F: PrimeField32> {
-    BitwiseOperationLookup(SharedBitwiseOperationLookupChip<8>),
-    Phantom(PhantomChip<F>),
-}
-
-impl<F: PrimeField32> VmExtension<F> for PairingExtension {
+impl<F: Field> VmExecutionExtension<F> for PairingExtension {
     type Executor = PairingExtensionExecutor<F>;
-    type Periphery = PairingExtensionPeriphery<F>;
 
-    fn build(
+    fn extend_execution(
         &self,
-        builder: &mut VmInventoryBuilder<F>,
-    ) -> Result<VmInventory<Self::Executor, Self::Periphery>, VmInventoryError> {
-        let inventory = VmInventory::new();
-
-        builder.add_phantom_sub_executor(
+        inventory: &mut ExecutorInventoryBuilder<F, PairingExtensionExecutor<F>>,
+    ) -> Result<(), ExecutorInventoryError> {
+        inventory.add_phantom_sub_executor(
             phantom::PairingHintSubEx,
             PhantomDiscriminant(PairingPhantom::HintFinalExp as u16),
         )?;
+        Ok(())
+    }
+}
+
+impl<SC: StarkGenericConfig> VmCircuitExtension<SC> for PairingExtension {
+    fn extend_circuit(&self, _inventory: &mut AirInventory<SC>) -> Result<(), AirInventoryError> {
+        Ok(()) // no-op: `_inventory` is left untouched
+    }
+}
 
-        Ok(inventory)
+pub struct PairingProverExt; // unit type; carries the `VmProverExtension` impl below
+impl<E, RA> VmProverExtension<E, RA, PairingExtension> for PairingProverExt
+where
+    E: StarkEngine,
+{
+    fn extend_prover(
+        &self,
+        _: &PairingExtension,
+        _inventory: &mut ChipInventory<E::SC, RA, E::PB>,
+    ) -> Result<(), ChipInventoryError> {
+        Ok(()) // no-op: `_inventory` is left untouched
     }
 }
 
@@ -106,7 +111,7 @@ pub(crate) mod phantom {
     use halo2curves_axiom::ff;
     use openvm_circuit::{
         arch::{PhantomSubExecutor, Streams},
-        system::memory::MemoryController,
+        system::memory::online::GuestMemory,
     };
     use openvm_ecc_guest::{algebra::field::FieldExtension, AffinePoint};
     use openvm_instructions::{
@@ -118,53 +123,52 @@ pub(crate) mod phantom {
         bn254::BN254_NUM_LIMBS,
         pairing::{FinalExp, MultiMillerLoop},
     };
-    use openvm_rv32im_circuit::adapters::{compose, unsafe_read_rv32_register};
-    use openvm_stark_backend::p3_field::PrimeField32;
+    use openvm_rv32im_circuit::adapters::{memory_read, read_rv32_register};
+    use openvm_stark_backend::p3_field::Field;
+    use rand::rngs::StdRng;
 
     use super::PairingCurve;
 
     pub struct PairingHintSubEx;
 
-    impl<F: PrimeField32> PhantomSubExecutor<F> for PairingHintSubEx {
+    impl<F: Field> PhantomSubExecutor<F> for PairingHintSubEx {
         fn phantom_execute(
-            &mut self,
-            memory: &MemoryController<F>,
+            &self,
+            memory: &GuestMemory,
             streams: &mut Streams<F>,
+            _: &mut StdRng,
             _: PhantomDiscriminant,
-            a: F,
-            b: F,
+            a: u32,
+            b: u32,
             c_upper: u16,
         ) -> eyre::Result<()> {
-            let rs1 = unsafe_read_rv32_register(memory, a);
-            let rs2 = unsafe_read_rv32_register(memory, b);
+            let rs1 = read_rv32_register(memory, a);
+            let rs2 = read_rv32_register(memory, b);
             hint_pairing(memory, &mut streams.hint_stream, rs1, rs2, c_upper)
         }
     }
 
-    fn hint_pairing<F: PrimeField32>(
-        memory: &MemoryController<F>,
+    fn hint_pairing<F: Field>(
+        memory: &GuestMemory,
         hint_stream: &mut VecDeque<F>,
         rs1: u32,
         rs2: u32,
         c_upper: u16,
     ) -> eyre::Result<()> {
-        let p_ptr = compose(memory.unsafe_read(
-            F::from_canonical_u32(RV32_MEMORY_AS),
-            F::from_canonical_u32(rs1),
-        ));
+        let p_ptr = u32::from_le_bytes(memory_read(memory, RV32_MEMORY_AS, rs1));
         // len in bytes
-        let p_len = compose(memory.unsafe_read(
-            F::from_canonical_u32(RV32_MEMORY_AS),
-            F::from_canonical_u32(rs1 + RV32_REGISTER_NUM_LIMBS as u32),
-        ));
-        let q_ptr = compose(memory.unsafe_read(
-            F::from_canonical_u32(RV32_MEMORY_AS),
-            F::from_canonical_u32(rs2),
+        let p_len = u32::from_le_bytes(memory_read(
+            memory,
+            RV32_MEMORY_AS,
+            rs1 + RV32_REGISTER_NUM_LIMBS as u32,
         ));
+
+        let q_ptr = u32::from_le_bytes(memory_read(memory, RV32_MEMORY_AS, rs2));
         // len in bytes
-        let q_len = compose(memory.unsafe_read(
-            F::from_canonical_u32(RV32_MEMORY_AS),
-            F::from_canonical_u32(rs2 + RV32_REGISTER_NUM_LIMBS as u32),
+        let q_len = u32::from_le_bytes(memory_read(
+            memory,
+            RV32_MEMORY_AS,
+            rs2 + RV32_REGISTER_NUM_LIMBS as u32,
         ));
 
         match PairingCurve::from_repr(c_upper as usize) {
@@ -178,8 +182,8 @@ pub(crate) mod phantom {
                 let p = (0..p_len)
                     .map(|i| -> eyre::Result<_> {
                         let ptr = p_ptr + i * 2 * (N as u32);
-                        let x = read_fp::<N, F, Fq>(memory, ptr)?;
-                        let y = read_fp::<N, F, Fq>(memory, ptr + N as u32)?;
+                        let x = read_fp::<N, Fq>(memory, ptr)?;
+                        let y = read_fp::<N, Fq>(memory, ptr + N as u32)?;
                         Ok(AffinePoint::new(x, y))
                     })
                     .collect::<eyre::Result<Vec<_>>>()?;
@@ -187,8 +191,8 @@ pub(crate) mod phantom {
                     .map(|i| -> eyre::Result<_> {
                         let mut ptr = q_ptr + i * 4 * (N as u32);
                         let mut read_fp2 = || -> eyre::Result<_> {
-                            let c0 = read_fp::<N, F, Fq>(memory, ptr)?;
-                            let c1 = read_fp::<N, F, Fq>(memory, ptr + N as u32)?;
+                            let c0 = read_fp::<N, Fq>(memory, ptr)?;
+                            let c1 = read_fp::<N, Fq>(memory, ptr + N as u32)?;
                             ptr += 2 * N as u32;
                             Ok(Fq2::new(c0, c1))
                         };
@@ -220,8 +224,8 @@ pub(crate) mod phantom {
                 let p = (0..p_len)
                     .map(|i| -> eyre::Result<_> {
                         let ptr = p_ptr + i * 2 * (N as u32);
-                        let x = read_fp::<N, F, Fq>(memory, ptr)?;
-                        let y = read_fp::<N, F, Fq>(memory, ptr + N as u32)?;
+                        let x = read_fp::<N, Fq>(memory, ptr)?;
+                        let y = read_fp::<N, Fq>(memory, ptr + N as u32)?;
                         Ok(AffinePoint::new(x, y))
                     })
                     .collect::<eyre::Result<Vec<_>>>()?;
@@ -229,8 +233,8 @@ pub(crate) mod phantom {
                     .map(|i| -> eyre::Result<_> {
                         let mut ptr = q_ptr + i * 4 * (N as u32);
                         let mut read_fp2 = || -> eyre::Result<_> {
-                            let c0 = read_fp::<N, F, Fq>(memory, ptr)?;
-                            let c1 = read_fp::<N, F, Fq>(memory, ptr + N as u32)?;
+                            let c0 = read_fp::<N, Fq>(memory, ptr)?;
+                            let c1 = read_fp::<N, Fq>(memory, ptr + N as u32)?;
                             ptr += 2 * N as u32;
                             Ok(Fq2 { c0, c1 })
                         };
@@ -259,24 +263,21 @@ pub(crate) mod phantom {
         Ok(())
     }
 
-    fn read_fp<const N: usize, F: PrimeField32, Fp: ff::PrimeField>(
-        memory: &MemoryController<F>,
+    fn read_fp<const N: usize, Fp: ff::PrimeField>(
+        memory: &GuestMemory,
         ptr: u32,
     ) -> eyre::Result<Fp>
     where
         Fp::Repr: From<[u8; N]>,
     {
-        let mut repr = [0u8; N];
-        for (i, byte) in repr.iter_mut().enumerate() {
-            *byte = memory
-                .unsafe_read_cell(
-                    F::from_canonical_u32(RV32_MEMORY_AS),
-                    F::from_canonical_u32(ptr + i as u32),
-                )
-                .as_canonical_u32()
-                .try_into()?;
-        }
-        Fp::from_repr(repr.into())
+        let repr: &[u8; N] = unsafe { // NOTE(review): no SAFETY note; confirm ptr+N is in bounds
+            memory
+                .memory
+                .get_slice::<u8>((RV32_MEMORY_AS, ptr), N)
+                .try_into()
+                .unwrap() // presumably never fails: N elements were requested — confirm
+        };
+        Fp::from_repr((*repr).into()) // copies the N bytes out of guest memory into `Fp::Repr`
             .into_option()
             .ok_or(eyre::eyre!("bad ff::PrimeField repr"))
     }
diff --git a/extensions/pairing/guest/src/halo2curves_shims/bn254/final_exp.rs b/extensions/pairing/guest/src/halo2curves_shims/bn254/final_exp.rs
index f4808e08b6..7cc7e78c40 100644
--- a/extensions/pairing/guest/src/halo2curves_shims/bn254/final_exp.rs
+++ b/extensions/pairing/guest/src/halo2curves_shims/bn254/final_exp.rs
@@ -2,11 +2,32 @@ use halo2curves_axiom::{
     bn256::{Fq, Fq12, Fq2},
     ff::Field,
 };
+use lazy_static::lazy_static;
 use openvm_ecc_guest::{algebra::field::FieldExtension, AffinePoint};
 
 use super::{Bn254, EXP1, EXP2, M_INV, R_INV, U27_COEFF_0, U27_COEFF_1};
 use crate::pairing::{FinalExp, MultiMillerLoop};
 
+lazy_static! { // each static below is initialized on first access and then reused
+    pub static ref UNITY_ROOT_27: Fq12 = { // 27th root of unity in Fq12
+        let u0 = U27_COEFF_0.to_u64_digits(); // u64 limbs; the first four are used below
+        let u1 = U27_COEFF_1.to_u64_digits();
+        let u_coeffs = Fq2::from_coeffs([
+            Fq::from_raw([u0[0], u0[1], u0[2], u0[3]]),
+            Fq::from_raw([u1[0], u1[1], u1[2], u1[3]]),
+        ]);
+        Fq12::from_coeffs([
+            Fq2::ZERO,
+            Fq2::ZERO,
+            u_coeffs, // the only non-zero Fq2 coefficient (index 2 of 6)
+            Fq2::ZERO,
+            Fq2::ZERO,
+            Fq2::ZERO,
+        ])
+    };
+    pub static ref UNITY_ROOT_27_EXP2: Fq12 = UNITY_ROOT_27.pow(EXP2.to_u64_digits());
+}
+
 #[allow(non_snake_case)]
 impl FinalExp for Bn254 {
     type Fp = Fq;
@@ -50,21 +71,7 @@ impl FinalExp for Bn254 {
         // Cubic nonresidue power
         let u;
 
-        // get the 27th root of unity
-        let u0 = U27_COEFF_0.to_u64_digits();
-        let u1 = U27_COEFF_1.to_u64_digits();
-        let u_coeffs = Fq2::from_coeffs([
-            Fq::from_raw([u0[0], u0[1], u0[2], u0[3]]),
-            Fq::from_raw([u1[0], u1[1], u1[2], u1[3]]),
-        ]);
-        let unity_root_27 = Fq12::from_coeffs([
-            Fq2::ZERO,
-            Fq2::ZERO,
-            u_coeffs,
-            Fq2::ZERO,
-            Fq2::ZERO,
-            Fq2::ZERO,
-        ]);
+        let unity_root_27 = *UNITY_ROOT_27;
         debug_assert_eq!(unity_root_27.pow([27]), Fq12::one());
 
         if f.pow(EXP1.to_u64_digits()) == Fq12::ONE {
@@ -115,8 +122,9 @@ impl FinalExp for Bn254 {
 
         tonelli_shanks_loop(&mut x3, &mut tmp, &mut t);
 
+        let unity_root_27_exp2 = *UNITY_ROOT_27_EXP2;
         while t != 0 {
-            tmp = unity_root_27.pow(EXP2.to_u64_digits());
+            tmp = unity_root_27_exp2;
             x *= tmp;
 
             x3 = x.square() * x * c_inv;
diff --git a/extensions/pairing/transpiler/Cargo.toml b/extensions/pairing/transpiler/Cargo.toml
index a5557b03d1..9ce32bc85c 100644
--- a/extensions/pairing/transpiler/Cargo.toml
+++ b/extensions/pairing/transpiler/Cargo.toml
@@ -14,4 +14,3 @@ openvm-transpiler = { workspace = true }
 rrs-lib = { workspace = true }
 strum = { workspace = true }
 openvm-pairing-guest = { workspace = true }
-openvm-instructions-derive = { workspace = true }
diff --git a/extensions/pairing/transpiler/src/lib.rs b/extensions/pairing/transpiler/src/lib.rs
index 7777c37c91..e80deaf154 100644
--- a/extensions/pairing/transpiler/src/lib.rs
+++ b/extensions/pairing/transpiler/src/lib.rs
@@ -1,71 +1,11 @@
 use openvm_instructions::{
-    instruction::Instruction, riscv::RV32_REGISTER_NUM_LIMBS, LocalOpcode, PhantomDiscriminant,
+    instruction::Instruction, riscv::RV32_REGISTER_NUM_LIMBS, PhantomDiscriminant,
 };
-use openvm_instructions_derive::LocalOpcode;
 use openvm_pairing_guest::{PairingBaseFunct7, OPCODE, PAIRING_FUNCT3};
 use openvm_stark_backend::p3_field::PrimeField32;
 use openvm_transpiler::{TranspilerExtension, TranspilerOutput};
 use rrs_lib::instruction_formats::RType;
-use strum::{EnumCount, EnumIter, FromRepr};
-
-// NOTE: the following opcodes are enabled only in testing and not enabled in the VM Extension
-#[derive(
-    Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, EnumCount, EnumIter, FromRepr, LocalOpcode,
-)]
-#[opcode_offset = 0x750]
-#[repr(usize)]
-#[allow(non_camel_case_types)]
-pub enum PairingOpcode {
-    MILLER_DOUBLE_AND_ADD_STEP,
-    MILLER_DOUBLE_STEP,
-    EVALUATE_LINE,
-    MUL_013_BY_013,
-    MUL_023_BY_023,
-    MUL_BY_01234,
-    MUL_BY_02345,
-}
-
-// NOTE: Fp12 opcodes are only enabled in testing and not enabled in the VM Extension
-#[derive(
-    Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, EnumCount, EnumIter, FromRepr, LocalOpcode,
-)]
-#[opcode_offset = 0x700]
-#[repr(usize)]
-#[allow(non_camel_case_types)]
-pub enum Fp12Opcode {
-    ADD,
-    SUB,
-    MUL,
-}
-const FP12_OPS: usize = 4;
-
-pub struct Bn254Fp12Opcode(Fp12Opcode);
-
-impl LocalOpcode for Bn254Fp12Opcode {
-    const CLASS_OFFSET: usize = Fp12Opcode::CLASS_OFFSET;
-
-    fn from_usize(value: usize) -> Self {
-        Self(Fp12Opcode::from_usize(value))
-    }
-
-    fn local_usize(&self) -> usize {
-        self.0.local_usize()
-    }
-}
-
-pub struct Bls12381Fp12Opcode(Fp12Opcode);
-
-impl LocalOpcode for Bls12381Fp12Opcode {
-    const CLASS_OFFSET: usize = Fp12Opcode::CLASS_OFFSET + FP12_OPS;
-
-    fn from_usize(value: usize) -> Self {
-        Self(Fp12Opcode::from_usize(value - FP12_OPS))
-    }
-
-    fn local_usize(&self) -> usize {
-        self.0.local_usize() + FP12_OPS
-    }
-}
+use strum::FromRepr;
 
 #[derive(Copy, Clone, Debug, PartialEq, Eq, FromRepr)]
 #[repr(u16)]
diff --git a/extensions/rv32-adapters/Cargo.toml b/extensions/rv32-adapters/Cargo.toml
index adf133555b..54ec529e2c 100644
--- a/extensions/rv32-adapters/Cargo.toml
+++ b/extensions/rv32-adapters/Cargo.toml
@@ -19,9 +19,6 @@ openvm-instructions = { workspace = true }
 itertools.workspace = true
 derive-new.workspace = true
 rand.workspace = true
-serde = { workspace = true, features = ["derive"] }
-serde-big-array.workspace = true
-serde_with.workspace = true
 
 [dev-dependencies]
 openvm-stark-sdk = { workspace = true }
diff --git a/extensions/rv32-adapters/src/eq_mod.rs b/extensions/rv32-adapters/src/eq_mod.rs
index ab80481f19..0d06ae83e2 100644
--- a/extensions/rv32-adapters/src/eq_mod.rs
+++ b/extensions/rv32-adapters/src/eq_mod.rs
@@ -1,26 +1,26 @@
 use std::{
     array::from_fn,
     borrow::{Borrow, BorrowMut},
-    marker::PhantomData,
 };
 
 use itertools::izip;
 use openvm_circuit::{
     arch::{
-        AdapterAirContext, AdapterRuntimeContext, BasicAdapterInterface, ExecutionBridge,
-        ExecutionBus, ExecutionState, MinimalInstruction, Result, VmAdapterAir, VmAdapterChip,
-        VmAdapterInterface,
+        get_record_from_slice, AdapterAirContext, AdapterTraceExecutor, AdapterTraceFiller,
+        BasicAdapterInterface, ExecutionBridge, ExecutionState, MinimalInstruction, VmAdapterAir,
     },
-    system::{
-        memory::{
-            offline_checker::{MemoryBridge, MemoryReadAuxCols, MemoryWriteAuxCols},
-            MemoryAddress, MemoryController, OfflineMemory, RecordId,
+    system::memory::{
+        offline_checker::{
+            MemoryBridge, MemoryReadAuxCols, MemoryReadAuxRecord, MemoryWriteAuxCols,
+            MemoryWriteBytesAuxRecord,
         },
-        program::ProgramBus,
+        online::TracingMemory,
+        MemoryAddress, MemoryAuxColsFactory,
     },
 };
-use openvm_circuit_primitives::bitwise_op_lookup::{
-    BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
+use openvm_circuit_primitives::{
+    bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
+    AlignedBytesBorrow,
 };
 use openvm_circuit_primitives_derive::AlignedBorrow;
 use openvm_instructions::{
@@ -29,16 +29,13 @@ use openvm_instructions::{
     riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS},
 };
 use openvm_rv32im_circuit::adapters::{
-    read_rv32_register, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS,
+    tracing_read, tracing_write, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS,
 };
 use openvm_stark_backend::{
     interaction::InteractionBuilder,
     p3_air::BaseAir,
     p3_field::{Field, FieldAlgebra, PrimeField32},
 };
-use serde::{Deserialize, Serialize};
-use serde_big_array::BigArray;
-use serde_with::serde_as;
 
 /// This adapter reads from NUM_READS <= 2 pointers and writes to a register.
 /// * The data is read from the heap (address space 2), and the pointers are read from registers
@@ -47,7 +44,7 @@ use serde_with::serde_as;
 ///   starting from the addresses in `rs[0]` (and `rs[1]` if `R = 2`).
 /// * Writes are to 32-bit register rd.
 #[repr(C)]
-#[derive(AlignedBorrow)]
+#[derive(AlignedBorrow, Debug)]
 pub struct Rv32IsEqualModAdapterCols<
     T,
     const NUM_READS: usize,
@@ -227,209 +224,233 @@ impl<
     }
 }
 
-pub struct Rv32IsEqualModAdapterChip<
-    F: Field,
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct Rv32IsEqualModAdapterRecord<
+    const NUM_READS: usize,
+    const BLOCKS_PER_READ: usize,
+    const BLOCK_SIZE: usize,
+    const TOTAL_READ_SIZE: usize,
+> {
+    pub from_pc: u32, // pc passed to `start`
+    pub timestamp: u32, // `memory.timestamp` captured in `start`
+
+    pub rs_ptr: [u32; NUM_READS], // register pointers taken from operands `b` and `c`
+    pub rs_val: [u32; NUM_READS], // register values; used as the heap read addresses
+    pub rs_read_aux: [MemoryReadAuxRecord; NUM_READS], // one per register read
+    pub heap_read_aux: [[MemoryReadAuxRecord; BLOCKS_PER_READ]; NUM_READS], // one per block
+
+    pub rd_ptr: u32, // destination register pointer taken from operand `a`
+    pub writes_aux: MemoryWriteBytesAuxRecord<RV32_REGISTER_NUM_LIMBS>, // for the rd write
+}
+
+#[derive(Clone, Copy)]
+pub struct Rv32IsEqualModAdapterExecutor<
     const NUM_READS: usize,
     const BLOCKS_PER_READ: usize,
     const BLOCK_SIZE: usize,
     const TOTAL_READ_SIZE: usize,
 > {
-    pub air: Rv32IsEqualModAdapterAir<NUM_READS, BLOCKS_PER_READ, BLOCK_SIZE, TOTAL_READ_SIZE>,
+    pointer_max_bits: usize,
+}
+
+#[derive(derive_new::new)]
+pub struct Rv32IsEqualModAdapterFiller<
+    const NUM_READS: usize,
+    const BLOCKS_PER_READ: usize,
+    const BLOCK_SIZE: usize,
+    const TOTAL_READ_SIZE: usize,
+> {
+    pointer_max_bits: usize,
     pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
-    _marker: PhantomData<F>,
 }
 
 impl<
-        F: PrimeField32,
         const NUM_READS: usize,
         const BLOCKS_PER_READ: usize,
         const BLOCK_SIZE: usize,
         const TOTAL_READ_SIZE: usize,
-    > Rv32IsEqualModAdapterChip<F, NUM_READS, BLOCKS_PER_READ, BLOCK_SIZE, TOTAL_READ_SIZE>
+    > Rv32IsEqualModAdapterExecutor<NUM_READS, BLOCKS_PER_READ, BLOCK_SIZE, TOTAL_READ_SIZE>
 {
-    pub fn new(
-        execution_bus: ExecutionBus,
-        program_bus: ProgramBus,
-        memory_bridge: MemoryBridge,
-        address_bits: usize,
-        bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
-    ) -> Self {
+    pub fn new(pointer_max_bits: usize) -> Self {
         assert!(NUM_READS <= 2);
         assert_eq!(TOTAL_READ_SIZE, BLOCKS_PER_READ * BLOCK_SIZE);
         assert!(
-            RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS - address_bits < RV32_CELL_BITS,
-            "address_bits={address_bits} needs to be large enough for high limb range check"
+            RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS - pointer_max_bits < RV32_CELL_BITS,
+            "pointer_max_bits={pointer_max_bits} needs to be large enough for high limb range check"
         );
-        Self {
-            air: Rv32IsEqualModAdapterAir {
-                execution_bridge: ExecutionBridge::new(execution_bus, program_bus),
-                memory_bridge,
-                bus: bitwise_lookup_chip.bus(),
-                address_bits,
-            },
-            bitwise_lookup_chip,
-            _marker: PhantomData,
-        }
+        Self { pointer_max_bits }
     }
 }
 
-#[repr(C)]
-#[serde_as]
-#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
-pub struct Rv32IsEqualModReadRecord<
-    const NUM_READS: usize,
-    const BLOCKS_PER_READ: usize,
-    const BLOCK_SIZE: usize,
-> {
-    #[serde(with = "BigArray")]
-    pub rs: [RecordId; NUM_READS],
-    #[serde_as(as = "[[_; BLOCKS_PER_READ]; NUM_READS]")]
-    pub reads: [[RecordId; BLOCKS_PER_READ]; NUM_READS],
-}
-
-#[repr(C)]
-#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
-pub struct Rv32IsEqualModWriteRecord {
-    pub from_state: ExecutionState<u32>,
-    pub rd_id: RecordId,
-}
-
 impl<
         F: PrimeField32,
         const NUM_READS: usize,
         const BLOCKS_PER_READ: usize,
         const BLOCK_SIZE: usize,
         const TOTAL_READ_SIZE: usize,
-    > VmAdapterChip<F>
-    for Rv32IsEqualModAdapterChip<F, NUM_READS, BLOCKS_PER_READ, BLOCK_SIZE, TOTAL_READ_SIZE>
+    > AdapterTraceExecutor<F>
+    for Rv32IsEqualModAdapterExecutor<NUM_READS, BLOCKS_PER_READ, BLOCK_SIZE, TOTAL_READ_SIZE>
+where
+    F: PrimeField32,
 {
-    type ReadRecord = Rv32IsEqualModReadRecord<NUM_READS, BLOCKS_PER_READ, BLOCK_SIZE>;
-    type WriteRecord = Rv32IsEqualModWriteRecord;
-    type Air = Rv32IsEqualModAdapterAir<NUM_READS, BLOCKS_PER_READ, BLOCK_SIZE, TOTAL_READ_SIZE>;
-    type Interface = BasicAdapterInterface<
-        F,
-        MinimalInstruction<F>,
+    const WIDTH: usize =
+        Rv32IsEqualModAdapterCols::<F, NUM_READS, BLOCKS_PER_READ, BLOCK_SIZE>::width();
+    type ReadData = [[u8; TOTAL_READ_SIZE]; NUM_READS];
+    type WriteData = [u8; RV32_REGISTER_NUM_LIMBS];
+    type RecordMut<'a> = &'a mut Rv32IsEqualModAdapterRecord<
         NUM_READS,
-        1,
+        BLOCKS_PER_READ,
+        BLOCK_SIZE,
         TOTAL_READ_SIZE,
-        RV32_REGISTER_NUM_LIMBS,
     >;
 
-    fn preprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
+    fn start(pc: u32, memory: &TracingMemory, record: &mut Self::RecordMut<'_>) {
+        record.from_pc = pc; // becomes `from_state.pc` in `fill_trace_row`
+        record.timestamp = memory.timestamp; // base for the filler's access timestamps
+    }
+
+    fn read(
+        &self,
+        memory: &mut TracingMemory,
         instruction: &Instruction<F>,
-    ) -> Result<(
-        <Self::Interface as VmAdapterInterface<F>>::Reads,
-        Self::ReadRecord,
-    )> {
+        record: &mut Self::RecordMut<'_>,
+    ) -> Self::ReadData {
         let Instruction { b, c, d, e, .. } = *instruction;
 
         debug_assert_eq!(d.as_canonical_u32(), RV32_REGISTER_AS);
         debug_assert_eq!(e.as_canonical_u32(), RV32_MEMORY_AS);
 
-        let mut rs_vals = [0; NUM_READS];
-        let rs_records: [_; NUM_READS] = from_fn(|i| {
-            let addr = if i == 0 { b } else { c };
-            let (record, val) = read_rv32_register(memory, d, addr);
-            rs_vals[i] = val;
-            record
-        });
+        // Read register values
+        record.rs_val = from_fn(|i| {
+            record.rs_ptr[i] = if i == 0 { b } else { c }.as_canonical_u32();
 
-        let read_records = rs_vals.map(|address| {
-            debug_assert!(address < (1 << self.air.address_bits));
-            from_fn(|i| {
-                memory
-                    .read::<BLOCK_SIZE>(e, F::from_canonical_u32(address + (i * BLOCK_SIZE) as u32))
-            })
+            u32::from_le_bytes(tracing_read(
+                memory,
+                RV32_REGISTER_AS,
+                record.rs_ptr[i],
+                &mut record.rs_read_aux[i].prev_timestamp,
+            ))
         });
 
-        let read_data = read_records.map(|r| {
-            let read = r.map(|x| x.1);
-            let mut read_it = read.iter().flatten();
-            from_fn(|_| *(read_it.next().unwrap()))
-        });
-        let record = Rv32IsEqualModReadRecord {
-            rs: rs_records,
-            reads: read_records.map(|r| r.map(|x| x.0)),
-        };
-
-        Ok((read_data, record))
+        // Read memory values: BLOCKS_PER_READ blocks of BLOCK_SIZE bytes from each pointer
+        from_fn(|i| {
+            debug_assert!(
+                record.rs_val[i] as usize + TOTAL_READ_SIZE - 1 < (1 << self.pointer_max_bits)
+            );
+            from_fn::<_, BLOCKS_PER_READ, _>(|j| {
+                tracing_read::<BLOCK_SIZE>(
+                    memory,
+                    RV32_MEMORY_AS,
+                    record.rs_val[i] + (j * BLOCK_SIZE) as u32,
+                    &mut record.heap_read_aux[i][j].prev_timestamp,
+                )
+            })
+            .concat()
+            .try_into()
+            .unwrap() // `new` asserts TOTAL_READ_SIZE == BLOCKS_PER_READ * BLOCK_SIZE
+        })
     }
 
-    fn postprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
+    fn write(
+        &self,
+        memory: &mut TracingMemory,
         instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-        output: AdapterRuntimeContext<F, Self::Interface>,
-        _read_record: &Self::ReadRecord,
-    ) -> Result<(ExecutionState<u32>, Self::WriteRecord)> {
-        let Instruction { a, d, .. } = *instruction;
-        let (rd_id, _) = memory.write(d, a, output.writes[0]);
-
-        debug_assert!(
-            memory.timestamp() - from_state.timestamp
-                == (NUM_READS * (BLOCKS_PER_READ + 1) + 1) as u32,
-            "timestamp delta is {}, expected {}",
-            memory.timestamp() - from_state.timestamp,
-            NUM_READS * (BLOCKS_PER_READ + 1) + 1
+        data: Self::WriteData,
+        record: &mut Self::RecordMut<'_>,
+    ) {
+        let Instruction { a, .. } = *instruction;
+        record.rd_ptr = a.as_canonical_u32();
+        tracing_write(
+            memory,
+            RV32_REGISTER_AS,
+            record.rd_ptr,
+            data,
+            &mut record.writes_aux.prev_timestamp,
+            &mut record.writes_aux.prev_data,
         );
-
-        Ok((
-            ExecutionState {
-                pc: from_state.pc + DEFAULT_PC_STEP,
-                timestamp: memory.timestamp(),
-            },
-            Self::WriteRecord { from_state, rd_id },
-        ))
     }
+}
 
-    fn generate_trace_row(
-        &self,
-        row_slice: &mut [F],
-        read_record: Self::ReadRecord,
-        write_record: Self::WriteRecord,
-        memory: &OfflineMemory<F>,
-    ) {
-        let aux_cols_factory = memory.aux_cols_factory();
-        let row_slice: &mut Rv32IsEqualModAdapterCols<F, NUM_READS, BLOCKS_PER_READ, BLOCK_SIZE> =
-            row_slice.borrow_mut();
-        row_slice.from_state = write_record.from_state.map(F::from_canonical_u32);
-
-        let rs = read_record.rs.map(|r| memory.record_by_id(r));
-        for (i, r) in rs.iter().enumerate() {
-            row_slice.rs_ptr[i] = r.pointer;
-            row_slice.rs_val[i].copy_from_slice(r.data_slice());
-            aux_cols_factory.generate_read_aux(r, &mut row_slice.rs_read_aux[i]);
-            for (j, x) in read_record.reads[i].iter().enumerate() {
-                let read = memory.record_by_id(*x);
-                aux_cols_factory.generate_read_aux(read, &mut row_slice.heap_read_aux[i][j]);
-            }
-        }
-
-        let rd = memory.record_by_id(write_record.rd_id);
-        row_slice.rd_ptr = rd.pointer;
-        aux_cols_factory.generate_write_aux(rd, &mut row_slice.writes_aux);
-
-        // Range checks
-        let need_range_check: [u32; 2] = from_fn(|i| {
-            if i < NUM_READS {
-                rs[i]
-                    .data_at(RV32_REGISTER_NUM_LIMBS - 1)
-                    .as_canonical_u32()
+impl<
+        F: PrimeField32,
+        const NUM_READS: usize,
+        const BLOCKS_PER_READ: usize,
+        const BLOCK_SIZE: usize,
+        const TOTAL_READ_SIZE: usize,
+    > AdapterTraceFiller<F>
+    for Rv32IsEqualModAdapterFiller<NUM_READS, BLOCKS_PER_READ, BLOCK_SIZE, TOTAL_READ_SIZE>
+{
+    const WIDTH: usize =
+        Rv32IsEqualModAdapterCols::<F, NUM_READS, BLOCKS_PER_READ, BLOCK_SIZE>::width();
+
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, mut adapter_row: &mut [F]) {
+        let record: &Rv32IsEqualModAdapterRecord<
+            NUM_READS,
+            BLOCKS_PER_READ,
+            BLOCK_SIZE,
+            TOTAL_READ_SIZE,
+        > = unsafe { get_record_from_slice(&mut adapter_row, ()) };
+
+        let cols: &mut Rv32IsEqualModAdapterCols<F, NUM_READS, BLOCKS_PER_READ, BLOCK_SIZE> =
+            adapter_row.borrow_mut();
+
+        let mut timestamp = record.timestamp + (NUM_READS + NUM_READS * BLOCKS_PER_READ) as u32 + 1;
+        let mut timestamp_mm = || { // pre-decrement: yields timestamps last-to-first
+            timestamp -= 1;
+            timestamp
+        };
+        // Range-check the most significant limb (MSL) of each pointer before writing anything:
+        debug_assert!(self.pointer_max_bits <= RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS);
+        let limb_shift_bits = RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS - self.pointer_max_bits;
+        const MSL_SHIFT: usize = RV32_CELL_BITS * (RV32_REGISTER_NUM_LIMBS - 1);
+        self.bitwise_lookup_chip.request_range(
+            (record.rs_val[0] >> MSL_SHIFT) << limb_shift_bits,
+            if NUM_READS > 1 {
+                (record.rs_val[1] >> MSL_SHIFT) << limb_shift_bits
             } else {
                 0
-            }
-        });
-        let limb_shift_bits = RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS - self.air.address_bits;
-        self.bitwise_lookup_chip.request_range(
-            need_range_check[0] << limb_shift_bits,
-            need_range_check[1] << limb_shift_bits,
+            },
         );
-    }
+        // Fill columns back to front, so the last access (the rd write) comes first.
+        cols.writes_aux
+            .set_prev_data(record.writes_aux.prev_data.map(F::from_canonical_u8));
+        mem_helper.fill(
+            record.writes_aux.prev_timestamp,
+            timestamp_mm(),
+            cols.writes_aux.as_mut(),
+        );
+        cols.rd_ptr = F::from_canonical_u32(record.rd_ptr);
+
+        // **NOTE**: Must iterate everything in reverse order to avoid overwriting the records
+        cols.heap_read_aux
+            .iter_mut()
+            .rev()
+            .zip(record.heap_read_aux.iter().rev())
+            .for_each(|(col_reads, record_reads)| {
+                col_reads
+                    .iter_mut()
+                    .rev()
+                    .zip(record_reads.iter().rev())
+                    .for_each(|(col, record)| {
+                        mem_helper.fill(record.prev_timestamp, timestamp_mm(), col.as_mut());
+                    });
+            });
+
+        cols.rs_read_aux
+            .iter_mut()
+            .rev()
+            .zip(record.rs_read_aux.iter().rev())
+            .for_each(|(col, record)| {
+                mem_helper.fill(record.prev_timestamp, timestamp_mm(), col.as_mut());
+            });
+
+        cols.rs_val = record
+            .rs_val
+            .map(|val| val.to_le_bytes().map(F::from_canonical_u8));
+        cols.rs_ptr = record.rs_ptr.map(F::from_canonical_u32);
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        cols.from_state.timestamp = F::from_canonical_u32(record.timestamp);
+        cols.from_state.pc = F::from_canonical_u32(record.from_pc);
     }
 }
diff --git a/extensions/rv32-adapters/src/heap.rs b/extensions/rv32-adapters/src/heap.rs
index cd9f93abbc..10409d97e9 100644
--- a/extensions/rv32-adapters/src/heap.rs
+++ b/extensions/rv32-adapters/src/heap.rs
@@ -1,38 +1,28 @@
-use std::{
-    array::{self, from_fn},
-    borrow::Borrow,
-    marker::PhantomData,
-};
+use std::borrow::Borrow;
 
 use openvm_circuit::{
     arch::{
-        AdapterAirContext, AdapterRuntimeContext, BasicAdapterInterface, ExecutionBridge,
-        ExecutionBus, ExecutionState, MinimalInstruction, Result, VmAdapterAir, VmAdapterChip,
-        VmAdapterInterface,
-    },
-    system::{
-        memory::{offline_checker::MemoryBridge, MemoryController, OfflineMemory},
-        program::ProgramBus,
+        AdapterAirContext, AdapterTraceExecutor, AdapterTraceFiller, BasicAdapterInterface,
+        ExecutionBridge, MinimalInstruction, VmAdapterAir,
     },
+    system::memory::{offline_checker::MemoryBridge, online::TracingMemory, MemoryAuxColsFactory},
 };
 use openvm_circuit_primitives::bitwise_op_lookup::{
     BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
 };
 use openvm_instructions::{
     instruction::Instruction,
-    program::DEFAULT_PC_STEP,
-    riscv::{RV32_CELL_BITS, RV32_MEMORY_AS, RV32_REGISTER_AS, RV32_REGISTER_NUM_LIMBS},
+    riscv::{RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS},
 };
-use openvm_rv32im_circuit::adapters::read_rv32_register;
 use openvm_stark_backend::{
     interaction::InteractionBuilder,
     p3_air::BaseAir,
     p3_field::{Field, PrimeField32},
 };
 
-use super::{
-    vec_heap_generate_trace_row_impl, Rv32VecHeapAdapterAir, Rv32VecHeapAdapterCols,
-    Rv32VecHeapReadRecord, Rv32VecHeapWriteRecord,
+use crate::{
+    Rv32VecHeapAdapterAir, Rv32VecHeapAdapterCols, Rv32VecHeapAdapterExecutor,
+    Rv32VecHeapAdapterFiller, Rv32VecHeapAdapterRecord,
 };
 
 /// This adapter reads from NUM_READS <= 2 pointers and writes to 1 pointer.
@@ -101,137 +91,95 @@ impl<
     }
 }
 
-pub struct Rv32HeapAdapterChip<
-    F: Field,
+#[derive(Clone, Copy)] // the only field is the wrapped vec-heap executor
+pub struct Rv32HeapAdapterExecutor< // executor half of the heap adapter: a newtype over the vec-heap executor
     const NUM_READS: usize,
     const READ_SIZE: usize,
     const WRITE_SIZE: usize,
-> {
-    pub air: Rv32HeapAdapterAir<NUM_READS, READ_SIZE, WRITE_SIZE>,
-    pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
-    _marker: PhantomData<F>,
+>(Rv32VecHeapAdapterExecutor<NUM_READS, 1, 1, READ_SIZE, WRITE_SIZE>); // BLOCKS_PER_READ = BLOCKS_PER_WRITE = 1
+
+impl<const NUM_READS: usize, const READ_SIZE: usize, const WRITE_SIZE: usize>
+    Rv32HeapAdapterExecutor<NUM_READS, READ_SIZE, WRITE_SIZE>
+{
+    pub fn new(pointer_max_bits: usize) -> Self { // panics if either assertion below fails
+        assert!(NUM_READS <= 2); // the adapter reads from at most two pointers
+        assert!( // the register bits above `pointer_max_bits` must span less than one limb
+            RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS - pointer_max_bits < RV32_CELL_BITS,
+            "pointer_max_bits={pointer_max_bits} needs to be large enough for high limb range check"
+        );
+        Rv32HeapAdapterExecutor(Rv32VecHeapAdapterExecutor::new(pointer_max_bits)) // wrap the vec-heap executor
+    }
 }
 
-impl<F: PrimeField32, const NUM_READS: usize, const READ_SIZE: usize, const WRITE_SIZE: usize>
-    Rv32HeapAdapterChip<F, NUM_READS, READ_SIZE, WRITE_SIZE>
+pub struct Rv32HeapAdapterFiller< // trace-filler half of the heap adapter: a newtype over the vec-heap filler
+    const NUM_READS: usize,
+    const READ_SIZE: usize,
+    const WRITE_SIZE: usize,
+>(Rv32VecHeapAdapterFiller<NUM_READS, 1, 1, READ_SIZE, WRITE_SIZE>); // BLOCKS_PER_READ = BLOCKS_PER_WRITE = 1
+
+impl<const NUM_READS: usize, const READ_SIZE: usize, const WRITE_SIZE: usize>
+    Rv32HeapAdapterFiller<NUM_READS, READ_SIZE, WRITE_SIZE>
 {
     pub fn new(
-        execution_bus: ExecutionBus,
-        program_bus: ProgramBus,
-        memory_bridge: MemoryBridge,
-        address_bits: usize,
+        pointer_max_bits: usize, // checked by the second assertion below
         bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
     ) -> Self {
         assert!(NUM_READS <= 2);
         assert!(
-            RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS - address_bits < RV32_CELL_BITS,
-            "address_bits={address_bits} needs to be large enough for high limb range check"
+            RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS - pointer_max_bits < RV32_CELL_BITS, // same condition as Rv32HeapAdapterExecutor::new
+            "pointer_max_bits={pointer_max_bits} needs to be large enough for high limb range check"
         );
-        Self {
-            air: Rv32HeapAdapterAir {
-                execution_bridge: ExecutionBridge::new(execution_bus, program_bus),
-                memory_bridge,
-                bus: bitwise_lookup_chip.bus(),
-                address_bits,
-            },
+        Rv32HeapAdapterFiller(Rv32VecHeapAdapterFiller::new( // both arguments are handed to the wrapped vec-heap filler
+            pointer_max_bits,
             bitwise_lookup_chip,
-            _marker: PhantomData,
-        }
+        ))
     }
 }
 
 impl<F: PrimeField32, const NUM_READS: usize, const READ_SIZE: usize, const WRITE_SIZE: usize>
-    VmAdapterChip<F> for Rv32HeapAdapterChip<F, NUM_READS, READ_SIZE, WRITE_SIZE>
+    AdapterTraceExecutor<F> for Rv32HeapAdapterExecutor<NUM_READS, READ_SIZE, WRITE_SIZE> // `read`/`write` forward to the wrapped vec-heap executor
+where
+    F: PrimeField32, // redundant: the same bound is already in the impl header
 {
-    type ReadRecord = Rv32VecHeapReadRecord<F, NUM_READS, 1, READ_SIZE>;
-    type WriteRecord = Rv32VecHeapWriteRecord<1, WRITE_SIZE>;
-    type Air = Rv32HeapAdapterAir<NUM_READS, READ_SIZE, WRITE_SIZE>;
-    type Interface =
-        BasicAdapterInterface<F, MinimalInstruction<F>, NUM_READS, 1, READ_SIZE, WRITE_SIZE>;
-
-    fn preprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-    ) -> Result<(
-        <Self::Interface as VmAdapterInterface<F>>::Reads,
-        Self::ReadRecord,
-    )> {
-        let Instruction { a, b, c, d, e, .. } = *instruction;
-
-        debug_assert_eq!(d.as_canonical_u32(), RV32_REGISTER_AS);
-        debug_assert_eq!(e.as_canonical_u32(), RV32_MEMORY_AS);
-
-        let mut rs_vals = [0; NUM_READS];
-        let rs_records: [_; NUM_READS] = from_fn(|i| {
-            let addr = if i == 0 { b } else { c };
-            let (record, val) = read_rv32_register(memory, d, addr);
-            rs_vals[i] = val;
-            record
-        });
-        let (rd_record, rd_val) = read_rv32_register(memory, d, a);
-
-        let read_records = rs_vals.map(|address| {
-            debug_assert!(address as usize + READ_SIZE - 1 < (1 << self.air.address_bits));
-            [memory.read::<READ_SIZE>(e, F::from_canonical_u32(address))]
-        });
-        let read_data = read_records.map(|r| r[0].1);
-
-        let record = Rv32VecHeapReadRecord {
-            rs: rs_records,
-            rd: rd_record,
-            rd_val: F::from_canonical_u32(rd_val),
-            reads: read_records.map(|r| array::from_fn(|i| r[i].0)),
-        };
-
-        Ok((read_data, record))
+    const WIDTH: usize = // width of the vec-heap adapter columns with one block per read/write
+        Rv32VecHeapAdapterCols::<F, NUM_READS, 1, 1, READ_SIZE, WRITE_SIZE>::width();
+    type ReadData = [[u8; READ_SIZE]; NUM_READS]; // one READ_SIZE-byte block per read pointer
+    type WriteData = [[u8; WRITE_SIZE]; 1]; // a single WRITE_SIZE-byte block
+    type RecordMut<'a> = &'a mut Rv32VecHeapAdapterRecord<NUM_READS, 1, 1, READ_SIZE, WRITE_SIZE>; // reuses the vec-heap record type
+
+    fn start(pc: u32, memory: &TracingMemory, record: &mut Self::RecordMut<'_>) { // stores the starting pc and memory timestamp in the record
+        record.from_pc = pc;
+        record.from_timestamp = memory.timestamp;
     }
 
-    fn postprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
+    fn read( // returns one READ_SIZE-byte block per read pointer
+        &self,
+        memory: &mut TracingMemory,
         instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-        output: AdapterRuntimeContext<F, Self::Interface>,
-        read_record: &Self::ReadRecord,
-    ) -> Result<(ExecutionState<u32>, Self::WriteRecord)> {
-        let e = instruction.e;
-        let writes = [memory.write(e, read_record.rd_val, output.writes[0]).0];
-
-        let timestamp_delta = memory.timestamp() - from_state.timestamp;
-        debug_assert!(
-            timestamp_delta == 6,
-            "timestamp delta is {}, expected 6",
-            timestamp_delta
-        );
-
-        Ok((
-            ExecutionState {
-                pc: from_state.pc + DEFAULT_PC_STEP,
-                timestamp: memory.timestamp(),
-            },
-            Self::WriteRecord { from_state, writes },
-        ))
+        record: &mut Self::RecordMut<'_>,
+    ) -> Self::ReadData {
+        let read_data = AdapterTraceExecutor::<F>::read(&self.0, memory, instruction, record); // inner result is [[block; 1]; NUM_READS]
+        read_data.map(|r| r[0]) // drop the length-1 block dimension
     }
 
-    fn generate_trace_row(
+    fn write( // forwards the single write block to the wrapped executor
         &self,
-        row_slice: &mut [F],
-        read_record: Self::ReadRecord,
-        write_record: Self::WriteRecord,
-        memory: &OfflineMemory<F>,
+        memory: &mut TracingMemory,
+        instruction: &Instruction<F>,
+        data: Self::WriteData,
+        record: &mut Self::RecordMut<'_>,
     ) {
-        vec_heap_generate_trace_row_impl(
-            row_slice,
-            &read_record,
-            &write_record,
-            self.bitwise_lookup_chip.clone(),
-            self.air.address_bits,
-            memory,
-        );
+        AdapterTraceExecutor::<F>::write(&self.0, memory, instruction, data, record); // `data` already has the inner WriteData shape
     }
+}
+
+impl<F: PrimeField32, const NUM_READS: usize, const READ_SIZE: usize, const WRITE_SIZE: usize>
+    AdapterTraceFiller<F> for Rv32HeapAdapterFiller<NUM_READS, READ_SIZE, WRITE_SIZE> // thin wrapper over the vec-heap filler
+{
+    const WIDTH: usize = // same value as the executor's WIDTH for these parameters
+        Rv32VecHeapAdapterCols::<F, NUM_READS, 1, 1, READ_SIZE, WRITE_SIZE>::width();
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, adapter_row: &mut [F]) { // row filling is done entirely by the wrapped filler
+        AdapterTraceFiller::<F>::fill_trace_row(&self.0, mem_helper, adapter_row); // forward unchanged
     }
 }
diff --git a/extensions/rv32-adapters/src/heap_branch.rs b/extensions/rv32-adapters/src/heap_branch.rs
index 29c9a151c9..3585e5e91f 100644
--- a/extensions/rv32-adapters/src/heap_branch.rs
+++ b/extensions/rv32-adapters/src/heap_branch.rs
@@ -1,27 +1,23 @@
 use std::{
     array::from_fn,
     borrow::{Borrow, BorrowMut},
-    iter::once,
-    marker::PhantomData,
 };
 
 use itertools::izip;
 use openvm_circuit::{
     arch::{
-        AdapterAirContext, AdapterRuntimeContext, BasicAdapterInterface, ExecutionBridge,
-        ExecutionBus, ExecutionState, ImmInstruction, Result, VmAdapterAir, VmAdapterChip,
-        VmAdapterInterface,
+        get_record_from_slice, AdapterAirContext, AdapterTraceExecutor, AdapterTraceFiller,
+        BasicAdapterInterface, ExecutionBridge, ExecutionState, ImmInstruction, VmAdapterAir,
     },
-    system::{
-        memory::{
-            offline_checker::{MemoryBridge, MemoryReadAuxCols},
-            MemoryAddress, MemoryController, OfflineMemory, RecordId,
-        },
-        program::ProgramBus,
+    system::memory::{
+        offline_checker::{MemoryBridge, MemoryReadAuxCols, MemoryReadAuxRecord},
+        online::TracingMemory,
+        MemoryAddress, MemoryAuxColsFactory,
     },
 };
-use openvm_circuit_primitives::bitwise_op_lookup::{
-    BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
+use openvm_circuit_primitives::{
+    bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
+    AlignedBytesBorrow,
 };
 use openvm_circuit_primitives_derive::AlignedBorrow;
 use openvm_instructions::{
@@ -29,16 +25,12 @@ use openvm_instructions::{
     program::DEFAULT_PC_STEP,
     riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS},
 };
-use openvm_rv32im_circuit::adapters::{
-    read_rv32_register, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS,
-};
+use openvm_rv32im_circuit::adapters::{tracing_read, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS};
 use openvm_stark_backend::{
     interaction::InteractionBuilder,
     p3_air::BaseAir,
     p3_field::{Field, FieldAlgebra, PrimeField32},
 };
-use serde::{Deserialize, Serialize};
-use serde_big_array::BigArray;
 
 /// This adapter reads from NUM_READS <= 2 pointers.
 /// * The data is read from the heap (address space 2), and the pointers are read from registers
@@ -170,158 +162,162 @@ impl<AB: InteractionBuilder, const NUM_READS: usize, const READ_SIZE: usize> VmA
     }
 }
 
-pub struct Rv32HeapBranchAdapterChip<F: Field, const NUM_READS: usize, const READ_SIZE: usize> {
-    pub air: Rv32HeapBranchAdapterAir<NUM_READS, READ_SIZE>,
+#[repr(C)] // C field layout; the filler reads this record out of a raw trace-row slice
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct Rv32HeapBranchAdapterRecord<const NUM_READS: usize> { // per-instruction record written by the executor, consumed by the filler
+    pub from_pc: u32, // pc passed to `start`
+    pub from_timestamp: u32, // `memory.timestamp` captured in `start`
+
+    pub rs_ptr: [u32; NUM_READS], // register pointers: operand `a` for index 0, `b` otherwise
+    pub rs_vals: [u32; NUM_READS], // little-endian register values, used as heap read addresses
+
+    pub rs_read_aux: [MemoryReadAuxRecord; NUM_READS], // aux data (prev_timestamp) of each register read
+    pub heap_read_aux: [MemoryReadAuxRecord; NUM_READS], // aux data (prev_timestamp) of each heap read
+}
+
+#[derive(Clone, Copy)]
+pub struct Rv32HeapBranchAdapterExecutor<const NUM_READS: usize, const READ_SIZE: usize> { // executor half of the heap-branch adapter
+    pub pointer_max_bits: usize, // `read` debug-asserts that each heap read ends below 2^pointer_max_bits
+}
+
+#[derive(derive_new::new)] // NOTE(review): the derived `new` performs none of the assertions in Rv32HeapBranchAdapterExecutor::new
+pub struct Rv32HeapBranchAdapterFiller<const NUM_READS: usize, const READ_SIZE: usize> { // trace-filler half of the heap-branch adapter
+    pub pointer_max_bits: usize, // determines the shift applied before the pointer range check
     pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
-    _marker: PhantomData<F>,
 }
 
-impl<F: PrimeField32, const NUM_READS: usize, const READ_SIZE: usize>
-    Rv32HeapBranchAdapterChip<F, NUM_READS, READ_SIZE>
+impl<const NUM_READS: usize, const READ_SIZE: usize>
+    Rv32HeapBranchAdapterExecutor<NUM_READS, READ_SIZE>
 {
-    pub fn new(
-        execution_bus: ExecutionBus,
-        program_bus: ProgramBus,
-        memory_bridge: MemoryBridge,
-        address_bits: usize,
-        bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
-    ) -> Self {
+    pub fn new(pointer_max_bits: usize) -> Self { // panics if either assertion below fails
         assert!(NUM_READS <= 2);
         assert!(
-            RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS - address_bits < RV32_CELL_BITS,
-            "address_bits={address_bits} needs to be large enough for high limb range check"
+            RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS - pointer_max_bits < RV32_CELL_BITS, // bits above pointer_max_bits must span less than one limb
+            "pointer_max_bits={pointer_max_bits} needs to be large enough for high limb range check"
         );
-        Self {
-            air: Rv32HeapBranchAdapterAir {
-                execution_bridge: ExecutionBridge::new(execution_bus, program_bus),
-                memory_bridge,
-                bus: bitwise_lookup_chip.bus(),
-                address_bits,
-            },
-            bitwise_lookup_chip,
-            _marker: PhantomData,
-        }
+        Self { pointer_max_bits } // the executor keeps no other state
     }
 }
 
-#[repr(C)]
-#[derive(Clone, Debug, Serialize, Deserialize)]
-pub struct Rv32HeapBranchReadRecord<const NUM_READS: usize, const READ_SIZE: usize> {
-    #[serde(with = "BigArray")]
-    pub rs_reads: [RecordId; NUM_READS],
-    #[serde(with = "BigArray")]
-    pub heap_reads: [RecordId; NUM_READS],
-}
-
-impl<F: PrimeField32, const NUM_READS: usize, const READ_SIZE: usize> VmAdapterChip<F>
-    for Rv32HeapBranchAdapterChip<F, NUM_READS, READ_SIZE>
+impl<F: PrimeField32, const NUM_READS: usize, const READ_SIZE: usize> AdapterTraceExecutor<F>
+    for Rv32HeapBranchAdapterExecutor<NUM_READS, READ_SIZE>
 {
-    type ReadRecord = Rv32HeapBranchReadRecord<NUM_READS, READ_SIZE>;
-    type WriteRecord = ExecutionState<u32>;
-    type Air = Rv32HeapBranchAdapterAir<NUM_READS, READ_SIZE>;
-    type Interface = BasicAdapterInterface<F, ImmInstruction<F>, NUM_READS, 0, READ_SIZE, 0>;
-
-    fn preprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
+    const WIDTH: usize = Rv32HeapBranchAdapterCols::<F, NUM_READS, READ_SIZE>::width(); // width of the adapter's column struct
+    type ReadData = [[u8; READ_SIZE]; NUM_READS]; // one READ_SIZE-byte heap block per pointer
+    type WriteData = (); // this adapter writes nothing
+    type RecordMut<'a> = &'a mut Rv32HeapBranchAdapterRecord<NUM_READS>;
+
+    fn start(pc: u32, memory: &TracingMemory, adapter_record: &mut Self::RecordMut<'_>) { // stores the starting pc and memory timestamp
+        adapter_record.from_pc = pc;
+        adapter_record.from_timestamp = memory.timestamp;
+    }
+
+    fn read( // reads the pointer registers, then the heap blocks they point to; returns the heap data
+        &self,
+        memory: &mut TracingMemory,
         instruction: &Instruction<F>,
-    ) -> Result<(
-        <Self::Interface as VmAdapterInterface<F>>::Reads,
-        Self::ReadRecord,
-    )> {
+        record: &mut Self::RecordMut<'_>,
+    ) -> Self::ReadData {
         let Instruction { a, b, d, e, .. } = *instruction;
 
         debug_assert_eq!(d.as_canonical_u32(), RV32_REGISTER_AS);
         debug_assert_eq!(e.as_canonical_u32(), RV32_MEMORY_AS);
 
-        let mut rs_vals = [0; NUM_READS];
-        let rs_records: [_; NUM_READS] = from_fn(|i| {
-            let addr = if i == 0 { a } else { b };
-            let (record, val) = read_rv32_register(memory, d, addr);
-            rs_vals[i] = val;
-            record
-        });
-
-        let heap_records = rs_vals.map(|address| {
-            assert!(address as usize + READ_SIZE - 1 < (1 << self.air.address_bits));
-            memory.read::<READ_SIZE>(e, F::from_canonical_u32(address))
+        // Read register values (the heap pointers); the closure also fills rs_ptr and rs_read_aux
+        record.rs_vals = from_fn(|i| {
+            record.rs_ptr[i] = if i == 0 { a } else { b }.as_canonical_u32(); // operand `a` for the first pointer, `b` otherwise
+            u32::from_le_bytes(tracing_read( // register bytes are interpreted as a little-endian u32
+                memory,
+                RV32_REGISTER_AS,
+                record.rs_ptr[i],
+                &mut record.rs_read_aux[i].prev_timestamp, // presumably receives the previous access timestamp — confirm in tracing_read
+            ))
         });
 
-        let record = Rv32HeapBranchReadRecord {
-            rs_reads: rs_records,
-            heap_reads: heap_records.map(|r| r.0),
-        };
-        Ok((heap_records.map(|r| r.1), record))
+        // Read memory values; this array is the function's return value
+        from_fn(|i| {
+            debug_assert!( // NOTE(review): the removed implementation used a hard `assert!`; this check is now debug-only
+                record.rs_vals[i] as usize + READ_SIZE - 1 < (1 << self.pointer_max_bits)
+            );
+            tracing_read(
+                memory,
+                RV32_MEMORY_AS,
+                record.rs_vals[i], // the register value is the heap address
+                &mut record.heap_read_aux[i].prev_timestamp,
+            )
+        })
     }
 
-    fn postprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
+    fn write( // no-op: required by the trait, all arguments unused
+        &self,
+        _memory: &mut TracingMemory,
         _instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-        output: AdapterRuntimeContext<F, Self::Interface>,
-        _read_record: &Self::ReadRecord,
-    ) -> Result<(ExecutionState<u32>, Self::WriteRecord)> {
-        let timestamp_delta = memory.timestamp() - from_state.timestamp;
-        debug_assert!(
-            timestamp_delta == 4,
-            "timestamp delta is {}, expected 4",
-            timestamp_delta
-        );
-
-        Ok((
-            ExecutionState {
-                pc: output.to_pc.unwrap_or(from_state.pc + DEFAULT_PC_STEP),
-                timestamp: memory.timestamp(),
-            },
-            from_state,
-        ))
+        _data: Self::WriteData,
+        _record: &mut Self::RecordMut<'_>,
+    ) {
+        // This adapter doesn't write anything
     }
+}
 
-    fn generate_trace_row(
-        &self,
-        row_slice: &mut [F],
-        read_record: Self::ReadRecord,
-        write_record: Self::WriteRecord,
-        memory: &OfflineMemory<F>,
-    ) {
-        let aux_cols_factory = memory.aux_cols_factory();
-        let row_slice: &mut Rv32HeapBranchAdapterCols<_, NUM_READS, READ_SIZE> =
-            row_slice.borrow_mut();
-        row_slice.from_state = write_record.map(F::from_canonical_u32);
+impl<F: PrimeField32, const NUM_READS: usize, const READ_SIZE: usize> AdapterTraceFiller<F>
+    for Rv32HeapBranchAdapterFiller<NUM_READS, READ_SIZE>
+{
+    const WIDTH: usize = Rv32HeapBranchAdapterCols::<F, NUM_READS, READ_SIZE>::width(); // same expression as the executor's WIDTH
 
-        let rs_reads = read_record.rs_reads.map(|r| memory.record_by_id(r));
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, mut adapter_row: &mut [F]) { // turns the record held in `adapter_row` into column values
+        let record: &Rv32HeapBranchAdapterRecord<NUM_READS> =
+            unsafe { get_record_from_slice(&mut adapter_row, ()) }; // NOTE(review): safety depends on get_record_from_slice's contract — confirm
+        let cols: &mut Rv32HeapBranchAdapterCols<F, NUM_READS, READ_SIZE> =
+            adapter_row.borrow_mut(); // column view over the same `adapter_row` slice
 
-        for (i, rs_read) in rs_reads.iter().enumerate() {
-            row_slice.rs_ptr[i] = rs_read.pointer;
-            row_slice.rs_val[i].copy_from_slice(rs_read.data_slice());
-            aux_cols_factory.generate_read_aux(rs_read, &mut row_slice.rs_read_aux[i]);
-        }
+        // Range checks:
+        // **NOTE**: Must do the range checks before overwriting the records
+        debug_assert!(self.pointer_max_bits <= RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS);
+        let limb_shift_bits = RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS - self.pointer_max_bits; // register bits above pointer_max_bits
+        const MSL_SHIFT: usize = RV32_CELL_BITS * (RV32_REGISTER_NUM_LIMBS - 1); // right shift that leaves only the most significant limb
+        self.bitwise_lookup_chip.request_range(
+            (record.rs_vals[0] >> MSL_SHIFT) << limb_shift_bits, // NOTE(review): indexes [0] unconditionally, so NUM_READS must be >= 1
+            if NUM_READS > 1 { // second slot is 0 when there is only one pointer
+                (record.rs_vals[1] >> MSL_SHIFT) << limb_shift_bits
+            } else {
+                0
+            },
+        );
 
-        for (i, heap_read) in read_record.heap_reads.iter().enumerate() {
-            let record = memory.record_by_id(*heap_read);
-            aux_cols_factory.generate_read_aux(record, &mut row_slice.heap_read_aux[i]);
+        // **NOTE**: Must iterate everything in reverse order to avoid overwriting the records
+        for i in (0..NUM_READS).rev() {
+            mem_helper.fill(
+                record.heap_read_aux[i].prev_timestamp,
+                record.from_timestamp + (i + NUM_READS) as u32, // heap read i is stamped after all NUM_READS register reads
+                cols.heap_read_aux[i].as_mut(),
+            );
         }
 
-        // Range checks:
-        let need_range_check: Vec<u32> = rs_reads
-            .iter()
-            .map(|record| {
-                record
-                    .data_at(RV32_REGISTER_NUM_LIMBS - 1)
-                    .as_canonical_u32()
-            })
-            .chain(once(0)) // in case NUM_READS is odd
-            .collect();
-        debug_assert!(self.air.address_bits <= RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS);
-        let limb_shift_bits = RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS - self.air.address_bits;
-        for pair in need_range_check.chunks_exact(2) {
-            self.bitwise_lookup_chip
-                .request_range(pair[0] << limb_shift_bits, pair[1] << limb_shift_bits);
+        for i in (0..NUM_READS).rev() {
+            mem_helper.fill(
+                record.rs_read_aux[i].prev_timestamp,
+                record.from_timestamp + i as u32, // register read i is stamped at from_timestamp + i
+                cols.rs_read_aux[i].as_mut(),
+            );
         }
-    }
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        cols.rs_val
+            .iter_mut()
+            .rev()
+            .zip(record.rs_vals.iter().rev())
+            .for_each(|(col, record)| { // closure `record` shadows the outer record: here it is a single u32 value
+                *col = record.to_le_bytes().map(F::from_canonical_u8); // one field element per little-endian byte
+            });
+
+        cols.rs_ptr
+            .iter_mut()
+            .rev()
+            .zip(record.rs_ptr.iter().rev())
+            .for_each(|(col, record)| { // closure `record` is a single pointer value
+                *col = F::from_canonical_u32(*record);
+            });
+
+        cols.from_state.timestamp = F::from_canonical_u32(record.from_timestamp); // from_state is filled last
+        cols.from_state.pc = F::from_canonical_u32(record.from_pc);
     }
 }
diff --git a/extensions/rv32-adapters/src/lib.rs b/extensions/rv32-adapters/src/lib.rs
index d84c82f617..6d884daedf 100644
--- a/extensions/rv32-adapters/src/lib.rs
+++ b/extensions/rv32-adapters/src/lib.rs
@@ -2,13 +2,11 @@ mod eq_mod;
 mod heap;
 mod heap_branch;
 mod vec_heap;
-mod vec_heap_two_reads;
 
 pub use eq_mod::*;
 pub use heap::*;
 pub use heap_branch::*;
 pub use vec_heap::*;
-pub use vec_heap_two_reads::*;
 
 #[cfg(any(test, feature = "test-utils"))]
 mod test_utils;
diff --git a/extensions/rv32-adapters/src/vec_heap.rs b/extensions/rv32-adapters/src/vec_heap.rs
index fab0df3334..ea3fc80113 100644
--- a/extensions/rv32-adapters/src/vec_heap.rs
+++ b/extensions/rv32-adapters/src/vec_heap.rs
@@ -2,25 +2,26 @@ use std::{
     array::from_fn,
     borrow::{Borrow, BorrowMut},
     iter::{once, zip},
-    marker::PhantomData,
 };
 
 use itertools::izip;
 use openvm_circuit::{
     arch::{
-        AdapterAirContext, AdapterRuntimeContext, ExecutionBridge, ExecutionBus, ExecutionState,
-        Result, VecHeapAdapterInterface, VmAdapterAir, VmAdapterChip, VmAdapterInterface,
+        get_record_from_slice, AdapterAirContext, AdapterTraceExecutor, AdapterTraceFiller,
+        ExecutionBridge, ExecutionState, VecHeapAdapterInterface, VmAdapterAir,
     },
-    system::{
-        memory::{
-            offline_checker::{MemoryBridge, MemoryReadAuxCols, MemoryWriteAuxCols},
-            MemoryAddress, MemoryController, OfflineMemory, RecordId,
+    system::memory::{
+        offline_checker::{
+            MemoryBridge, MemoryReadAuxCols, MemoryReadAuxRecord, MemoryWriteAuxCols,
+            MemoryWriteBytesAuxRecord,
         },
-        program::ProgramBus,
+        online::TracingMemory,
+        MemoryAddress, MemoryAuxColsFactory,
     },
 };
-use openvm_circuit_primitives::bitwise_op_lookup::{
-    BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
+use openvm_circuit_primitives::{
+    bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
+    AlignedBytesBorrow,
 };
 use openvm_circuit_primitives_derive::AlignedBorrow;
 use openvm_instructions::{
@@ -29,15 +30,13 @@ use openvm_instructions::{
     riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS},
 };
 use openvm_rv32im_circuit::adapters::{
-    abstract_compose, read_rv32_register, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS,
+    abstract_compose, tracing_read, tracing_write, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS,
 };
 use openvm_stark_backend::{
     interaction::InteractionBuilder,
     p3_air::BaseAir,
     p3_field::{Field, FieldAlgebra, PrimeField32},
 };
-use serde::{Deserialize, Serialize};
-use serde_with::serde_as;
 
 /// This adapter reads from R (R <= 2) pointers and writes to 1 pointer.
 /// * The data is read from the heap (address space 2), and the pointers are read from registers
@@ -46,89 +45,8 @@ use serde_with::serde_as;
 ///   starting from the addresses in `rs[0]` (and `rs[1]` if `R = 2`).
 /// * Writes take the form of `BLOCKS_PER_WRITE` consecutive writes of size `WRITE_SIZE` to the
 ///   heap, starting from the address in `rd`.
-#[derive(Clone)]
-pub struct Rv32VecHeapAdapterChip<
-    F: Field,
-    const NUM_READS: usize,
-    const BLOCKS_PER_READ: usize,
-    const BLOCKS_PER_WRITE: usize,
-    const READ_SIZE: usize,
-    const WRITE_SIZE: usize,
-> {
-    pub air:
-        Rv32VecHeapAdapterAir<NUM_READS, BLOCKS_PER_READ, BLOCKS_PER_WRITE, READ_SIZE, WRITE_SIZE>,
-    pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
-    _marker: PhantomData<F>,
-}
-
-impl<
-        F: PrimeField32,
-        const NUM_READS: usize,
-        const BLOCKS_PER_READ: usize,
-        const BLOCKS_PER_WRITE: usize,
-        const READ_SIZE: usize,
-        const WRITE_SIZE: usize,
-    >
-    Rv32VecHeapAdapterChip<F, NUM_READS, BLOCKS_PER_READ, BLOCKS_PER_WRITE, READ_SIZE, WRITE_SIZE>
-{
-    pub fn new(
-        execution_bus: ExecutionBus,
-        program_bus: ProgramBus,
-        memory_bridge: MemoryBridge,
-        address_bits: usize,
-        bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
-    ) -> Self {
-        assert!(NUM_READS <= 2);
-        assert!(
-            RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS - address_bits < RV32_CELL_BITS,
-            "address_bits={address_bits} needs to be large enough for high limb range check"
-        );
-        Self {
-            air: Rv32VecHeapAdapterAir {
-                execution_bridge: ExecutionBridge::new(execution_bus, program_bus),
-                memory_bridge,
-                bus: bitwise_lookup_chip.bus(),
-                address_bits,
-            },
-            bitwise_lookup_chip,
-            _marker: PhantomData,
-        }
-    }
-}
-
 #[repr(C)]
-#[serde_as]
-#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
-#[serde(bound = "F: Field")]
-pub struct Rv32VecHeapReadRecord<
-    F: Field,
-    const NUM_READS: usize,
-    const BLOCKS_PER_READ: usize,
-    const READ_SIZE: usize,
-> {
-    /// Read register value from address space e=1
-    #[serde_as(as = "[_; NUM_READS]")]
-    pub rs: [RecordId; NUM_READS],
-    /// Read register value from address space d=1
-    pub rd: RecordId,
-
-    pub rd_val: F,
-
-    #[serde_as(as = "[[_; BLOCKS_PER_READ]; NUM_READS]")]
-    pub reads: [[RecordId; BLOCKS_PER_READ]; NUM_READS],
-}
-
-#[repr(C)]
-#[serde_as]
-#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
-pub struct Rv32VecHeapWriteRecord<const BLOCKS_PER_WRITE: usize, const WRITE_SIZE: usize> {
-    pub from_state: ExecutionState<u32>,
-    #[serde_as(as = "[_; BLOCKS_PER_WRITE]")]
-    pub writes: [RecordId; BLOCKS_PER_WRITE],
-}
-
-#[repr(C)]
-#[derive(AlignedBorrow)]
+#[derive(AlignedBorrow, Debug)]
 pub struct Rv32VecHeapAdapterCols<
     T,
     const NUM_READS: usize,
@@ -346,6 +264,55 @@ impl<
     }
 }
 
+// Intermediate record type: it should be written to directly and never copied or cloned
+#[repr(C)] // C field layout
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct Rv32VecHeapAdapterRecord<
+    const NUM_READS: usize,
+    const BLOCKS_PER_READ: usize,
+    const BLOCKS_PER_WRITE: usize,
+    const READ_SIZE: usize, // not used by any field type below
+    const WRITE_SIZE: usize,
+> {
+    pub from_pc: u32, // pc passed to the executor's `start`
+    pub from_timestamp: u32, // `memory.timestamp` captured in `start`
+
+    pub rs_ptrs: [u32; NUM_READS], // one entry per read pointer
+    pub rd_ptr: u32, // presumably the destination register pointer — confirm in `read`
+
+    pub rs_vals: [u32; NUM_READS], // one entry per read pointer
+    pub rd_val: u32, // presumably the value read from the rd register — confirm in `read`
+
+    pub rs_read_aux: [MemoryReadAuxRecord; NUM_READS], // one aux record per rs register read
+    pub rd_read_aux: MemoryReadAuxRecord, // single aux record for rd
+
+    pub reads_aux: [[MemoryReadAuxRecord; BLOCKS_PER_READ]; NUM_READS], // indexed [pointer][block]
+    pub writes_aux: [MemoryWriteBytesAuxRecord<WRITE_SIZE>; BLOCKS_PER_WRITE], // one aux record per written block
+}
+
+#[derive(derive_new::new, Clone, Copy)] // derive_new generates `new(pointer_max_bits)`
+pub struct Rv32VecHeapAdapterExecutor< // executor half of the vec-heap adapter
+    const NUM_READS: usize,
+    const BLOCKS_PER_READ: usize,
+    const BLOCKS_PER_WRITE: usize,
+    const READ_SIZE: usize,
+    const WRITE_SIZE: usize,
+> {
+    pointer_max_bits: usize, // NOTE(review): derived `new` drops the NUM_READS <= 2 / high-limb assertions of the removed Rv32VecHeapAdapterChip::new
+}
+
+#[derive(derive_new::new)] // derive_new generates `new(pointer_max_bits, bitwise_lookup_chip)`
+pub struct Rv32VecHeapAdapterFiller< // trace-filler half of the vec-heap adapter
+    const NUM_READS: usize,
+    const BLOCKS_PER_READ: usize,
+    const BLOCKS_PER_WRITE: usize,
+    const READ_SIZE: usize,
+    const WRITE_SIZE: usize,
+> {
+    pointer_max_bits: usize, // NOTE(review): derived `new` drops the assertions of the removed Rv32VecHeapAdapterChip::new
+    pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>, // lookup chip over RV32_CELL_BITS-bit limbs
+}
+
 impl<
         F: PrimeField32,
         const NUM_READS: usize,
@@ -353,9 +320,8 @@ impl<
         const BLOCKS_PER_WRITE: usize,
         const READ_SIZE: usize,
         const WRITE_SIZE: usize,
-    > VmAdapterChip<F>
-    for Rv32VecHeapAdapterChip<
-        F,
+    > AdapterTraceExecutor<F>
+    for Rv32VecHeapAdapterExecutor<
         NUM_READS,
         BLOCKS_PER_READ,
         BLOCKS_PER_WRITE,
@@ -363,184 +329,246 @@ impl<
         WRITE_SIZE,
     >
 {
-    type ReadRecord = Rv32VecHeapReadRecord<F, NUM_READS, BLOCKS_PER_READ, READ_SIZE>;
-    type WriteRecord = Rv32VecHeapWriteRecord<BLOCKS_PER_WRITE, WRITE_SIZE>;
-    type Air =
-        Rv32VecHeapAdapterAir<NUM_READS, BLOCKS_PER_READ, BLOCKS_PER_WRITE, READ_SIZE, WRITE_SIZE>;
-    type Interface = VecHeapAdapterInterface<
+    const WIDTH: usize = Rv32VecHeapAdapterCols::<
         F,
         NUM_READS,
         BLOCKS_PER_READ,
         BLOCKS_PER_WRITE,
         READ_SIZE,
         WRITE_SIZE,
+    >::width();
+    type ReadData = [[[u8; READ_SIZE]; BLOCKS_PER_READ]; NUM_READS];
+    type WriteData = [[u8; WRITE_SIZE]; BLOCKS_PER_WRITE];
+    type RecordMut<'a> = &'a mut Rv32VecHeapAdapterRecord<
+        NUM_READS,
+        BLOCKS_PER_READ,
+        BLOCKS_PER_WRITE,
+        READ_SIZE,
+        WRITE_SIZE,
     >;
 
-    fn preprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
+    #[inline(always)]
+    fn start(pc: u32, memory: &TracingMemory, record: &mut Self::RecordMut<'_>) {
+        record.from_pc = pc;
+        record.from_timestamp = memory.timestamp;
+    }
+
+    fn read(
+        &self,
+        memory: &mut TracingMemory,
         instruction: &Instruction<F>,
-    ) -> Result<(
-        <Self::Interface as VmAdapterInterface<F>>::Reads,
-        Self::ReadRecord,
-    )> {
-        let Instruction { a, b, c, d, e, .. } = *instruction;
+        record: &mut &mut Rv32VecHeapAdapterRecord<
+            NUM_READS,
+            BLOCKS_PER_READ,
+            BLOCKS_PER_WRITE,
+            READ_SIZE,
+            WRITE_SIZE,
+        >,
+    ) -> Self::ReadData {
+        let &Instruction { a, b, c, d, e, .. } = instruction;
 
         debug_assert_eq!(d.as_canonical_u32(), RV32_REGISTER_AS);
         debug_assert_eq!(e.as_canonical_u32(), RV32_MEMORY_AS);
 
         // Read register values
-        let mut rs_vals = [0; NUM_READS];
-        let rs_records: [_; NUM_READS] = from_fn(|i| {
-            let addr = if i == 0 { b } else { c };
-            let (record, val) = read_rv32_register(memory, d, addr);
-            rs_vals[i] = val;
-            record
+        record.rs_vals = from_fn(|i| {
+            record.rs_ptrs[i] = if i == 0 { b } else { c }.as_canonical_u32();
+            u32::from_le_bytes(tracing_read(
+                memory,
+                RV32_REGISTER_AS,
+                record.rs_ptrs[i],
+                &mut record.rs_read_aux[i].prev_timestamp,
+            ))
         });
-        let (rd_record, rd_val) = read_rv32_register(memory, d, a);
+
+        record.rd_ptr = a.as_canonical_u32();
+        record.rd_val = u32::from_le_bytes(tracing_read(
+            memory,
+            RV32_REGISTER_AS,
+            a.as_canonical_u32(),
+            &mut record.rd_read_aux.prev_timestamp,
+        ));
 
         // Read memory values
-        let read_records = rs_vals.map(|address| {
-            assert!(
-                address as usize + READ_SIZE * BLOCKS_PER_READ - 1 < (1 << self.air.address_bits)
+        from_fn(|i| {
+            debug_assert!(
+                record.rs_vals[i] as usize + READ_SIZE * BLOCKS_PER_READ - 1
+                    < (1 << self.pointer_max_bits)
+            );
-            from_fn(|i| {
-                memory.read::<READ_SIZE>(e, F::from_canonical_u32(address + (i * READ_SIZE) as u32))
+            from_fn(|j| {
+                tracing_read(
+                    memory,
+                    RV32_MEMORY_AS,
+                    record.rs_vals[i] + (j * READ_SIZE) as u32,
+                    &mut record.reads_aux[i][j].prev_timestamp,
+                )
             })
-        });
-        let read_data = read_records.map(|r| r.map(|x| x.1));
-        assert!(rd_val as usize + WRITE_SIZE * BLOCKS_PER_WRITE - 1 < (1 << self.air.address_bits));
-
-        let record = Rv32VecHeapReadRecord {
-            rs: rs_records,
-            rd: rd_record,
-            rd_val: F::from_canonical_u32(rd_val),
-            reads: read_records.map(|r| r.map(|x| x.0)),
-        };
-
-        Ok((read_data, record))
-    }
-
-    fn postprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-        output: AdapterRuntimeContext<F, Self::Interface>,
-        read_record: &Self::ReadRecord,
-    ) -> Result<(ExecutionState<u32>, Self::WriteRecord)> {
-        let e = instruction.e;
-        let mut i = 0;
-        let writes = output.writes.map(|write| {
-            let (record_id, _) = memory.write(
-                e,
-                read_record.rd_val + F::from_canonical_u32((i * WRITE_SIZE) as u32),
-                write,
-            );
-            i += 1;
-            record_id
-        });
-
-        Ok((
-            ExecutionState {
-                pc: from_state.pc + DEFAULT_PC_STEP,
-                timestamp: memory.timestamp(),
-            },
-            Self::WriteRecord { from_state, writes },
-        ))
+        })
     }
 
-    fn generate_trace_row(
+    fn write(
         &self,
-        row_slice: &mut [F],
-        read_record: Self::ReadRecord,
-        write_record: Self::WriteRecord,
-        memory: &OfflineMemory<F>,
+        memory: &mut TracingMemory,
+        instruction: &Instruction<F>,
+        data: Self::WriteData,
+        record: &mut &mut Rv32VecHeapAdapterRecord<
+            NUM_READS,
+            BLOCKS_PER_READ,
+            BLOCKS_PER_WRITE,
+            READ_SIZE,
+            WRITE_SIZE,
+        >,
     ) {
-        vec_heap_generate_trace_row_impl(
-            row_slice,
-            &read_record,
-            &write_record,
-            self.bitwise_lookup_chip.clone(),
-            self.air.address_bits,
-            memory,
-        )
-    }
+        debug_assert_eq!(instruction.e.as_canonical_u32(), RV32_MEMORY_AS);
+
+        debug_assert!(
+            record.rd_val as usize + WRITE_SIZE * BLOCKS_PER_WRITE - 1
+                < (1 << self.pointer_max_bits)
+        );
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        #[allow(clippy::needless_range_loop)]
+        for i in 0..BLOCKS_PER_WRITE {
+            tracing_write(
+                memory,
+                RV32_MEMORY_AS,
+                record.rd_val + (i * WRITE_SIZE) as u32,
+                data[i],
+                &mut record.writes_aux[i].prev_timestamp,
+                &mut record.writes_aux[i].prev_data,
+            );
+        }
     }
 }
 
-pub(super) fn vec_heap_generate_trace_row_impl<
-    F: PrimeField32,
-    const NUM_READS: usize,
-    const BLOCKS_PER_READ: usize,
-    const BLOCKS_PER_WRITE: usize,
-    const READ_SIZE: usize,
-    const WRITE_SIZE: usize,
->(
-    row_slice: &mut [F],
-    read_record: &Rv32VecHeapReadRecord<F, NUM_READS, BLOCKS_PER_READ, READ_SIZE>,
-    write_record: &Rv32VecHeapWriteRecord<BLOCKS_PER_WRITE, WRITE_SIZE>,
-    bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
-    address_bits: usize,
-    memory: &OfflineMemory<F>,
-) {
-    let aux_cols_factory = memory.aux_cols_factory();
-    let row_slice: &mut Rv32VecHeapAdapterCols<
+impl<
+        F: PrimeField32,
+        const NUM_READS: usize,
+        const BLOCKS_PER_READ: usize,
+        const BLOCKS_PER_WRITE: usize,
+        const READ_SIZE: usize,
+        const WRITE_SIZE: usize,
+    > AdapterTraceFiller<F>
+    for Rv32VecHeapAdapterFiller<
+        NUM_READS,
+        BLOCKS_PER_READ,
+        BLOCKS_PER_WRITE,
+        READ_SIZE,
+        WRITE_SIZE,
+    >
+{
+    const WIDTH: usize = Rv32VecHeapAdapterCols::<
         F,
         NUM_READS,
         BLOCKS_PER_READ,
         BLOCKS_PER_WRITE,
         READ_SIZE,
         WRITE_SIZE,
-    > = row_slice.borrow_mut();
-    row_slice.from_state = write_record.from_state.map(F::from_canonical_u32);
-
-    let rd = memory.record_by_id(read_record.rd);
-    let rs = read_record
-        .rs
-        .into_iter()
-        .map(|r| memory.record_by_id(r))
-        .collect::<Vec<_>>();
-
-    row_slice.rd_ptr = rd.pointer;
-    row_slice.rd_val.copy_from_slice(rd.data_slice());
-
-    for (i, r) in rs.iter().enumerate() {
-        row_slice.rs_ptr[i] = r.pointer;
-        row_slice.rs_val[i].copy_from_slice(r.data_slice());
-        aux_cols_factory.generate_read_aux(r, &mut row_slice.rs_read_aux[i]);
-    }
+    >::width();
 
-    aux_cols_factory.generate_read_aux(rd, &mut row_slice.rd_read_aux);
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, mut adapter_row: &mut [F]) {
+        let record: &Rv32VecHeapAdapterRecord<
+            NUM_READS,
+            BLOCKS_PER_READ,
+            BLOCKS_PER_WRITE,
+            READ_SIZE,
+            WRITE_SIZE,
+        > = unsafe { get_record_from_slice(&mut adapter_row, ()) };
 
-    for (i, reads) in read_record.reads.iter().enumerate() {
-        for (j, &x) in reads.iter().enumerate() {
-            let record = memory.record_by_id(x);
-            aux_cols_factory.generate_read_aux(record, &mut row_slice.reads_aux[i][j]);
+        let cols: &mut Rv32VecHeapAdapterCols<
+            F,
+            NUM_READS,
+            BLOCKS_PER_READ,
+            BLOCKS_PER_WRITE,
+            READ_SIZE,
+            WRITE_SIZE,
+        > = adapter_row.borrow_mut();
+
+        // Range checks:
+        // **NOTE**: The range checks read from `record`, so they must run before it is overwritten
+        debug_assert!(self.pointer_max_bits <= RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS);
+        let limb_shift_bits = RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS - self.pointer_max_bits;
+        const MSL_SHIFT: usize = RV32_CELL_BITS * (RV32_REGISTER_NUM_LIMBS - 1);
+        if NUM_READS > 1 {
+            self.bitwise_lookup_chip.request_range(
+                (record.rs_vals[0] >> MSL_SHIFT) << limb_shift_bits,
+                (record.rs_vals[1] >> MSL_SHIFT) << limb_shift_bits,
+            );
+            self.bitwise_lookup_chip.request_range(
+                (record.rd_val >> MSL_SHIFT) << limb_shift_bits,
+                (record.rd_val >> MSL_SHIFT) << limb_shift_bits,
+            );
+        } else {
+            self.bitwise_lookup_chip.request_range(
+                (record.rs_vals[0] >> MSL_SHIFT) << limb_shift_bits,
+                (record.rd_val >> MSL_SHIFT) << limb_shift_bits,
+            );
         }
-    }
 
-    for (i, &w) in write_record.writes.iter().enumerate() {
-        let record = memory.record_by_id(w);
-        aux_cols_factory.generate_write_aux(record, &mut row_slice.writes_aux[i]);
-    }
+        let timestamp_delta = NUM_READS + 1 + NUM_READS * BLOCKS_PER_READ + BLOCKS_PER_WRITE;
+        let mut timestamp = record.from_timestamp + timestamp_delta as u32;
+        let mut timestamp_mm = || {
+            timestamp -= 1;
+            timestamp
+        };
 
-    // Range checks:
-    let need_range_check: Vec<u32> = rs
-        .iter()
-        .chain(std::iter::repeat_n(&rd, 2))
-        .map(|record| {
-            record
-                .data_at(RV32_REGISTER_NUM_LIMBS - 1)
-                .as_canonical_u32()
-        })
-        .collect();
-    debug_assert!(address_bits <= RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS);
-    let limb_shift_bits = RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS - address_bits;
-    for pair in need_range_check.chunks_exact(2) {
-        bitwise_lookup_chip.request_range(pair[0] << limb_shift_bits, pair[1] << limb_shift_bits);
+        // **NOTE**: Must iterate everything in reverse order to avoid overwriting the records
+        record
+            .writes_aux
+            .iter()
+            .rev()
+            .zip(cols.writes_aux.iter_mut().rev())
+            .for_each(|(write, cols_write)| {
+                cols_write.set_prev_data(write.prev_data.map(F::from_canonical_u8));
+                mem_helper.fill(write.prev_timestamp, timestamp_mm(), cols_write.as_mut());
+            });
+
+        record
+            .reads_aux
+            .iter()
+            .zip(cols.reads_aux.iter_mut())
+            .rev()
+            .for_each(|(reads, cols_reads)| {
+                reads
+                    .iter()
+                    .zip(cols_reads.iter_mut())
+                    .rev()
+                    .for_each(|(read, cols_read)| {
+                        mem_helper.fill(read.prev_timestamp, timestamp_mm(), cols_read.as_mut());
+                    });
+            });
+
+        mem_helper.fill(
+            record.rd_read_aux.prev_timestamp,
+            timestamp_mm(),
+            cols.rd_read_aux.as_mut(),
+        );
+
+        record
+            .rs_read_aux
+            .iter()
+            .zip(cols.rs_read_aux.iter_mut())
+            .rev()
+            .for_each(|(aux, cols_aux)| {
+                mem_helper.fill(aux.prev_timestamp, timestamp_mm(), cols_aux.as_mut());
+            });
+
+        cols.rd_val = record.rd_val.to_le_bytes().map(F::from_canonical_u8);
+        cols.rs_val
+            .iter_mut()
+            .rev()
+            .zip(record.rs_vals.iter().rev())
+            .for_each(|(cols_val, val)| {
+                *cols_val = val.to_le_bytes().map(F::from_canonical_u8);
+            });
+        cols.rd_ptr = F::from_canonical_u32(record.rd_ptr);
+        cols.rs_ptr
+            .iter_mut()
+            .rev()
+            .zip(record.rs_ptrs.iter().rev())
+            .for_each(|(cols_ptr, ptr)| {
+                *cols_ptr = F::from_canonical_u32(*ptr);
+            });
+        cols.from_state.timestamp = F::from_canonical_u32(record.from_timestamp);
+        cols.from_state.pc = F::from_canonical_u32(record.from_pc);
     }
 }
diff --git a/extensions/rv32-adapters/src/vec_heap_two_reads.rs b/extensions/rv32-adapters/src/vec_heap_two_reads.rs
deleted file mode 100644
index f829db8bbc..0000000000
--- a/extensions/rv32-adapters/src/vec_heap_two_reads.rs
+++ /dev/null
@@ -1,577 +0,0 @@
-use std::{
-    array::from_fn,
-    borrow::{Borrow, BorrowMut},
-    iter::zip,
-    marker::PhantomData,
-};
-
-use itertools::izip;
-use openvm_circuit::{
-    arch::{
-        AdapterAirContext, AdapterRuntimeContext, ExecutionBridge, ExecutionBus, ExecutionState,
-        Result, VecHeapTwoReadsAdapterInterface, VmAdapterAir, VmAdapterChip, VmAdapterInterface,
-    },
-    system::{
-        memory::{
-            offline_checker::{MemoryBridge, MemoryReadAuxCols, MemoryWriteAuxCols},
-            MemoryAddress, MemoryController, OfflineMemory, RecordId,
-        },
-        program::ProgramBus,
-    },
-};
-use openvm_circuit_primitives::bitwise_op_lookup::{
-    BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
-};
-use openvm_circuit_primitives_derive::AlignedBorrow;
-use openvm_instructions::{
-    instruction::Instruction,
-    program::DEFAULT_PC_STEP,
-    riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS},
-};
-use openvm_rv32im_circuit::adapters::{
-    abstract_compose, read_rv32_register, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS,
-};
-use openvm_stark_backend::{
-    interaction::InteractionBuilder,
-    p3_air::BaseAir,
-    p3_field::{Field, FieldAlgebra, PrimeField32},
-};
-use serde::{Deserialize, Serialize};
-use serde_with::serde_as;
-
-/// This adapter reads from 2 pointers and writes to 1 pointer.
-/// * The data is read from the heap (address space 2), and the pointers are read from registers
-///   (address space 1).
-/// * Reads take the form of `BLOCKS_PER_READX` consecutive reads of size `READ_SIZE` from the heap,
-///   starting from the addresses in `rs[X]`
-/// * NOTE that the two reads can read different numbers of blocks.
-/// * Writes take the form of `BLOCKS_PER_WRITE` consecutive writes of size `WRITE_SIZE` to the
-///   heap, starting from the address in `rd`.
-pub struct Rv32VecHeapTwoReadsAdapterChip<
-    F: Field,
-    const BLOCKS_PER_READ1: usize,
-    const BLOCKS_PER_READ2: usize,
-    const BLOCKS_PER_WRITE: usize,
-    const READ_SIZE: usize,
-    const WRITE_SIZE: usize,
-> {
-    pub air: Rv32VecHeapTwoReadsAdapterAir<
-        BLOCKS_PER_READ1,
-        BLOCKS_PER_READ2,
-        BLOCKS_PER_WRITE,
-        READ_SIZE,
-        WRITE_SIZE,
-    >,
-    pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
-    _marker: PhantomData<F>,
-}
-
-impl<
-        F: PrimeField32,
-        const BLOCKS_PER_READ1: usize,
-        const BLOCKS_PER_READ2: usize,
-        const BLOCKS_PER_WRITE: usize,
-        const READ_SIZE: usize,
-        const WRITE_SIZE: usize,
-    >
-    Rv32VecHeapTwoReadsAdapterChip<
-        F,
-        BLOCKS_PER_READ1,
-        BLOCKS_PER_READ2,
-        BLOCKS_PER_WRITE,
-        READ_SIZE,
-        WRITE_SIZE,
-    >
-{
-    pub fn new(
-        execution_bus: ExecutionBus,
-        program_bus: ProgramBus,
-        memory_bridge: MemoryBridge,
-        address_bits: usize,
-        bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
-    ) -> Self {
-        assert!(
-            RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS - address_bits < RV32_CELL_BITS,
-            "address_bits={address_bits} needs to be large enough for high limb range check"
-        );
-        Self {
-            air: Rv32VecHeapTwoReadsAdapterAir {
-                execution_bridge: ExecutionBridge::new(execution_bus, program_bus),
-                memory_bridge,
-                bus: bitwise_lookup_chip.bus(),
-                address_bits,
-            },
-            bitwise_lookup_chip,
-            _marker: PhantomData,
-        }
-    }
-}
-
-#[repr(C)]
-#[serde_as]
-#[derive(Clone, Debug, Serialize, Deserialize)]
-#[serde(bound = "F: Field")]
-pub struct Rv32VecHeapTwoReadsReadRecord<
-    F: Field,
-    const BLOCKS_PER_READ1: usize,
-    const BLOCKS_PER_READ2: usize,
-    const READ_SIZE: usize,
-> {
-    /// Read register value from address space e=1
-    pub rs1: RecordId,
-    pub rs2: RecordId,
-    /// Read register value from address space d=1
-    pub rd: RecordId,
-
-    pub rd_val: F,
-
-    #[serde_as(as = "[_; BLOCKS_PER_READ1]")]
-    pub reads1: [RecordId; BLOCKS_PER_READ1],
-    #[serde_as(as = "[_; BLOCKS_PER_READ2]")]
-    pub reads2: [RecordId; BLOCKS_PER_READ2],
-}
-
-#[repr(C)]
-#[serde_as]
-#[derive(Clone, Debug, Serialize, Deserialize)]
-pub struct Rv32VecHeapTwoReadsWriteRecord<const BLOCKS_PER_WRITE: usize, const WRITE_SIZE: usize> {
-    pub from_state: ExecutionState<u32>,
-    #[serde_as(as = "[_; BLOCKS_PER_WRITE]")]
-    pub writes: [RecordId; BLOCKS_PER_WRITE],
-}
-
-#[repr(C)]
-#[derive(AlignedBorrow)]
-pub struct Rv32VecHeapTwoReadsAdapterCols<
-    T,
-    const BLOCKS_PER_READ1: usize,
-    const BLOCKS_PER_READ2: usize,
-    const BLOCKS_PER_WRITE: usize,
-    const READ_SIZE: usize,
-    const WRITE_SIZE: usize,
-> {
-    pub from_state: ExecutionState<T>,
-
-    pub rs1_ptr: T,
-    pub rs2_ptr: T,
-    pub rd_ptr: T,
-
-    pub rs1_val: [T; RV32_REGISTER_NUM_LIMBS],
-    pub rs2_val: [T; RV32_REGISTER_NUM_LIMBS],
-    pub rd_val: [T; RV32_REGISTER_NUM_LIMBS],
-
-    pub rs1_read_aux: MemoryReadAuxCols<T>,
-    pub rs2_read_aux: MemoryReadAuxCols<T>,
-    pub rd_read_aux: MemoryReadAuxCols<T>,
-
-    pub reads1_aux: [MemoryReadAuxCols<T>; BLOCKS_PER_READ1],
-    pub reads2_aux: [MemoryReadAuxCols<T>; BLOCKS_PER_READ2],
-    pub writes_aux: [MemoryWriteAuxCols<T, WRITE_SIZE>; BLOCKS_PER_WRITE],
-}
-
-#[allow(dead_code)]
-#[derive(Clone, Copy, Debug, derive_new::new)]
-pub struct Rv32VecHeapTwoReadsAdapterAir<
-    const BLOCKS_PER_READ1: usize,
-    const BLOCKS_PER_READ2: usize,
-    const BLOCKS_PER_WRITE: usize,
-    const READ_SIZE: usize,
-    const WRITE_SIZE: usize,
-> {
-    pub(super) execution_bridge: ExecutionBridge,
-    pub(super) memory_bridge: MemoryBridge,
-    pub bus: BitwiseOperationLookupBus,
-    /// The max number of bits for an address in memory
-    address_bits: usize,
-}
-
-impl<
-        F: Field,
-        const BLOCKS_PER_READ1: usize,
-        const BLOCKS_PER_READ2: usize,
-        const BLOCKS_PER_WRITE: usize,
-        const READ_SIZE: usize,
-        const WRITE_SIZE: usize,
-    > BaseAir<F>
-    for Rv32VecHeapTwoReadsAdapterAir<
-        BLOCKS_PER_READ1,
-        BLOCKS_PER_READ2,
-        BLOCKS_PER_WRITE,
-        READ_SIZE,
-        WRITE_SIZE,
-    >
-{
-    fn width(&self) -> usize {
-        Rv32VecHeapTwoReadsAdapterCols::<
-            F,
-            BLOCKS_PER_READ1,
-            BLOCKS_PER_READ2,
-            BLOCKS_PER_WRITE,
-            READ_SIZE,
-            WRITE_SIZE,
-        >::width()
-    }
-}
-
-impl<
-        AB: InteractionBuilder,
-        const BLOCKS_PER_READ1: usize,
-        const BLOCKS_PER_READ2: usize,
-        const BLOCKS_PER_WRITE: usize,
-        const READ_SIZE: usize,
-        const WRITE_SIZE: usize,
-    > VmAdapterAir<AB>
-    for Rv32VecHeapTwoReadsAdapterAir<
-        BLOCKS_PER_READ1,
-        BLOCKS_PER_READ2,
-        BLOCKS_PER_WRITE,
-        READ_SIZE,
-        WRITE_SIZE,
-    >
-{
-    type Interface = VecHeapTwoReadsAdapterInterface<
-        AB::Expr,
-        BLOCKS_PER_READ1,
-        BLOCKS_PER_READ2,
-        BLOCKS_PER_WRITE,
-        READ_SIZE,
-        WRITE_SIZE,
-    >;
-
-    fn eval(
-        &self,
-        builder: &mut AB,
-        local: &[AB::Var],
-        ctx: AdapterAirContext<AB::Expr, Self::Interface>,
-    ) {
-        let cols: &Rv32VecHeapTwoReadsAdapterCols<
-            _,
-            BLOCKS_PER_READ1,
-            BLOCKS_PER_READ2,
-            BLOCKS_PER_WRITE,
-            READ_SIZE,
-            WRITE_SIZE,
-        > = local.borrow();
-        let timestamp = cols.from_state.timestamp;
-        let mut timestamp_delta: usize = 0;
-        let mut timestamp_pp = || {
-            timestamp_delta += 1;
-            timestamp + AB::F::from_canonical_usize(timestamp_delta - 1)
-        };
-
-        let ptrs = [cols.rs1_ptr, cols.rs2_ptr, cols.rd_ptr];
-        let vals = [cols.rs1_val, cols.rs2_val, cols.rd_val];
-        let auxs = [&cols.rs1_read_aux, &cols.rs2_read_aux, &cols.rd_read_aux];
-
-        // Read register values for rs1, rs2, rd
-        for (ptr, val, aux) in izip!(ptrs, vals, auxs) {
-            self.memory_bridge
-                .read(
-                    MemoryAddress::new(AB::F::from_canonical_u32(RV32_REGISTER_AS), ptr),
-                    val,
-                    timestamp_pp(),
-                    aux,
-                )
-                .eval(builder, ctx.instruction.is_valid.clone());
-        }
-
-        // Range checks: see Rv32VecHeaperAdapterAir
-        let need_range_check = [&cols.rs1_val, &cols.rs2_val, &cols.rd_val, &cols.rd_val]
-            .map(|val| val[RV32_REGISTER_NUM_LIMBS - 1]);
-
-        // range checks constrain to RV32_CELL_BITS bits, so we need to shift the limbs to constrain
-        // the correct amount of bits
-        let limb_shift = AB::F::from_canonical_usize(
-            1 << (RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS - self.address_bits),
-        );
-
-        // Note: since limbs are read from memory we already know that limb[i] < 2^RV32_CELL_BITS
-        //       thus range checking limb[i] * shift < 2^RV32_CELL_BITS, gives us that
-        //       limb[i] < 2^(addr_bits - (RV32_CELL_BITS * (RV32_REGISTER_NUM_LIMBS - 1)))
-        for pair in need_range_check.chunks_exact(2) {
-            self.bus
-                .send_range(pair[0] * limb_shift, pair[1] * limb_shift)
-                .eval(builder, ctx.instruction.is_valid.clone());
-        }
-
-        let rd_val_f: AB::Expr = abstract_compose(cols.rd_val);
-        let rs1_val_f: AB::Expr = abstract_compose(cols.rs1_val);
-        let rs2_val_f: AB::Expr = abstract_compose(cols.rs2_val);
-
-        let e = AB::F::from_canonical_u32(RV32_MEMORY_AS);
-        // Reads from heap
-        for (i, (read, aux)) in zip(ctx.reads.0, &cols.reads1_aux).enumerate() {
-            self.memory_bridge
-                .read(
-                    MemoryAddress::new(
-                        e,
-                        rs1_val_f.clone() + AB::Expr::from_canonical_usize(i * READ_SIZE),
-                    ),
-                    read,
-                    timestamp_pp(),
-                    aux,
-                )
-                .eval(builder, ctx.instruction.is_valid.clone());
-        }
-        for (i, (read, aux)) in zip(ctx.reads.1, &cols.reads2_aux).enumerate() {
-            self.memory_bridge
-                .read(
-                    MemoryAddress::new(
-                        e,
-                        rs2_val_f.clone() + AB::Expr::from_canonical_usize(i * READ_SIZE),
-                    ),
-                    read,
-                    timestamp_pp(),
-                    aux,
-                )
-                .eval(builder, ctx.instruction.is_valid.clone());
-        }
-
-        // Writes to heap
-        for (i, (write, aux)) in zip(ctx.writes, &cols.writes_aux).enumerate() {
-            self.memory_bridge
-                .write(
-                    MemoryAddress::new(
-                        e,
-                        rd_val_f.clone() + AB::Expr::from_canonical_usize(i * WRITE_SIZE),
-                    ),
-                    write,
-                    timestamp_pp(),
-                    aux,
-                )
-                .eval(builder, ctx.instruction.is_valid.clone());
-        }
-
-        self.execution_bridge
-            .execute_and_increment_or_set_pc(
-                ctx.instruction.opcode,
-                [
-                    cols.rd_ptr.into(),
-                    cols.rs1_ptr.into(),
-                    cols.rs2_ptr.into(),
-                    AB::Expr::from_canonical_u32(RV32_REGISTER_AS),
-                    e.into(),
-                ],
-                cols.from_state,
-                AB::F::from_canonical_usize(timestamp_delta),
-                (DEFAULT_PC_STEP, ctx.to_pc),
-            )
-            .eval(builder, ctx.instruction.is_valid.clone());
-    }
-
-    fn get_from_pc(&self, local: &[AB::Var]) -> AB::Var {
-        let cols: &Rv32VecHeapTwoReadsAdapterCols<
-            _,
-            BLOCKS_PER_READ1,
-            BLOCKS_PER_READ2,
-            BLOCKS_PER_WRITE,
-            READ_SIZE,
-            WRITE_SIZE,
-        > = local.borrow();
-        cols.from_state.pc
-    }
-}
-
-impl<
-        F: PrimeField32,
-        const BLOCKS_PER_READ1: usize,
-        const BLOCKS_PER_READ2: usize,
-        const BLOCKS_PER_WRITE: usize,
-        const READ_SIZE: usize,
-        const WRITE_SIZE: usize,
-    > VmAdapterChip<F>
-    for Rv32VecHeapTwoReadsAdapterChip<
-        F,
-        BLOCKS_PER_READ1,
-        BLOCKS_PER_READ2,
-        BLOCKS_PER_WRITE,
-        READ_SIZE,
-        WRITE_SIZE,
-    >
-{
-    type ReadRecord =
-        Rv32VecHeapTwoReadsReadRecord<F, BLOCKS_PER_READ1, BLOCKS_PER_READ2, READ_SIZE>;
-    type WriteRecord = Rv32VecHeapTwoReadsWriteRecord<BLOCKS_PER_WRITE, WRITE_SIZE>;
-    type Air = Rv32VecHeapTwoReadsAdapterAir<
-        BLOCKS_PER_READ1,
-        BLOCKS_PER_READ2,
-        BLOCKS_PER_WRITE,
-        READ_SIZE,
-        WRITE_SIZE,
-    >;
-    type Interface = VecHeapTwoReadsAdapterInterface<
-        F,
-        BLOCKS_PER_READ1,
-        BLOCKS_PER_READ2,
-        BLOCKS_PER_WRITE,
-        READ_SIZE,
-        WRITE_SIZE,
-    >;
-
-    fn preprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-    ) -> Result<(
-        <Self::Interface as VmAdapterInterface<F>>::Reads,
-        Self::ReadRecord,
-    )> {
-        let Instruction { a, b, c, d, e, .. } = *instruction;
-
-        debug_assert_eq!(d.as_canonical_u32(), RV32_REGISTER_AS);
-        debug_assert_eq!(e.as_canonical_u32(), RV32_MEMORY_AS);
-
-        let (rs1_record, rs1_val) = read_rv32_register(memory, d, b);
-        let (rs2_record, rs2_val) = read_rv32_register(memory, d, c);
-        let (rd_record, rd_val) = read_rv32_register(memory, d, a);
-
-        assert!(rs1_val as usize + READ_SIZE * BLOCKS_PER_READ1 - 1 < (1 << self.air.address_bits));
-        let read1_records = from_fn(|i| {
-            memory.read::<READ_SIZE>(e, F::from_canonical_u32(rs1_val + (i * READ_SIZE) as u32))
-        });
-        let read1_data = read1_records.map(|r| r.1);
-        assert!(rs2_val as usize + READ_SIZE * BLOCKS_PER_READ2 - 1 < (1 << self.air.address_bits));
-        let read2_records = from_fn(|i| {
-            memory.read::<READ_SIZE>(e, F::from_canonical_u32(rs2_val + (i * READ_SIZE) as u32))
-        });
-        let read2_data = read2_records.map(|r| r.1);
-        assert!(rd_val as usize + WRITE_SIZE * BLOCKS_PER_WRITE - 1 < (1 << self.air.address_bits));
-
-        let record = Rv32VecHeapTwoReadsReadRecord {
-            rs1: rs1_record,
-            rs2: rs2_record,
-            rd: rd_record,
-            rd_val: F::from_canonical_u32(rd_val),
-            reads1: read1_records.map(|r| r.0),
-            reads2: read2_records.map(|r| r.0),
-        };
-
-        Ok(((read1_data, read2_data), record))
-    }
-
-    fn postprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-        output: AdapterRuntimeContext<F, Self::Interface>,
-        read_record: &Self::ReadRecord,
-    ) -> Result<(ExecutionState<u32>, Self::WriteRecord)> {
-        let e = instruction.e;
-        let mut i = 0;
-        let writes = output.writes.map(|write| {
-            let (record_id, _) = memory.write(
-                e,
-                read_record.rd_val + F::from_canonical_u32((i * WRITE_SIZE) as u32),
-                write,
-            );
-            i += 1;
-            record_id
-        });
-
-        Ok((
-            ExecutionState {
-                pc: from_state.pc + DEFAULT_PC_STEP,
-                timestamp: memory.timestamp(),
-            },
-            Self::WriteRecord { from_state, writes },
-        ))
-    }
-
-    fn generate_trace_row(
-        &self,
-        row_slice: &mut [F],
-        read_record: Self::ReadRecord,
-        write_record: Self::WriteRecord,
-        memory: &OfflineMemory<F>,
-    ) {
-        vec_heap_two_reads_generate_trace_row_impl(
-            row_slice,
-            &read_record,
-            &write_record,
-            self.bitwise_lookup_chip.clone(),
-            self.air.address_bits,
-            memory,
-        )
-    }
-
-    fn air(&self) -> &Self::Air {
-        &self.air
-    }
-}
-
-pub(super) fn vec_heap_two_reads_generate_trace_row_impl<
-    F: PrimeField32,
-    const BLOCKS_PER_READ1: usize,
-    const BLOCKS_PER_READ2: usize,
-    const BLOCKS_PER_WRITE: usize,
-    const READ_SIZE: usize,
-    const WRITE_SIZE: usize,
->(
-    row_slice: &mut [F],
-    read_record: &Rv32VecHeapTwoReadsReadRecord<F, BLOCKS_PER_READ1, BLOCKS_PER_READ2, READ_SIZE>,
-    write_record: &Rv32VecHeapTwoReadsWriteRecord<BLOCKS_PER_WRITE, WRITE_SIZE>,
-    bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
-    address_bits: usize,
-    memory: &OfflineMemory<F>,
-) {
-    let aux_cols_factory = memory.aux_cols_factory();
-    let row_slice: &mut Rv32VecHeapTwoReadsAdapterCols<
-        F,
-        BLOCKS_PER_READ1,
-        BLOCKS_PER_READ2,
-        BLOCKS_PER_WRITE,
-        READ_SIZE,
-        WRITE_SIZE,
-    > = row_slice.borrow_mut();
-    row_slice.from_state = write_record.from_state.map(F::from_canonical_u32);
-
-    let rd = memory.record_by_id(read_record.rd);
-    let rs1 = memory.record_by_id(read_record.rs1);
-    let rs2 = memory.record_by_id(read_record.rs2);
-
-    row_slice.rd_ptr = rd.pointer;
-    row_slice.rs1_ptr = rs1.pointer;
-    row_slice.rs2_ptr = rs2.pointer;
-
-    row_slice.rd_val.copy_from_slice(rd.data_slice());
-    row_slice.rs1_val.copy_from_slice(rs1.data_slice());
-    row_slice.rs2_val.copy_from_slice(rs2.data_slice());
-
-    aux_cols_factory.generate_read_aux(rs1, &mut row_slice.rs1_read_aux);
-    aux_cols_factory.generate_read_aux(rs2, &mut row_slice.rs2_read_aux);
-    aux_cols_factory.generate_read_aux(rd, &mut row_slice.rd_read_aux);
-
-    for (i, r) in read_record.reads1.iter().enumerate() {
-        let record = memory.record_by_id(*r);
-        aux_cols_factory.generate_read_aux(record, &mut row_slice.reads1_aux[i]);
-    }
-
-    for (i, r) in read_record.reads2.iter().enumerate() {
-        let record = memory.record_by_id(*r);
-        aux_cols_factory.generate_read_aux(record, &mut row_slice.reads2_aux[i]);
-    }
-
-    for (i, w) in write_record.writes.iter().enumerate() {
-        let record = memory.record_by_id(*w);
-        aux_cols_factory.generate_write_aux(record, &mut row_slice.writes_aux[i]);
-    }
-    // Range checks:
-    let need_range_check = [
-        &read_record.rs1,
-        &read_record.rs2,
-        &read_record.rd,
-        &read_record.rd,
-    ]
-    .map(|record| {
-        memory
-            .record_by_id(*record)
-            .data_at(RV32_REGISTER_NUM_LIMBS - 1)
-            .as_canonical_u32()
-    });
-    debug_assert!(address_bits <= RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS);
-    let limb_shift_bits = RV32_CELL_BITS * RV32_REGISTER_NUM_LIMBS - address_bits;
-    for pair in need_range_check.chunks_exact(2) {
-        bitwise_lookup_chip.request_range(pair[0] << limb_shift_bits, pair[1] << limb_shift_bits);
-    }
-}
diff --git a/extensions/rv32im/circuit/Cargo.toml b/extensions/rv32im/circuit/Cargo.toml
index 8b20385104..9f6bbb6824 100644
--- a/extensions/rv32im/circuit/Cargo.toml
+++ b/extensions/rv32im/circuit/Cargo.toml
@@ -21,15 +21,16 @@ derive-new.workspace = true
 derive_more = { workspace = true, features = ["from"] }
 rand.workspace = true
 eyre.workspace = true
+
 # for div_rem:
 num-bigint.workspace = true
 num-integer.workspace = true
 serde = { workspace = true, features = ["derive", "std"] }
-serde-big-array.workspace = true
 
 [dev-dependencies]
 openvm-stark-sdk = { workspace = true }
 openvm-circuit = { workspace = true, features = ["test-utils"] }
+test-case.workspace = true
 
 [features]
 default = ["parallel", "jemalloc"]
diff --git a/extensions/rv32im/circuit/src/adapters/alu.rs b/extensions/rv32im/circuit/src/adapters/alu.rs
index b61e2a224a..08cfa31b08 100644
--- a/extensions/rv32im/circuit/src/adapters/alu.rs
+++ b/extensions/rv32im/circuit/src/adapters/alu.rs
@@ -1,25 +1,23 @@
-use std::{
-    borrow::{Borrow, BorrowMut},
-    marker::PhantomData,
-};
+use std::borrow::{Borrow, BorrowMut};
 
 use openvm_circuit::{
     arch::{
-        AdapterAirContext, AdapterRuntimeContext, BasicAdapterInterface, ExecutionBridge,
-        ExecutionBus, ExecutionState, MinimalInstruction, Result, VmAdapterAir, VmAdapterChip,
-        VmAdapterInterface,
+        get_record_from_slice, AdapterAirContext, AdapterTraceExecutor, AdapterTraceFiller,
+        BasicAdapterInterface, ExecutionBridge, ExecutionState, MinimalInstruction, VmAdapterAir,
     },
-    system::{
-        memory::{
-            offline_checker::{MemoryBridge, MemoryReadAuxCols, MemoryWriteAuxCols},
-            MemoryAddress, MemoryController, OfflineMemory, RecordId,
+    system::memory::{
+        offline_checker::{
+            MemoryBridge, MemoryReadAuxCols, MemoryReadAuxRecord, MemoryWriteAuxCols,
+            MemoryWriteBytesAuxRecord,
         },
-        program::ProgramBus,
+        online::TracingMemory,
+        MemoryAddress, MemoryAuxColsFactory,
     },
 };
 use openvm_circuit_primitives::{
     bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
     utils::not,
+    AlignedBytesBorrow,
 };
 use openvm_circuit_primitives_derive::AlignedBorrow;
 use openvm_instructions::{
@@ -32,60 +30,10 @@ use openvm_stark_backend::{
     p3_air::{AirBuilder, BaseAir},
     p3_field::{Field, FieldAlgebra, PrimeField32},
 };
-use serde::{Deserialize, Serialize};
-
-use super::{RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS};
-
-/// Reads instructions of the form OP a, b, c, d, e where \[a:4\]_d = \[b:4\]_d op \[c:4\]_e.
-/// Operand d can only be 1, and e can be either 1 (for register reads) or 0 (when c
-/// is an immediate).
-pub struct Rv32BaseAluAdapterChip<F: Field> {
-    pub air: Rv32BaseAluAdapterAir,
-    bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
-    _marker: PhantomData<F>,
-}
-
-impl<F: PrimeField32> Rv32BaseAluAdapterChip<F> {
-    pub fn new(
-        execution_bus: ExecutionBus,
-        program_bus: ProgramBus,
-        memory_bridge: MemoryBridge,
-        bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
-    ) -> Self {
-        Self {
-            air: Rv32BaseAluAdapterAir {
-                execution_bridge: ExecutionBridge::new(execution_bus, program_bus),
-                memory_bridge,
-                bitwise_lookup_bus: bitwise_lookup_chip.bus(),
-            },
-            bitwise_lookup_chip,
-            _marker: PhantomData,
-        }
-    }
-}
-
-#[repr(C)]
-#[derive(Clone, Debug, Serialize, Deserialize)]
-#[serde(bound = "F: Field")]
-pub struct Rv32BaseAluReadRecord<F: Field> {
-    /// Read register value from address space d=1
-    pub rs1: RecordId,
-    /// Either
-    /// - read rs2 register value or
-    /// - if `rs2_is_imm` is true, this is None
-    pub rs2: Option<RecordId>,
-    /// immediate value of rs2 or 0
-    pub rs2_imm: F,
-}
 
-#[repr(C)]
-#[derive(Clone, Debug, Serialize, Deserialize)]
-#[serde(bound = "F: Field")]
-pub struct Rv32BaseAluWriteRecord<F: Field> {
-    pub from_state: ExecutionState<u32>,
-    /// Write to destination register
-    pub rd: (RecordId, [F; 4]),
-}
+use super::{
+    tracing_read, tracing_read_imm, tracing_write, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS,
+};
 
 #[repr(C)]
 #[derive(AlignedBorrow)]
@@ -101,7 +49,9 @@ pub struct Rv32BaseAluAdapterCols<T> {
     pub writes_aux: MemoryWriteAuxCols<T, RV32_REGISTER_NUM_LIMBS>,
 }
 
-#[allow(dead_code)]
+/// Reads instructions of the form OP a, b, c, d, e where \[a:4\]_d = \[b:4\]_d op \[c:4\]_e.
+/// Operand d can only be 1, and e can be either 1 (for register reads) or 0 (when c
+/// is an immediate).
 #[derive(Clone, Copy, Debug, derive_new::new)]
 pub struct Rv32BaseAluAdapterAir {
     pub(super) execution_bridge: ExecutionBridge,
@@ -213,129 +163,169 @@ impl<AB: InteractionBuilder> VmAdapterAir<AB> for Rv32BaseAluAdapterAir {
     }
 }
 
-impl<F: PrimeField32> VmAdapterChip<F> for Rv32BaseAluAdapterChip<F> {
-    type ReadRecord = Rv32BaseAluReadRecord<F>;
-    type WriteRecord = Rv32BaseAluWriteRecord<F>;
-    type Air = Rv32BaseAluAdapterAir;
-    type Interface = BasicAdapterInterface<
-        F,
-        MinimalInstruction<F>,
-        2,
-        1,
-        RV32_REGISTER_NUM_LIMBS,
-        RV32_REGISTER_NUM_LIMBS,
-    >;
+#[derive(Clone, derive_new::new)]
+pub struct Rv32BaseAluAdapterExecutor<const LIMB_BITS: usize>;
+
+#[derive(derive_new::new)]
+pub struct Rv32BaseAluAdapterFiller<const LIMB_BITS: usize> {
+    bitwise_lookup_chip: SharedBitwiseOperationLookupChip<LIMB_BITS>,
+}
+
+// Intermediate type that should not be copied or cloned and should be directly written to
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct Rv32BaseAluAdapterRecord {
+    pub from_pc: u32,
+    pub from_timestamp: u32,
+
+    pub rd_ptr: u32,
+    pub rs1_ptr: u32,
+    /// Pointer if rs2 was a read, immediate value otherwise
+    pub rs2: u32,
+    /// 1 if rs2 was a read, 0 if an immediate
+    pub rs2_as: u8,
+
+    pub reads_aux: [MemoryReadAuxRecord; 2],
+    pub writes_aux: MemoryWriteBytesAuxRecord<RV32_REGISTER_NUM_LIMBS>,
+}
 
-    fn preprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
+impl<F: PrimeField32, const LIMB_BITS: usize> AdapterTraceExecutor<F>
+    for Rv32BaseAluAdapterExecutor<LIMB_BITS>
+{
+    const WIDTH: usize = size_of::<Rv32BaseAluAdapterCols<u8>>();
+    type ReadData = [[u8; RV32_REGISTER_NUM_LIMBS]; 2];
+    type WriteData = [[u8; RV32_REGISTER_NUM_LIMBS]; 1];
+    type RecordMut<'a> = &'a mut Rv32BaseAluAdapterRecord;
+
+    #[inline(always)]
+    fn start(pc: u32, memory: &TracingMemory, record: &mut &mut Rv32BaseAluAdapterRecord) {
+        record.from_pc = pc;
+        record.from_timestamp = memory.timestamp;
+    }
+
+    // NOTE: `record` is `&mut &mut` because the trait passes `&mut Self::RecordMut<'_>`.
+    #[inline(always)]
+    fn read(
+        &self,
+        memory: &mut TracingMemory,
         instruction: &Instruction<F>,
-    ) -> Result<(
-        <Self::Interface as VmAdapterInterface<F>>::Reads,
-        Self::ReadRecord,
-    )> {
-        let Instruction { b, c, d, e, .. } = *instruction;
+        record: &mut &mut Rv32BaseAluAdapterRecord,
+    ) -> Self::ReadData {
+        let &Instruction { b, c, d, e, .. } = instruction;
 
         debug_assert_eq!(d.as_canonical_u32(), RV32_REGISTER_AS);
         debug_assert!(
-            e.as_canonical_u32() == RV32_IMM_AS || e.as_canonical_u32() == RV32_REGISTER_AS
+            e.as_canonical_u32() == RV32_REGISTER_AS || e.as_canonical_u32() == RV32_IMM_AS
         );
 
-        let rs1 = memory.read::<RV32_REGISTER_NUM_LIMBS>(d, b);
-        let (rs2, rs2_data, rs2_imm) = if e.is_zero() {
-            let c_u32 = c.as_canonical_u32();
-            debug_assert_eq!(c_u32 >> 24, 0);
-            memory.increment_timestamp();
-            (
-                None,
-                [
-                    c_u32 as u8,
-                    (c_u32 >> 8) as u8,
-                    (c_u32 >> 16) as u8,
-                    (c_u32 >> 16) as u8,
-                ]
-                .map(F::from_canonical_u8),
-                c,
+        record.rs1_ptr = b.as_canonical_u32();
+        let rs1 = tracing_read(
+            memory,
+            RV32_REGISTER_AS,
+            record.rs1_ptr,
+            &mut record.reads_aux[0].prev_timestamp,
+        );
+
+        let rs2 = if e.as_canonical_u32() == RV32_REGISTER_AS {
+            record.rs2_as = RV32_REGISTER_AS as u8;
+            record.rs2 = c.as_canonical_u32();
+
+            tracing_read(
+                memory,
+                RV32_REGISTER_AS,
+                record.rs2,
+                &mut record.reads_aux[1].prev_timestamp,
             )
         } else {
-            let rs2_read = memory.read::<RV32_REGISTER_NUM_LIMBS>(e, c);
-            (Some(rs2_read.0), rs2_read.1, F::ZERO)
+            record.rs2_as = RV32_IMM_AS as u8;
+
+            tracing_read_imm(memory, c.as_canonical_u32(), &mut record.rs2)
         };
 
-        Ok((
-            [rs1.1, rs2_data],
-            Self::ReadRecord {
-                rs1: rs1.0,
-                rs2,
-                rs2_imm,
-            },
-        ))
+        [rs1, rs2]
     }
 
-    fn postprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
+    #[inline(always)]
+    fn write(
+        &self,
+        memory: &mut TracingMemory,
         instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-        output: AdapterRuntimeContext<F, Self::Interface>,
-        _read_record: &Self::ReadRecord,
-    ) -> Result<(ExecutionState<u32>, Self::WriteRecord)> {
-        let Instruction { a, d, .. } = instruction;
-        let rd = memory.write(*d, *a, output.writes[0]);
-
-        let timestamp_delta = memory.timestamp() - from_state.timestamp;
-        debug_assert!(
-            timestamp_delta == 3,
-            "timestamp delta is {}, expected 3",
-            timestamp_delta
-        );
+        data: Self::WriteData,
+        record: &mut &mut Rv32BaseAluAdapterRecord,
+    ) {
+        let &Instruction { a, d, .. } = instruction;
+
+        debug_assert_eq!(d.as_canonical_u32(), RV32_REGISTER_AS);
 
-        Ok((
-            ExecutionState {
-                pc: from_state.pc + DEFAULT_PC_STEP,
-                timestamp: memory.timestamp(),
-            },
-            Self::WriteRecord { from_state, rd },
-        ))
+        record.rd_ptr = a.as_canonical_u32();
+        tracing_write(
+            memory,
+            RV32_REGISTER_AS,
+            record.rd_ptr,
+            data[0],
+            &mut record.writes_aux.prev_timestamp,
+            &mut record.writes_aux.prev_data,
+        );
     }
+}
 
-    fn generate_trace_row(
-        &self,
-        row_slice: &mut [F],
-        read_record: Self::ReadRecord,
-        write_record: Self::WriteRecord,
-        memory: &OfflineMemory<F>,
-    ) {
-        let row_slice: &mut Rv32BaseAluAdapterCols<_> = row_slice.borrow_mut();
-        let aux_cols_factory = memory.aux_cols_factory();
-
-        let rd = memory.record_by_id(write_record.rd.0);
-        row_slice.from_state = write_record.from_state.map(F::from_canonical_u32);
-        row_slice.rd_ptr = rd.pointer;
-
-        let rs1 = memory.record_by_id(read_record.rs1);
-        let rs2 = read_record.rs2.map(|rs2| memory.record_by_id(rs2));
-        row_slice.rs1_ptr = rs1.pointer;
-
-        if let Some(rs2) = rs2 {
-            row_slice.rs2 = rs2.pointer;
-            row_slice.rs2_as = rs2.address_space;
-            aux_cols_factory.generate_read_aux(rs1, &mut row_slice.reads_aux[0]);
-            aux_cols_factory.generate_read_aux(rs2, &mut row_slice.reads_aux[1]);
+impl<F: PrimeField32, const LIMB_BITS: usize> AdapterTraceFiller<F>
+    for Rv32BaseAluAdapterFiller<LIMB_BITS>
+{
+    const WIDTH: usize = size_of::<Rv32BaseAluAdapterCols<u8>>();
+
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, mut adapter_row: &mut [F]) {
+        // SAFETY: the following is highly unsafe. We are going to cast `adapter_row` to a record
+        // buffer, and then do an _overlapping_ write to the `adapter_row` as a row of field
+        // elements. This requires:
+        // - Cols struct should be repr(C) and we write in reverse order (to ensure non-overlapping)
+        // - Do not overwrite any reference in `record` before it has already been used or moved
+        // - alignment of `F` must be >= alignment of Record (AlignedBytesBorrow will panic
+        //   otherwise)
+        let record: &Rv32BaseAluAdapterRecord =
+            unsafe { get_record_from_slice(&mut adapter_row, ()) };
+        let adapter_row: &mut Rv32BaseAluAdapterCols<F> = adapter_row.borrow_mut();
+
+        // We must assign in reverse
+        const TIMESTAMP_DELTA: u32 = 2;
+        let mut timestamp = record.from_timestamp + TIMESTAMP_DELTA;
+
+        adapter_row
+            .writes_aux
+            .set_prev_data(record.writes_aux.prev_data.map(F::from_canonical_u8));
+        mem_helper.fill(
+            record.writes_aux.prev_timestamp,
+            timestamp,
+            adapter_row.writes_aux.as_mut(),
+        );
+        timestamp -= 1;
+
+        if record.rs2_as != 0 {
+            mem_helper.fill(
+                record.reads_aux[1].prev_timestamp,
+                timestamp,
+                adapter_row.reads_aux[1].as_mut(),
+            );
         } else {
-            row_slice.rs2 = read_record.rs2_imm;
-            row_slice.rs2_as = F::ZERO;
-            let rs2_imm = row_slice.rs2.as_canonical_u32();
+            mem_helper.fill_zero(adapter_row.reads_aux[1].as_mut());
+            let rs2_imm = record.rs2;
             let mask = (1 << RV32_CELL_BITS) - 1;
             self.bitwise_lookup_chip
                 .request_range(rs2_imm & mask, (rs2_imm >> 8) & mask);
-            aux_cols_factory.generate_read_aux(rs1, &mut row_slice.reads_aux[0]);
-            // row_slice.reads_aux[1] is disabled
         }
-        aux_cols_factory.generate_write_aux(rd, &mut row_slice.writes_aux);
-    }
+        timestamp -= 1;
+
+        mem_helper.fill(
+            record.reads_aux[0].prev_timestamp,
+            timestamp,
+            adapter_row.reads_aux[0].as_mut(),
+        );
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        adapter_row.rs2_as = F::from_canonical_u8(record.rs2_as);
+        adapter_row.rs2 = F::from_canonical_u32(record.rs2);
+        adapter_row.rs1_ptr = F::from_canonical_u32(record.rs1_ptr);
+        adapter_row.rd_ptr = F::from_canonical_u32(record.rd_ptr);
+        adapter_row.from_state.timestamp = F::from_canonical_u32(timestamp);
+        adapter_row.from_state.pc = F::from_canonical_u32(record.from_pc);
     }
 }
diff --git a/extensions/rv32im/circuit/src/adapters/branch.rs b/extensions/rv32im/circuit/src/adapters/branch.rs
index 3e26f37f4c..3f891f0791 100644
--- a/extensions/rv32im/circuit/src/adapters/branch.rs
+++ b/extensions/rv32im/circuit/src/adapters/branch.rs
@@ -1,22 +1,17 @@
-use std::{
-    borrow::{Borrow, BorrowMut},
-    marker::PhantomData,
-};
+use std::borrow::{Borrow, BorrowMut};
 
 use openvm_circuit::{
     arch::{
-        AdapterAirContext, AdapterRuntimeContext, BasicAdapterInterface, ExecutionBridge,
-        ExecutionBus, ExecutionState, ImmInstruction, Result, VmAdapterAir, VmAdapterChip,
-        VmAdapterInterface,
+        get_record_from_slice, AdapterAirContext, AdapterTraceExecutor, AdapterTraceFiller,
+        BasicAdapterInterface, ExecutionBridge, ExecutionState, ImmInstruction, VmAdapterAir,
     },
-    system::{
-        memory::{
-            offline_checker::{MemoryBridge, MemoryReadAuxCols},
-            MemoryAddress, MemoryController, OfflineMemory, RecordId,
-        },
-        program::ProgramBus,
+    system::memory::{
+        offline_checker::{MemoryBridge, MemoryReadAuxCols, MemoryReadAuxRecord},
+        online::TracingMemory,
+        MemoryAddress, MemoryAuxColsFactory,
     },
 };
+use openvm_circuit_primitives::AlignedBytesBorrow;
 use openvm_circuit_primitives_derive::AlignedBorrow;
 use openvm_instructions::{
     instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_REGISTER_AS,
@@ -26,48 +21,9 @@ use openvm_stark_backend::{
     p3_air::BaseAir,
     p3_field::{Field, FieldAlgebra, PrimeField32},
 };
-use serde::{Deserialize, Serialize};
 
 use super::RV32_REGISTER_NUM_LIMBS;
-
-/// Reads instructions of the form OP a, b, c, d, e where if(\[a:4\]_d op \[b:4\]_e) pc += c.
-/// Operands d and e can only be 1.
-#[derive(Debug)]
-pub struct Rv32BranchAdapterChip<F: Field> {
-    pub air: Rv32BranchAdapterAir,
-    _marker: PhantomData<F>,
-}
-
-impl<F: PrimeField32> Rv32BranchAdapterChip<F> {
-    pub fn new(
-        execution_bus: ExecutionBus,
-        program_bus: ProgramBus,
-        memory_bridge: MemoryBridge,
-    ) -> Self {
-        Self {
-            air: Rv32BranchAdapterAir {
-                execution_bridge: ExecutionBridge::new(execution_bus, program_bus),
-                memory_bridge,
-            },
-            _marker: PhantomData,
-        }
-    }
-}
-
-#[repr(C)]
-#[derive(Debug, Serialize, Deserialize)]
-pub struct Rv32BranchReadRecord {
-    /// Read register value from address space d = 1
-    pub rs1: RecordId,
-    /// Read register value from address space e = 1
-    pub rs2: RecordId,
-}
-
-#[repr(C)]
-#[derive(Debug, Serialize, Deserialize)]
-pub struct Rv32BranchWriteRecord {
-    pub from_state: ExecutionState<u32>,
-}
+use crate::adapters::tracing_read;
 
 #[repr(C)]
 #[derive(AlignedBorrow)]
@@ -149,80 +105,108 @@ impl<AB: InteractionBuilder> VmAdapterAir<AB> for Rv32BranchAdapterAir {
     }
 }
 
-impl<F: PrimeField32> VmAdapterChip<F> for Rv32BranchAdapterChip<F> {
-    type ReadRecord = Rv32BranchReadRecord;
-    type WriteRecord = Rv32BranchWriteRecord;
-    type Air = Rv32BranchAdapterAir;
-    type Interface = BasicAdapterInterface<F, ImmInstruction<F>, 2, 0, RV32_REGISTER_NUM_LIMBS, 0>;
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct Rv32BranchAdapterRecord {
+    pub from_pc: u32,
+    pub from_timestamp: u32,
+    pub rs1_ptr: u32,
+    pub rs2_ptr: u32,
+    pub reads_aux: [MemoryReadAuxRecord; 2],
+}
 
-    fn preprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
+/// Reads instructions of the form OP a, b, c, d, e where if(\[a:4\]_d op \[b:4\]_e) pc += c.
+/// Operands d and e can only be 1.
+#[derive(Clone, Copy, derive_new::new)]
+pub struct Rv32BranchAdapterExecutor;
+
+#[derive(derive_new::new)]
+pub struct Rv32BranchAdapterFiller;
+
+impl<F> AdapterTraceExecutor<F> for Rv32BranchAdapterExecutor
+where
+    F: PrimeField32,
+{
+    const WIDTH: usize = size_of::<Rv32BranchAdapterCols<u8>>();
+    type ReadData = [[u8; RV32_REGISTER_NUM_LIMBS]; 2];
+    type WriteData = ();
+    type RecordMut<'a> = &'a mut Rv32BranchAdapterRecord;
+
+    #[inline(always)]
+    fn start(pc: u32, memory: &TracingMemory, record: &mut &mut Rv32BranchAdapterRecord) {
+        record.from_pc = pc;
+        record.from_timestamp = memory.timestamp;
+    }
+
+    #[inline(always)]
+    fn read(
+        &self,
+        memory: &mut TracingMemory,
         instruction: &Instruction<F>,
-    ) -> Result<(
-        <Self::Interface as VmAdapterInterface<F>>::Reads,
-        Self::ReadRecord,
-    )> {
-        let Instruction { a, b, d, e, .. } = *instruction;
+        record: &mut &mut Rv32BranchAdapterRecord,
+    ) -> Self::ReadData {
+        let &Instruction { a, b, d, e, .. } = instruction;
 
         debug_assert_eq!(d.as_canonical_u32(), RV32_REGISTER_AS);
         debug_assert_eq!(e.as_canonical_u32(), RV32_REGISTER_AS);
 
-        let rs1 = memory.read::<RV32_REGISTER_NUM_LIMBS>(d, a);
-        let rs2 = memory.read::<RV32_REGISTER_NUM_LIMBS>(e, b);
-
-        Ok((
-            [rs1.1, rs2.1],
-            Self::ReadRecord {
-                rs1: rs1.0,
-                rs2: rs2.0,
-            },
-        ))
-    }
-
-    fn postprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        _instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-        output: AdapterRuntimeContext<F, Self::Interface>,
-        _read_record: &Self::ReadRecord,
-    ) -> Result<(ExecutionState<u32>, Self::WriteRecord)> {
-        let timestamp_delta = memory.timestamp() - from_state.timestamp;
-        debug_assert!(
-            timestamp_delta == 2,
-            "timestamp delta is {}, expected 2",
-            timestamp_delta
+        record.rs1_ptr = a.as_canonical_u32();
+        let rs1 = tracing_read(
+            memory,
+            RV32_REGISTER_AS,
+            a.as_canonical_u32(),
+            &mut record.reads_aux[0].prev_timestamp,
+        );
+        record.rs2_ptr = b.as_canonical_u32();
+        let rs2 = tracing_read(
+            memory,
+            RV32_REGISTER_AS,
+            b.as_canonical_u32(),
+            &mut record.reads_aux[1].prev_timestamp,
         );
 
-        Ok((
-            ExecutionState {
-                pc: output.to_pc.unwrap_or(from_state.pc + DEFAULT_PC_STEP),
-                timestamp: memory.timestamp(),
-            },
-            Self::WriteRecord { from_state },
-        ))
+        [rs1, rs2]
     }
 
-    fn generate_trace_row(
+    #[inline(always)]
+    fn write(
         &self,
-        row_slice: &mut [F],
-        read_record: Self::ReadRecord,
-        write_record: Self::WriteRecord,
-        memory: &OfflineMemory<F>,
+        _memory: &mut TracingMemory,
+        _instruction: &Instruction<F>,
+        _data: Self::WriteData,
+        _record: &mut Self::RecordMut<'_>,
     ) {
-        let aux_cols_factory = memory.aux_cols_factory();
-        let row_slice: &mut Rv32BranchAdapterCols<_> = row_slice.borrow_mut();
-        row_slice.from_state = write_record.from_state.map(F::from_canonical_u32);
-        let rs1 = memory.record_by_id(read_record.rs1);
-        let rs2 = memory.record_by_id(read_record.rs2);
-        row_slice.rs1_ptr = rs1.pointer;
-        row_slice.rs2_ptr = rs2.pointer;
-        aux_cols_factory.generate_read_aux(rs1, &mut row_slice.reads_aux[0]);
-        aux_cols_factory.generate_read_aux(rs2, &mut row_slice.reads_aux[1]);
+        // This function is intentionally left empty
     }
+}
+
+impl<F: PrimeField32> AdapterTraceFiller<F> for Rv32BranchAdapterFiller {
+    const WIDTH: usize = size_of::<Rv32BranchAdapterCols<u8>>();
+
+    #[inline(always)]
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, mut adapter_row: &mut [F]) {
+        let record: &Rv32BranchAdapterRecord =
+            unsafe { get_record_from_slice(&mut adapter_row, ()) };
+        let adapter_row: &mut Rv32BranchAdapterCols<F> = adapter_row.borrow_mut();
+
+        // Fill columns back to front: `record` is a view into the same buffer as `adapter_row`
+        let timestamp = record.from_timestamp;
+
+        mem_helper.fill(
+            record.reads_aux[1].prev_timestamp,
+            timestamp + 1,
+            adapter_row.reads_aux[1].as_mut(),
+        );
+
+        mem_helper.fill(
+            record.reads_aux[0].prev_timestamp,
+            timestamp,
+            adapter_row.reads_aux[0].as_mut(),
+        );
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        adapter_row.rs2_ptr = F::from_canonical_u32(record.rs2_ptr);
+        adapter_row.rs1_ptr = F::from_canonical_u32(record.rs1_ptr);
+        adapter_row.from_state.timestamp = F::from_canonical_u32(timestamp);
+        adapter_row.from_state.pc = F::from_canonical_u32(record.from_pc);
     }
 }
diff --git a/extensions/rv32im/circuit/src/adapters/jalr.rs b/extensions/rv32im/circuit/src/adapters/jalr.rs
index f7dbf623b8..c1b3434e83 100644
--- a/extensions/rv32im/circuit/src/adapters/jalr.rs
+++ b/extensions/rv32im/circuit/src/adapters/jalr.rs
@@ -1,23 +1,20 @@
-use std::{
-    borrow::{Borrow, BorrowMut},
-    marker::PhantomData,
-};
+use std::borrow::{Borrow, BorrowMut};
 
 use openvm_circuit::{
     arch::{
-        AdapterAirContext, AdapterRuntimeContext, BasicAdapterInterface, ExecutionBridge,
-        ExecutionBus, ExecutionState, Result, SignedImmInstruction, VmAdapterAir, VmAdapterChip,
-        VmAdapterInterface,
+        get_record_from_slice, AdapterAirContext, AdapterTraceExecutor, AdapterTraceFiller,
+        BasicAdapterInterface, ExecutionBridge, ExecutionState, SignedImmInstruction, VmAdapterAir,
     },
-    system::{
-        memory::{
-            offline_checker::{MemoryBridge, MemoryReadAuxCols, MemoryWriteAuxCols},
-            MemoryAddress, MemoryController, OfflineMemory, RecordId,
+    system::memory::{
+        offline_checker::{
+            MemoryBridge, MemoryReadAuxCols, MemoryReadAuxRecord, MemoryWriteAuxCols,
+            MemoryWriteBytesAuxRecord,
         },
-        program::ProgramBus,
+        online::TracingMemory,
+        MemoryAddress, MemoryAuxColsFactory,
     },
 };
-use openvm_circuit_primitives::utils::not;
+use openvm_circuit_primitives::{utils::not, AlignedBytesBorrow};
 use openvm_circuit_primitives_derive::AlignedBorrow;
 use openvm_instructions::{
     instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_REGISTER_AS,
@@ -27,44 +24,9 @@ use openvm_stark_backend::{
     p3_air::{AirBuilder, BaseAir},
     p3_field::{Field, FieldAlgebra, PrimeField32},
 };
-use serde::{Deserialize, Serialize};
 
 use super::RV32_REGISTER_NUM_LIMBS;
-
-// This adapter reads from [b:4]_d (rs1) and writes to [a:4]_d (rd)
-#[derive(Debug)]
-pub struct Rv32JalrAdapterChip<F: Field> {
-    pub air: Rv32JalrAdapterAir,
-    _marker: PhantomData<F>,
-}
-
-impl<F: PrimeField32> Rv32JalrAdapterChip<F> {
-    pub fn new(
-        execution_bus: ExecutionBus,
-        program_bus: ProgramBus,
-        memory_bridge: MemoryBridge,
-    ) -> Self {
-        Self {
-            air: Rv32JalrAdapterAir {
-                execution_bridge: ExecutionBridge::new(execution_bus, program_bus),
-                memory_bridge,
-            },
-            _marker: PhantomData,
-        }
-    }
-}
-#[repr(C)]
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct Rv32JalrReadRecord {
-    pub rs1: RecordId,
-}
-
-#[repr(C)]
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct Rv32JalrWriteRecord {
-    pub from_state: ExecutionState<u32>,
-    pub rd_id: Option<RecordId>,
-}
+use crate::adapters::{tracing_read, tracing_write};
 
 #[repr(C)]
 #[derive(Debug, Clone, AlignedBorrow)]
@@ -179,84 +141,126 @@ impl<AB: InteractionBuilder> VmAdapterAir<AB> for Rv32JalrAdapterAir {
     }
 }
 
-impl<F: PrimeField32> VmAdapterChip<F> for Rv32JalrAdapterChip<F> {
-    type ReadRecord = Rv32JalrReadRecord;
-    type WriteRecord = Rv32JalrWriteRecord;
-    type Air = Rv32JalrAdapterAir;
-    type Interface = BasicAdapterInterface<
-        F,
-        SignedImmInstruction<F>,
-        1,
-        1,
-        RV32_REGISTER_NUM_LIMBS,
-        RV32_REGISTER_NUM_LIMBS,
-    >;
-    fn preprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-    ) -> Result<(
-        <Self::Interface as VmAdapterInterface<F>>::Reads,
-        Self::ReadRecord,
-    )> {
-        let Instruction { b, d, .. } = *instruction;
-        debug_assert_eq!(d.as_canonical_u32(), RV32_REGISTER_AS);
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct Rv32JalrAdapterRecord {
+    pub from_pc: u32,
+    pub from_timestamp: u32,
+
+    pub rs1_ptr: u32,
+    // Will use u32::MAX to indicate no write
+    pub rd_ptr: u32,
+
+    pub reads_aux: MemoryReadAuxRecord,
+    pub writes_aux: MemoryWriteBytesAuxRecord<RV32_REGISTER_NUM_LIMBS>,
+}
 
-        let rs1 = memory.read::<RV32_REGISTER_NUM_LIMBS>(d, b);
+/// Reads rs1 from \[b:4\]_d and writes rd to \[a:4\]_d; the write is skipped unless `f` is 1.
+#[derive(Clone, Copy, derive_new::new)]
+pub struct Rv32JalrAdapterExecutor;
+
+#[derive(Clone, Copy, derive_new::new)]
+pub struct Rv32JalrAdapterFiller;
 
-        Ok(([rs1.1], Rv32JalrReadRecord { rs1: rs1.0 }))
+impl<F> AdapterTraceExecutor<F> for Rv32JalrAdapterExecutor
+where
+    F: PrimeField32,
+{
+    const WIDTH: usize = size_of::<Rv32JalrAdapterCols<u8>>();
+    type ReadData = [u8; RV32_REGISTER_NUM_LIMBS];
+    type WriteData = [u8; RV32_REGISTER_NUM_LIMBS];
+    type RecordMut<'a> = &'a mut Rv32JalrAdapterRecord;
+
+    #[inline(always)]
+    fn start(pc: u32, memory: &TracingMemory, record: &mut Self::RecordMut<'_>) {
+        record.from_pc = pc;
+        record.from_timestamp = memory.timestamp;
     }
 
-    fn postprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
+    #[inline(always)]
+    fn read(
+        &self,
+        memory: &mut TracingMemory,
         instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-        output: AdapterRuntimeContext<F, Self::Interface>,
-        _read_record: &Self::ReadRecord,
-    ) -> Result<(ExecutionState<u32>, Self::WriteRecord)> {
-        let Instruction {
-            a, d, f: enabled, ..
-        } = *instruction;
-        let rd_id = if enabled != F::ZERO {
-            let (record_id, _) = memory.write(d, a, output.writes[0]);
-            Some(record_id)
-        } else {
-            memory.increment_timestamp();
-            None
-        };
+        record: &mut Self::RecordMut<'_>,
+    ) -> Self::ReadData {
+        let &Instruction { b, d, .. } = instruction;
+
+        debug_assert_eq!(d.as_canonical_u32(), RV32_REGISTER_AS);
 
-        Ok((
-            ExecutionState {
-                pc: output.to_pc.unwrap_or(from_state.pc + DEFAULT_PC_STEP),
-                timestamp: memory.timestamp(),
-            },
-            Self::WriteRecord { from_state, rd_id },
-        ))
+        record.rs1_ptr = b.as_canonical_u32();
+        tracing_read(
+            memory,
+            RV32_REGISTER_AS,
+            b.as_canonical_u32(),
+            &mut record.reads_aux.prev_timestamp,
+        )
     }
 
-    fn generate_trace_row(
+    #[inline(always)]
+    fn write(
         &self,
-        row_slice: &mut [F],
-        read_record: Self::ReadRecord,
-        write_record: Self::WriteRecord,
-        memory: &OfflineMemory<F>,
+        memory: &mut TracingMemory,
+        instruction: &Instruction<F>,
+        data: Self::WriteData,
+        record: &mut Self::RecordMut<'_>,
     ) {
-        let aux_cols_factory = memory.aux_cols_factory();
-        let adapter_cols: &mut Rv32JalrAdapterCols<_> = row_slice.borrow_mut();
-        adapter_cols.from_state = write_record.from_state.map(F::from_canonical_u32);
-        let rs1 = memory.record_by_id(read_record.rs1);
-        adapter_cols.rs1_ptr = rs1.pointer;
-        aux_cols_factory.generate_read_aux(rs1, &mut adapter_cols.rs1_aux_cols);
-        if let Some(id) = write_record.rd_id {
-            let rd = memory.record_by_id(id);
-            adapter_cols.rd_ptr = rd.pointer;
-            adapter_cols.needs_write = F::ONE;
-            aux_cols_factory.generate_write_aux(rd, &mut adapter_cols.rd_aux_cols);
+        let &Instruction {
+            a, d, f: enabled, ..
+        } = instruction;
+
+        debug_assert_eq!(d.as_canonical_u32(), RV32_REGISTER_AS);
+
+        if enabled.is_one() {
+            record.rd_ptr = a.as_canonical_u32();
+
+            tracing_write(
+                memory,
+                RV32_REGISTER_AS,
+                a.as_canonical_u32(),
+                data,
+                &mut record.writes_aux.prev_timestamp,
+                &mut record.writes_aux.prev_data,
+            );
+        } else {
+            record.rd_ptr = u32::MAX;
+            memory.increment_timestamp();
         }
     }
+}
+
+impl<F: PrimeField32> AdapterTraceFiller<F> for Rv32JalrAdapterFiller {
+    const WIDTH: usize = size_of::<Rv32JalrAdapterCols<u8>>();
+
+    #[inline(always)]
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, mut adapter_row: &mut [F]) {
+        let record: &Rv32JalrAdapterRecord = unsafe { get_record_from_slice(&mut adapter_row, ()) };
+        let adapter_row: &mut Rv32JalrAdapterCols<F> = adapter_row.borrow_mut();
+
+        // We must assign in reverse
+        adapter_row.needs_write = F::from_bool(record.rd_ptr != u32::MAX);
+
+        if record.rd_ptr != u32::MAX {
+            adapter_row
+                .rd_aux_cols
+                .set_prev_data(record.writes_aux.prev_data.map(F::from_canonical_u8));
+            mem_helper.fill(
+                record.writes_aux.prev_timestamp,
+                record.from_timestamp + 1,
+                adapter_row.rd_aux_cols.as_mut(),
+            );
+            adapter_row.rd_ptr = F::from_canonical_u32(record.rd_ptr);
+        } else {
+            adapter_row.rd_ptr = F::ZERO;
+        }
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        mem_helper.fill(
+            record.reads_aux.prev_timestamp,
+            record.from_timestamp,
+            adapter_row.rs1_aux_cols.as_mut(),
+        );
+        adapter_row.rs1_ptr = F::from_canonical_u32(record.rs1_ptr);
+        adapter_row.from_state.timestamp = F::from_canonical_u32(record.from_timestamp);
+        adapter_row.from_state.pc = F::from_canonical_u32(record.from_pc);
     }
 }
diff --git a/extensions/rv32im/circuit/src/adapters/loadstore.rs b/extensions/rv32im/circuit/src/adapters/loadstore.rs
index b92680a0c7..8e151789b5 100644
--- a/extensions/rv32im/circuit/src/adapters/loadstore.rs
+++ b/extensions/rv32im/circuit/src/adapters/loadstore.rs
@@ -1,34 +1,36 @@
 use std::{
-    array,
     borrow::{Borrow, BorrowMut},
     marker::PhantomData,
 };
 
 use openvm_circuit::{
     arch::{
-        AdapterAirContext, AdapterRuntimeContext, ExecutionBridge, ExecutionBus, ExecutionState,
-        Result, VmAdapterAir, VmAdapterChip, VmAdapterInterface,
+        get_record_from_slice, AdapterAirContext, AdapterTraceExecutor, AdapterTraceFiller,
+        ExecutionBridge, ExecutionState, VmAdapterAir, VmAdapterInterface,
     },
     system::{
         memory::{
             offline_checker::{
-                MemoryBaseAuxCols, MemoryBridge, MemoryReadAuxCols, MemoryWriteAuxCols,
+                MemoryBaseAuxCols, MemoryBridge, MemoryReadAuxCols, MemoryReadAuxRecord,
+                MemoryWriteAuxCols,
             },
-            MemoryAddress, MemoryController, OfflineMemory, RecordId,
+            online::TracingMemory,
+            MemoryAddress, MemoryAuxColsFactory,
         },
-        program::ProgramBus,
+        native_adapter::util::{memory_read_native, timed_write_native},
     },
 };
 use openvm_circuit_primitives::{
     utils::{not, select},
     var_range::{SharedVariableRangeCheckerChip, VariableRangeCheckerBus},
+    AlignedBytesBorrow,
 };
 use openvm_circuit_primitives_derive::AlignedBorrow;
 use openvm_instructions::{
     instruction::Instruction,
     program::DEFAULT_PC_STEP,
-    riscv::{RV32_IMM_AS, RV32_REGISTER_AS},
-    LocalOpcode,
+    riscv::{RV32_IMM_AS, RV32_MEMORY_AS, RV32_REGISTER_AS},
+    LocalOpcode, NATIVE_AS,
 };
 use openvm_rv32im_transpiler::Rv32LoadStoreOpcode::{self, *};
 use openvm_stark_backend::{
@@ -36,10 +38,9 @@ use openvm_stark_backend::{
     p3_air::{AirBuilder, BaseAir},
     p3_field::{Field, FieldAlgebra, PrimeField32},
 };
-use serde::{Deserialize, Serialize};
 
-use super::{compose, RV32_REGISTER_NUM_LIMBS};
-use crate::adapters::RV32_CELL_BITS;
+use super::RV32_REGISTER_NUM_LIMBS;
+use crate::adapters::{memory_read, timed_write, tracing_read, RV32_CELL_BITS};
 
 /// LoadStore Adapter handles all memory and register operations, so it must be aware
 /// of the instruction type, specifically whether it is a load or store
@@ -64,22 +65,6 @@ pub struct LoadStoreInstruction<T> {
     pub store_shift_amount: T,
 }
 
-/// The LoadStoreAdapter separates Runtime and Air AdapterInterfaces.
-/// This is necessary because `prev_data` should be owned by the core chip and sent to the adapter,
-/// and it must have an AB::Var type in AIR as to satisfy the memory_bridge interface.
-/// This is achieved by having different types for reads and writes in Air AdapterInterface.
-/// This method ensures that there are no modifications to the global interfaces.
-///
-/// Here 2 reads represent read_data and prev_data,
-/// The second element of the tuple in Reads is the shift amount needed to be passed to the core
-/// chip Getting the intermediate pointer is completely internal to the adapter and shouldn't be a
-/// part of the AdapterInterface
-pub struct Rv32LoadStoreAdapterRuntimeInterface<T>(PhantomData<T>);
-impl<T> VmAdapterInterface<T> for Rv32LoadStoreAdapterRuntimeInterface<T> {
-    type Reads = ([[T; RV32_REGISTER_NUM_LIMBS]; 2], T);
-    type Writes = [[T; RV32_REGISTER_NUM_LIMBS]; 1];
-    type ProcessedInstruction = ();
-}
 pub struct Rv32LoadStoreAdapterAirInterface<AB: InteractionBuilder>(PhantomData<AB>);
 
 /// Using AB::Var for prev_data and AB::Expr for read_data
@@ -92,65 +77,6 @@ impl<AB: InteractionBuilder> VmAdapterInterface<AB::Expr> for Rv32LoadStoreAdapt
     type ProcessedInstruction = LoadStoreInstruction<AB::Expr>;
 }
 
-/// This chip reads rs1 and gets a intermediate memory pointer address with rs1 + imm.
-/// In case of Loads, reads from the shifted intermediate pointer and writes to rd.
-/// In case of Stores, reads from rs2 and writes to the shifted intermediate pointer.
-pub struct Rv32LoadStoreAdapterChip<F: Field> {
-    pub air: Rv32LoadStoreAdapterAir,
-    pub range_checker_chip: SharedVariableRangeCheckerChip,
-    _marker: PhantomData<F>,
-}
-
-impl<F: PrimeField32> Rv32LoadStoreAdapterChip<F> {
-    pub fn new(
-        execution_bus: ExecutionBus,
-        program_bus: ProgramBus,
-        memory_bridge: MemoryBridge,
-        pointer_max_bits: usize,
-        range_checker_chip: SharedVariableRangeCheckerChip,
-    ) -> Self {
-        assert!(range_checker_chip.range_max_bits() >= 15);
-        Self {
-            air: Rv32LoadStoreAdapterAir {
-                execution_bridge: ExecutionBridge::new(execution_bus, program_bus),
-                memory_bridge,
-                range_bus: range_checker_chip.bus(),
-                pointer_max_bits,
-            },
-            range_checker_chip,
-            _marker: PhantomData,
-        }
-    }
-}
-
-#[repr(C)]
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[serde(bound = "F: Field")]
-pub struct Rv32LoadStoreReadRecord<F: Field> {
-    pub rs1_record: RecordId,
-    /// This will be a read from a register in case of Stores and a read from RISC-V memory in case
-    /// of Loads.
-    pub read: RecordId,
-    pub rs1_ptr: F,
-    pub imm: F,
-    pub imm_sign: F,
-    pub mem_as: F,
-    pub mem_ptr_limbs: [u32; 2],
-    pub shift_amount: u32,
-}
-
-#[repr(C)]
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[serde(bound = "F: Field")]
-pub struct Rv32LoadStoreWriteRecord<F: Field> {
-    /// This will be a write to a register in case of Load and a write to RISC-V memory in case of
-    /// Stores. For better struct packing, `RecordId(usize::MAX)` is used to indicate that
-    /// there is no write.
-    pub write_id: RecordId,
-    pub from_state: ExecutionState<u32>,
-    pub rd_rs2_ptr: F,
-}
-
 #[repr(C)]
 #[derive(Debug, Clone, AlignedBorrow)]
 pub struct Rv32LoadStoreAdapterCols<T> {
@@ -366,22 +292,69 @@ impl<AB: InteractionBuilder> VmAdapterAir<AB> for Rv32LoadStoreAdapterAir {
     }
 }
 
-impl<F: PrimeField32> VmAdapterChip<F> for Rv32LoadStoreAdapterChip<F> {
-    type ReadRecord = Rv32LoadStoreReadRecord<F>;
-    type WriteRecord = Rv32LoadStoreWriteRecord<F>;
-    type Air = Rv32LoadStoreAdapterAir;
-    type Interface = Rv32LoadStoreAdapterRuntimeInterface<F>;
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct Rv32LoadStoreAdapterRecord {
+    pub from_pc: u32,
+    pub from_timestamp: u32,
 
-    #[allow(clippy::type_complexity)]
-    fn preprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
+    pub rs1_ptr: u32,
+    pub rs1_val: u32,
+    pub rs1_aux_record: MemoryReadAuxRecord,
+
+    pub rd_rs2_ptr: u32,
+    pub read_data_aux: MemoryReadAuxRecord,
+    pub imm: u16,
+    pub imm_sign: bool,
+
+    pub mem_as: u8,
+
+    pub write_prev_timestamp: u32,
+}
+
+/// This chip reads rs1 and gets an intermediate memory pointer address with rs1 + imm.
+/// In case of Loads, reads from the shifted intermediate pointer and writes to rd.
+/// In case of Stores, reads from rs2 and writes to the shifted intermediate pointer.
+#[derive(Clone, Copy, derive_new::new)]
+pub struct Rv32LoadStoreAdapterExecutor {
+    pointer_max_bits: usize,
+}
+
+#[derive(derive_new::new)]
+pub struct Rv32LoadStoreAdapterFiller {
+    pointer_max_bits: usize,
+    pub range_checker_chip: SharedVariableRangeCheckerChip,
+}
+
+impl<F> AdapterTraceExecutor<F> for Rv32LoadStoreAdapterExecutor
+where
+    F: PrimeField32,
+{
+    const WIDTH: usize = size_of::<Rv32LoadStoreAdapterCols<u8>>();
+    type ReadData = (
+        (
+            [u32; RV32_REGISTER_NUM_LIMBS],
+            [u8; RV32_REGISTER_NUM_LIMBS],
+        ),
+        u8,
+    );
+    type WriteData = [u32; RV32_REGISTER_NUM_LIMBS];
+    type RecordMut<'a> = &'a mut Rv32LoadStoreAdapterRecord;
+
+    #[inline(always)]
+    fn start(pc: u32, memory: &TracingMemory, record: &mut Self::RecordMut<'_>) {
+        record.from_pc = pc;
+        record.from_timestamp = memory.timestamp;
+    }
+
+    #[inline(always)]
+    fn read(
+        &self,
+        memory: &mut TracingMemory,
         instruction: &Instruction<F>,
-    ) -> Result<(
-        <Self::Interface as VmAdapterInterface<F>>::Reads,
-        Self::ReadRecord,
-    )> {
-        let Instruction {
+        record: &mut Self::RecordMut<'_>,
+    ) -> Self::ReadData {
+        let &Instruction {
             opcode,
             a,
             b,
@@ -390,154 +363,193 @@ impl<F: PrimeField32> VmAdapterChip<F> for Rv32LoadStoreAdapterChip<F> {
             e,
             g,
             ..
-        } = *instruction;
+        } = instruction;
+
         debug_assert_eq!(d.as_canonical_u32(), RV32_REGISTER_AS);
-        debug_assert!(e.as_canonical_u32() != RV32_IMM_AS);
 
         let local_opcode = Rv32LoadStoreOpcode::from_usize(
             opcode.local_opcode_idx(Rv32LoadStoreOpcode::CLASS_OFFSET),
         );
-        let rs1_record = memory.read::<RV32_REGISTER_NUM_LIMBS>(d, b);
 
-        let rs1_val = compose(rs1_record.1);
-        let imm = c.as_canonical_u32();
-        let imm_sign = g.as_canonical_u32();
-        let imm_extended = imm + imm_sign * 0xffff0000;
+        record.rs1_ptr = b.as_canonical_u32();
+        record.rs1_val = u32::from_le_bytes(tracing_read(
+            memory,
+            RV32_REGISTER_AS,
+            record.rs1_ptr,
+            &mut record.rs1_aux_record.prev_timestamp,
+        ));
+
+        record.imm = c.as_canonical_u32() as u16;
+        record.imm_sign = g.is_one();
+        let imm_extended = record.imm as u32 + record.imm_sign as u32 * 0xffff0000;
+
+        let ptr_val = record.rs1_val.wrapping_add(imm_extended);
+        let shift_amount = ptr_val & 3;
+        let ptr_val = ptr_val - shift_amount;
 
-        let ptr_val = rs1_val.wrapping_add(imm_extended);
-        let shift_amount = ptr_val % 4;
         assert!(
-            ptr_val < (1 << self.air.pointer_max_bits),
-            "ptr_val: {ptr_val} = rs1_val: {rs1_val} + imm_extended: {imm_extended} >= 2 ** {}",
-            self.air.pointer_max_bits
+            ptr_val < (1 << self.pointer_max_bits),
+            "ptr_val: {ptr_val} = rs1_val: {} + imm_extended: {imm_extended} >= 2 ** {}",
+            record.rs1_val,
+            self.pointer_max_bits
         );
 
-        let mem_ptr_limbs = array::from_fn(|i| ((ptr_val >> (i * (RV32_CELL_BITS * 2))) & 0xffff));
-
-        let ptr_val = ptr_val - shift_amount;
-        let read_record = match local_opcode {
+        // prev_data: We need to keep values of some cells to keep them unchanged when writing to
+        // those cells
+        let (read_data, prev_data) = match local_opcode {
             LOADW | LOADB | LOADH | LOADBU | LOADHU => {
-                memory.read::<RV32_REGISTER_NUM_LIMBS>(e, F::from_canonical_u32(ptr_val))
+                debug_assert_eq!(e, F::from_canonical_u32(RV32_MEMORY_AS));
+                record.mem_as = RV32_MEMORY_AS as u8;
+                let read_data = tracing_read(
+                    memory,
+                    RV32_MEMORY_AS,
+                    ptr_val,
+                    &mut record.read_data_aux.prev_timestamp,
+                );
+                let prev_data = memory_read(memory.data(), RV32_REGISTER_AS, a.as_canonical_u32())
+                    .map(u32::from);
+                (read_data, prev_data)
             }
-            STOREW | STOREH | STOREB => memory.read::<RV32_REGISTER_NUM_LIMBS>(d, a),
-        };
-
-        // We need to keep values of some cells to keep them unchanged when writing to those cells
-        let prev_data = match local_opcode {
-            STOREW | STOREH | STOREB => array::from_fn(|i| {
-                memory.unsafe_read_cell(e, F::from_canonical_usize(ptr_val as usize + i))
-            }),
-            LOADW | LOADB | LOADH | LOADBU | LOADHU => {
-                array::from_fn(|i| memory.unsafe_read_cell(d, a + F::from_canonical_usize(i)))
+            STOREW | STOREH | STOREB => {
+                let e = e.as_canonical_u32();
+                debug_assert_ne!(e, RV32_IMM_AS);
+                debug_assert_ne!(e, RV32_REGISTER_AS);
+                record.mem_as = e as u8;
+                let read_data = tracing_read(
+                    memory,
+                    RV32_REGISTER_AS,
+                    a.as_canonical_u32(),
+                    &mut record.read_data_aux.prev_timestamp,
+                );
+                let prev_data = if e == NATIVE_AS {
+                    memory_read_native(memory.data(), ptr_val).map(|x: F| x.as_canonical_u32())
+                } else {
+                    memory_read(memory.data(), e, ptr_val).map(u32::from)
+                };
+                (read_data, prev_data)
             }
         };
 
-        Ok((
-            (
-                [prev_data, read_record.1],
-                F::from_canonical_u32(shift_amount),
-            ),
-            Self::ReadRecord {
-                rs1_record: rs1_record.0,
-                rs1_ptr: b,
-                read: read_record.0,
-                imm: c,
-                imm_sign: g,
-                shift_amount,
-                mem_ptr_limbs,
-                mem_as: e,
-            },
-        ))
+        ((prev_data, read_data), shift_amount as u8)
     }
 
-    fn postprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
+    #[inline(always)]
+    fn write(
+        &self,
+        memory: &mut TracingMemory,
         instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-        output: AdapterRuntimeContext<F, Self::Interface>,
-        read_record: &Self::ReadRecord,
-    ) -> Result<(ExecutionState<u32>, Self::WriteRecord)> {
-        let Instruction {
+        data: Self::WriteData,
+        record: &mut Self::RecordMut<'_>,
+    ) {
+        let &Instruction {
             opcode,
             a,
             d,
             e,
             f: enabled,
             ..
-        } = *instruction;
+        } = instruction;
+
+        debug_assert_eq!(d.as_canonical_u32(), RV32_REGISTER_AS);
+        debug_assert_ne!(e.as_canonical_u32(), RV32_IMM_AS);
+        debug_assert_ne!(e.as_canonical_u32(), RV32_REGISTER_AS);
 
         let local_opcode = Rv32LoadStoreOpcode::from_usize(
             opcode.local_opcode_idx(Rv32LoadStoreOpcode::CLASS_OFFSET),
         );
 
-        let write_id = if enabled != F::ZERO {
-            let (record_id, _) = match local_opcode {
+        if enabled != F::ZERO {
+            record.rd_rs2_ptr = a.as_canonical_u32();
+
+            record.write_prev_timestamp = match local_opcode {
                 STOREW | STOREH | STOREB => {
-                    let ptr = read_record.mem_ptr_limbs[0]
-                        + read_record.mem_ptr_limbs[1] * (1 << (RV32_CELL_BITS * 2));
-                    memory.write(e, F::from_canonical_u32(ptr & 0xfffffffc), output.writes[0])
+                    let imm_extended = record.imm as u32 + record.imm_sign as u32 * 0xffff0000;
+                    let ptr = record.rs1_val.wrapping_add(imm_extended) & !3;
+                    // Native address space stores field elements; `timed_write` stores bytes.
+                    if record.mem_as == NATIVE_AS as u8 {
+                        timed_write_native(memory, ptr, data.map(F::from_canonical_u32)).0
+                    } else {
+                        timed_write(memory, record.mem_as as u32, ptr, data.map(|x| x as u8)).0
+                    }
+                }
+                LOADW | LOADB | LOADH | LOADBU | LOADHU => {
+                    timed_write(
+                        memory,
+                        RV32_REGISTER_AS,
+                        record.rd_rs2_ptr,
+                        data.map(|x| x as u8),
+                    )
+                    .0
                 }
-                LOADW | LOADB | LOADH | LOADBU | LOADHU => memory.write(d, a, output.writes[0]),
             };
-            record_id
         } else {
+            record.rd_rs2_ptr = u32::MAX;
             memory.increment_timestamp();
-            // RecordId will never get to usize::MAX, so it can be used as a flag for no write
-            RecordId(usize::MAX)
         };
-
-        Ok((
-            ExecutionState {
-                pc: output.to_pc.unwrap_or(from_state.pc + DEFAULT_PC_STEP),
-                timestamp: memory.timestamp(),
-            },
-            Self::WriteRecord {
-                from_state,
-                write_id,
-                rd_rs2_ptr: a,
-            },
-        ))
     }
+}
 
-    fn generate_trace_row(
-        &self,
-        row_slice: &mut [F],
-        read_record: Self::ReadRecord,
-        write_record: Self::WriteRecord,
-        memory: &OfflineMemory<F>,
-    ) {
-        self.range_checker_chip.add_count(
-            (read_record.mem_ptr_limbs[0] - read_record.shift_amount) / 4,
-            RV32_CELL_BITS * 2 - 2,
+impl<F: PrimeField32> AdapterTraceFiller<F> for Rv32LoadStoreAdapterFiller {
+    const WIDTH: usize = size_of::<Rv32LoadStoreAdapterCols<u8>>();
+
+    #[inline(always)]
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, mut adapter_row: &mut [F]) {
+        debug_assert!(self.range_checker_chip.range_max_bits() >= 15);
+
+        let record: &Rv32LoadStoreAdapterRecord =
+            unsafe { get_record_from_slice(&mut adapter_row, ()) };
+        let adapter_row: &mut Rv32LoadStoreAdapterCols<F> = adapter_row.borrow_mut();
+
+        let needs_write = record.rd_rs2_ptr != u32::MAX;
+        // Writing in reverse order
+        adapter_row.needs_write = F::from_bool(needs_write);
+
+        if needs_write {
+            mem_helper.fill(
+                record.write_prev_timestamp,
+                record.from_timestamp + 2,
+                &mut adapter_row.write_base_aux,
+            );
+        } else {
+            mem_helper.fill_zero(&mut adapter_row.write_base_aux);
+        }
+
+        adapter_row.mem_as = F::from_canonical_u8(record.mem_as);
+        let ptr = record
+            .rs1_val
+            .wrapping_add(record.imm as u32 + record.imm_sign as u32 * 0xffff0000);
+
+        let ptr_limbs = [ptr & 0xffff, ptr >> 16];
+        self.range_checker_chip
+            .add_count(ptr_limbs[0] >> 2, RV32_CELL_BITS * 2 - 2);
+        self.range_checker_chip
+            .add_count(ptr_limbs[1], self.pointer_max_bits - 16);
+        adapter_row.mem_ptr_limbs = ptr_limbs.map(F::from_canonical_u32);
+
+        adapter_row.imm_sign = F::from_bool(record.imm_sign);
+        adapter_row.imm = F::from_canonical_u16(record.imm);
+
+        mem_helper.fill(
+            record.read_data_aux.prev_timestamp,
+            record.from_timestamp + 1,
+            adapter_row.read_data_aux.as_mut(),
         );
-        self.range_checker_chip.add_count(
-            read_record.mem_ptr_limbs[1],
-            self.air.pointer_max_bits - RV32_CELL_BITS * 2,
+        adapter_row.rd_rs2_ptr = if record.rd_rs2_ptr != u32::MAX {
+            F::from_canonical_u32(record.rd_rs2_ptr)
+        } else {
+            F::ZERO
+        };
+
+        mem_helper.fill(
+            record.rs1_aux_record.prev_timestamp,
+            record.from_timestamp,
+            adapter_row.rs1_aux_cols.as_mut(),
         );
 
-        let aux_cols_factory = memory.aux_cols_factory();
-        let adapter_cols: &mut Rv32LoadStoreAdapterCols<_> = row_slice.borrow_mut();
-        adapter_cols.from_state = write_record.from_state.map(F::from_canonical_u32);
-        let rs1 = memory.record_by_id(read_record.rs1_record);
-        adapter_cols.rs1_data.copy_from_slice(rs1.data_slice());
-        aux_cols_factory.generate_read_aux(rs1, &mut adapter_cols.rs1_aux_cols);
-        adapter_cols.rs1_ptr = read_record.rs1_ptr;
-        adapter_cols.rd_rs2_ptr = write_record.rd_rs2_ptr;
-        let read = memory.record_by_id(read_record.read);
-        aux_cols_factory.generate_read_aux(read, &mut adapter_cols.read_data_aux);
-        adapter_cols.imm = read_record.imm;
-        adapter_cols.imm_sign = read_record.imm_sign;
-        adapter_cols.mem_ptr_limbs = read_record.mem_ptr_limbs.map(F::from_canonical_u32);
-        adapter_cols.mem_as = read_record.mem_as;
-        if write_record.write_id.0 != usize::MAX {
-            let write = memory.record_by_id(write_record.write_id);
-            aux_cols_factory.generate_base_aux(write, &mut adapter_cols.write_base_aux);
-            adapter_cols.needs_write = F::ONE;
-        }
-    }
+        adapter_row.rs1_data = record.rs1_val.to_le_bytes().map(F::from_canonical_u8);
+        adapter_row.rs1_ptr = F::from_canonical_u32(record.rs1_ptr);
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        adapter_row.from_state.timestamp = F::from_canonical_u32(record.from_timestamp);
+        adapter_row.from_state.pc = F::from_canonical_u32(record.from_pc);
     }
 }
diff --git a/extensions/rv32im/circuit/src/adapters/mod.rs b/extensions/rv32im/circuit/src/adapters/mod.rs
index ab15671b74..b07e2449ba 100644
--- a/extensions/rv32im/circuit/src/adapters/mod.rs
+++ b/extensions/rv32im/circuit/src/adapters/mod.rs
@@ -1,6 +1,13 @@
 use std::ops::Mul;
 
-use openvm_circuit::system::memory::{MemoryController, RecordId};
+use openvm_circuit::{
+    arch::{execution_mode::ExecutionCtxTrait, VmStateMut},
+    system::memory::{
+        merkle::public_values::PUBLIC_VALUES_AS,
+        online::{GuestMemory, TracingMemory},
+    },
+};
+use openvm_instructions::riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS};
 use openvm_stark_backend::p3_field::{FieldAlgebra, PrimeField32};
 
 mod alu;
@@ -46,25 +53,177 @@ pub fn decompose<F: PrimeField32>(value: u32) -> [F; RV32_REGISTER_NUM_LIMBS] {
     })
 }
 
-/// Read register value as [RV32_REGISTER_NUM_LIMBS] limbs from memory.
-/// Returns the read record and the register value as u32.
-/// Does not make any range check calls.
-pub fn read_rv32_register<F: PrimeField32>(
-    memory: &mut MemoryController<F>,
-    address_space: F,
-    pointer: F,
-) -> (RecordId, u32) {
-    debug_assert_eq!(address_space, F::ONE);
-    let record = memory.read::<RV32_REGISTER_NUM_LIMBS>(address_space, pointer);
-    let val = compose(record.1);
-    (record.0, val)
+#[inline(always)]
+pub fn imm_to_bytes(imm: u32) -> [u8; RV32_REGISTER_NUM_LIMBS] {
+    debug_assert_eq!(imm >> 24, 0);
+    let mut imm_le = imm.to_le_bytes();
+    imm_le[3] = imm_le[2];
+    imm_le
 }
 
-/// Peeks at the value of a register without updating the memory state or incrementing the
-/// timestamp.
-pub fn unsafe_read_rv32_register<F: PrimeField32>(memory: &MemoryController<F>, pointer: F) -> u32 {
-    let data = memory.unsafe_read::<RV32_REGISTER_NUM_LIMBS>(F::ONE, pointer);
-    compose(data)
+#[inline(always)]
+pub fn memory_read<const N: usize>(memory: &GuestMemory, address_space: u32, ptr: u32) -> [u8; N] {
+    debug_assert!(
+        address_space == RV32_REGISTER_AS
+            || address_space == RV32_MEMORY_AS
+            || address_space == PUBLIC_VALUES_AS,
+    );
+
+    // SAFETY:
+    // - `RV32_REGISTER_AS` and `RV32_MEMORY_AS` always have cell type `u8` and minimum alignment
+    //   `RV32_REGISTER_NUM_LIMBS`. TODO: confirm the same for `PUBLIC_VALUES_AS`
+    unsafe { memory.read::<u8, N>(address_space, ptr) }
+}
+
+#[inline(always)]
+pub fn memory_write<const N: usize>(
+    memory: &mut GuestMemory,
+    address_space: u32,
+    ptr: u32,
+    data: [u8; N],
+) {
+    debug_assert!(
+        address_space == RV32_REGISTER_AS
+            || address_space == RV32_MEMORY_AS
+            || address_space == PUBLIC_VALUES_AS
+    );
+
+    // SAFETY:
+    // - `RV32_REGISTER_AS` and `RV32_MEMORY_AS` always have cell type `u8` and minimum alignment
+    //   `RV32_REGISTER_NUM_LIMBS`. TODO: confirm the same for `PUBLIC_VALUES_AS`
+    unsafe { memory.write::<u8, N>(address_space, ptr, data) }
+}
+
+/// Atomic read operation which increments the timestamp by 1.
+/// Returns `(t_prev, [ptr:N]_{address_space})` where `t_prev` is the timestamp of the last memory
+/// access.
+#[inline(always)]
+pub fn timed_read<const N: usize>(
+    memory: &mut TracingMemory,
+    address_space: u32,
+    ptr: u32,
+) -> (u32, [u8; N]) {
+    debug_assert!(
+        address_space == RV32_REGISTER_AS
+            || address_space == RV32_MEMORY_AS
+            || address_space == PUBLIC_VALUES_AS
+    );
+
+    // SAFETY:
+    // - `RV32_REGISTER_AS` and `RV32_MEMORY_AS` always have cell type `u8` and minimum alignment
+    //   `RV32_REGISTER_NUM_LIMBS`. TODO: confirm the same for `PUBLIC_VALUES_AS`
+    unsafe { memory.read::<u8, N, RV32_REGISTER_NUM_LIMBS>(address_space, ptr) }
+}
+
+#[inline(always)]
+pub fn timed_write<const N: usize>(
+    memory: &mut TracingMemory,
+    address_space: u32,
+    ptr: u32,
+    data: [u8; N],
+) -> (u32, [u8; N]) {
+    debug_assert!(
+        address_space == RV32_REGISTER_AS
+            || address_space == RV32_MEMORY_AS
+            || address_space == PUBLIC_VALUES_AS
+    );
+
+    // SAFETY:
+    // - `RV32_REGISTER_AS` and `RV32_MEMORY_AS` always have cell type `u8` and minimum alignment
+    //   `RV32_REGISTER_NUM_LIMBS`. TODO: confirm the same for `PUBLIC_VALUES_AS`
+    unsafe { memory.write::<u8, N, RV32_REGISTER_NUM_LIMBS>(address_space, ptr, data) }
+}
+
+/// Reads `N` bytes at `ptr` in `address_space` and stores the previous access timestamp in
+/// `prev_timestamp`. Trace generation for this access can be done fully from that record.
+#[inline(always)]
+pub fn tracing_read<const N: usize>(
+    memory: &mut TracingMemory,
+    address_space: u32,
+    ptr: u32,
+    prev_timestamp: &mut u32,
+) -> [u8; N] {
+    let (t_prev, data) = timed_read(memory, address_space, ptr);
+    *prev_timestamp = t_prev;
+    data
+}
+
+#[inline(always)]
+pub fn tracing_read_imm(
+    memory: &mut TracingMemory,
+    imm: u32,
+    imm_mut: &mut u32,
+) -> [u8; RV32_REGISTER_NUM_LIMBS] {
+    *imm_mut = imm;
+    debug_assert_eq!(imm >> 24, 0); // highest byte should be zero to prevent overflow
+
+    memory.increment_timestamp();
+
+    let mut imm_le = imm.to_le_bytes();
+    // Important: we set the highest byte equal to the second highest byte, using the assumption
+    // that imm is at most 24 bits
+    imm_le[3] = imm_le[2];
+    imm_le
+}
+
+/// Writes `data` at `ptr` in `address_space`, storing the prior timestamp and prior data in
+/// `prev_timestamp` and `prev_data`. Trace generation for this access can use that record alone.
+#[inline(always)]
+pub fn tracing_write<const N: usize>(
+    memory: &mut TracingMemory,
+    address_space: u32,
+    ptr: u32,
+    data: [u8; N],
+    prev_timestamp: &mut u32,
+    prev_data: &mut [u8; N],
+) {
+    let (t_prev, data_prev) = timed_write(memory, address_space, ptr, data);
+    *prev_timestamp = t_prev;
+    *prev_data = data_prev;
+}
+
+#[inline(always)]
+pub fn memory_read_from_state<F, Ctx, const N: usize>(
+    state: &mut VmStateMut<F, GuestMemory, Ctx>,
+    address_space: u32,
+    ptr: u32,
+) -> [u8; N]
+where
+    Ctx: ExecutionCtxTrait,
+{
+    state.ctx.on_memory_operation(address_space, ptr, N as u32);
+
+    memory_read(state.memory, address_space, ptr)
+}
+
+#[inline(always)]
+pub fn memory_write_from_state<F, Ctx, const N: usize>(
+    state: &mut VmStateMut<F, GuestMemory, Ctx>,
+    address_space: u32,
+    ptr: u32,
+    data: [u8; N],
+) where
+    Ctx: ExecutionCtxTrait,
+{
+    state.ctx.on_memory_operation(address_space, ptr, N as u32);
+
+    memory_write(state.memory, address_space, ptr, data)
+}
+
+#[inline(always)]
+pub fn read_rv32_register_from_state<F, Ctx>(
+    state: &mut VmStateMut<F, GuestMemory, Ctx>,
+    ptr: u32,
+) -> u32
+where
+    Ctx: ExecutionCtxTrait,
+{
+    u32::from_le_bytes(memory_read_from_state(state, RV32_REGISTER_AS, ptr))
+}
+
+#[inline(always)]
+pub fn read_rv32_register(memory: &GuestMemory, ptr: u32) -> u32 {
+    u32::from_le_bytes(memory_read(memory, RV32_REGISTER_AS, ptr))
 }
 
 pub fn abstract_compose<T: FieldAlgebra, V: Mul<T, Output = T>>(
@@ -76,3 +235,8 @@ pub fn abstract_compose<T: FieldAlgebra, V: Mul<T, Output = T>>(
             acc + limb * T::from_canonical_u32(1 << (i * RV32_CELL_BITS))
         })
 }
+
+// TEMP[jpw]
+pub fn tmp_convert_to_u8s<F: PrimeField32, const N: usize>(data: [F; N]) -> [u8; N] {
+    data.map(|x| x.as_canonical_u32() as u8)
+}
diff --git a/extensions/rv32im/circuit/src/adapters/mul.rs b/extensions/rv32im/circuit/src/adapters/mul.rs
index a82e83acaa..f0a8281c22 100644
--- a/extensions/rv32im/circuit/src/adapters/mul.rs
+++ b/extensions/rv32im/circuit/src/adapters/mul.rs
@@ -1,22 +1,20 @@
-use std::{
-    borrow::{Borrow, BorrowMut},
-    marker::PhantomData,
-};
+use std::borrow::{Borrow, BorrowMut};
 
 use openvm_circuit::{
     arch::{
-        AdapterAirContext, AdapterRuntimeContext, BasicAdapterInterface, ExecutionBridge,
-        ExecutionBus, ExecutionState, MinimalInstruction, Result, VmAdapterAir, VmAdapterChip,
-        VmAdapterInterface,
+        get_record_from_slice, AdapterAirContext, AdapterTraceExecutor, AdapterTraceFiller,
+        BasicAdapterInterface, ExecutionBridge, ExecutionState, MinimalInstruction, VmAdapterAir,
     },
-    system::{
-        memory::{
-            offline_checker::{MemoryBridge, MemoryReadAuxCols, MemoryWriteAuxCols},
-            MemoryAddress, MemoryController, OfflineMemory, RecordId,
+    system::memory::{
+        offline_checker::{
+            MemoryBridge, MemoryReadAuxCols, MemoryReadAuxRecord, MemoryWriteAuxCols,
+            MemoryWriteBytesAuxRecord,
         },
-        program::ProgramBus,
+        online::TracingMemory,
+        MemoryAddress, MemoryAuxColsFactory,
     },
 };
+use openvm_circuit_primitives::AlignedBytesBorrow;
 use openvm_circuit_primitives_derive::AlignedBorrow;
 use openvm_instructions::{
     instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_REGISTER_AS,
@@ -26,49 +24,9 @@ use openvm_stark_backend::{
     p3_air::BaseAir,
     p3_field::{Field, FieldAlgebra, PrimeField32},
 };
-use serde::{Deserialize, Serialize};
-
-use super::RV32_REGISTER_NUM_LIMBS;
-
-/// Reads instructions of the form OP a, b, c, d where \[a:4\]_d = \[b:4\]_d op \[c:4\]_d.
-/// Operand d can only be 1, and there is no immediate support.
-#[derive(Debug)]
-pub struct Rv32MultAdapterChip<F: Field> {
-    pub air: Rv32MultAdapterAir,
-    _marker: PhantomData<F>,
-}
-
-impl<F: PrimeField32> Rv32MultAdapterChip<F> {
-    pub fn new(
-        execution_bus: ExecutionBus,
-        program_bus: ProgramBus,
-        memory_bridge: MemoryBridge,
-    ) -> Self {
-        Self {
-            air: Rv32MultAdapterAir {
-                execution_bridge: ExecutionBridge::new(execution_bus, program_bus),
-                memory_bridge,
-            },
-            _marker: PhantomData,
-        }
-    }
-}
-
-#[repr(C)]
-#[derive(Debug, Serialize, Deserialize)]
-pub struct Rv32MultReadRecord {
-    /// Reads from operand registers
-    pub rs1: RecordId,
-    pub rs2: RecordId,
-}
 
-#[repr(C)]
-#[derive(Debug, Serialize, Deserialize)]
-pub struct Rv32MultWriteRecord {
-    pub from_state: ExecutionState<u32>,
-    /// Write to destination register
-    pub rd_id: RecordId,
-}
+use super::{tracing_write, RV32_REGISTER_NUM_LIMBS};
+use crate::adapters::tracing_read;
 
 #[repr(C)]
 #[derive(AlignedBorrow)]
@@ -81,6 +39,8 @@ pub struct Rv32MultAdapterCols<T> {
     pub writes_aux: MemoryWriteAuxCols<T, RV32_REGISTER_NUM_LIMBS>,
 }
 
+/// Reads instructions of the form OP a, b, c, d where \[a:4\]_d = \[b:4\]_d op \[c:4\]_d.
+/// Operand d can only be 1, and there is no immediate support.
 #[derive(Clone, Copy, Debug, derive_new::new)]
 pub struct Rv32MultAdapterAir {
     pub(super) execution_bridge: ExecutionBridge,
@@ -167,92 +127,130 @@ impl<AB: InteractionBuilder> VmAdapterAir<AB> for Rv32MultAdapterAir {
     }
 }
 
-impl<F: PrimeField32> VmAdapterChip<F> for Rv32MultAdapterChip<F> {
-    type ReadRecord = Rv32MultReadRecord;
-    type WriteRecord = Rv32MultWriteRecord;
-    type Air = Rv32MultAdapterAir;
-    type Interface = BasicAdapterInterface<
-        F,
-        MinimalInstruction<F>,
-        2,
-        1,
-        RV32_REGISTER_NUM_LIMBS,
-        RV32_REGISTER_NUM_LIMBS,
-    >;
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct Rv32MultAdapterRecord {
+    pub from_pc: u32,
+    pub from_timestamp: u32,
 
-    fn preprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-    ) -> Result<(
-        <Self::Interface as VmAdapterInterface<F>>::Reads,
-        Self::ReadRecord,
-    )> {
-        let Instruction { b, c, d, .. } = *instruction;
+    pub rd_ptr: u32,
+    pub rs1_ptr: u32,
+    pub rs2_ptr: u32,
 
-        debug_assert_eq!(d.as_canonical_u32(), RV32_REGISTER_AS);
+    pub reads_aux: [MemoryReadAuxRecord; 2],
+    pub writes_aux: MemoryWriteBytesAuxRecord<RV32_REGISTER_NUM_LIMBS>,
+}
+
+#[derive(Clone, Copy, derive_new::new)]
+pub struct Rv32MultAdapterExecutor;
 
-        let rs1 = memory.read::<RV32_REGISTER_NUM_LIMBS>(d, b);
-        let rs2 = memory.read::<RV32_REGISTER_NUM_LIMBS>(d, c);
+#[derive(Clone, Copy, derive_new::new)]
+pub struct Rv32MultAdapterFiller;
 
-        Ok((
-            [rs1.1, rs2.1],
-            Self::ReadRecord {
-                rs1: rs1.0,
-                rs2: rs2.0,
-            },
-        ))
+impl<F> AdapterTraceExecutor<F> for Rv32MultAdapterExecutor
+where
+    F: PrimeField32,
+{
+    const WIDTH: usize = size_of::<Rv32MultAdapterCols<u8>>();
+    type ReadData = [[u8; RV32_REGISTER_NUM_LIMBS]; 2];
+    type WriteData = [[u8; RV32_REGISTER_NUM_LIMBS]; 1];
+    type RecordMut<'a> = &'a mut Rv32MultAdapterRecord;
+
+    #[inline(always)]
+    fn start(pc: u32, memory: &TracingMemory, record: &mut Self::RecordMut<'_>) {
+        record.from_pc = pc;
+        record.from_timestamp = memory.timestamp;
     }
 
-    fn postprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
+    #[inline(always)]
+    fn read(
+        &self,
+        memory: &mut TracingMemory,
         instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-        output: AdapterRuntimeContext<F, Self::Interface>,
-        _read_record: &Self::ReadRecord,
-    ) -> Result<(ExecutionState<u32>, Self::WriteRecord)> {
-        let Instruction { a, d, .. } = *instruction;
-        let (rd_id, _) = memory.write(d, a, output.writes[0]);
-
-        let timestamp_delta = memory.timestamp() - from_state.timestamp;
-        debug_assert!(
-            timestamp_delta == 3,
-            "timestamp delta is {}, expected 3",
-            timestamp_delta
+        record: &mut Self::RecordMut<'_>,
+    ) -> Self::ReadData {
+        let &Instruction { b, c, d, .. } = instruction;
+
+        debug_assert_eq!(d.as_canonical_u32(), RV32_REGISTER_AS);
+
+        record.rs1_ptr = b.as_canonical_u32();
+        let rs1 = tracing_read(
+            memory,
+            RV32_REGISTER_AS,
+            b.as_canonical_u32(),
+            &mut record.reads_aux[0].prev_timestamp,
+        );
+        record.rs2_ptr = c.as_canonical_u32();
+        let rs2 = tracing_read(
+            memory,
+            RV32_REGISTER_AS,
+            c.as_canonical_u32(),
+            &mut record.reads_aux[1].prev_timestamp,
         );
 
-        Ok((
-            ExecutionState {
-                pc: from_state.pc + DEFAULT_PC_STEP,
-                timestamp: memory.timestamp(),
-            },
-            Self::WriteRecord { from_state, rd_id },
-        ))
+        [rs1, rs2]
     }
 
-    fn generate_trace_row(
+    #[inline(always)]
+    fn write(
         &self,
-        row_slice: &mut [F],
-        read_record: Self::ReadRecord,
-        write_record: Self::WriteRecord,
-        memory: &OfflineMemory<F>,
+        memory: &mut TracingMemory,
+        instruction: &Instruction<F>,
+        data: Self::WriteData,
+        record: &mut Self::RecordMut<'_>,
     ) {
-        let aux_cols_factory = memory.aux_cols_factory();
-        let row_slice: &mut Rv32MultAdapterCols<_> = row_slice.borrow_mut();
-        row_slice.from_state = write_record.from_state.map(F::from_canonical_u32);
-        let rd = memory.record_by_id(write_record.rd_id);
-        row_slice.rd_ptr = rd.pointer;
-        let rs1 = memory.record_by_id(read_record.rs1);
-        let rs2 = memory.record_by_id(read_record.rs2);
-        row_slice.rs1_ptr = rs1.pointer;
-        row_slice.rs2_ptr = rs2.pointer;
-        aux_cols_factory.generate_read_aux(rs1, &mut row_slice.reads_aux[0]);
-        aux_cols_factory.generate_read_aux(rs2, &mut row_slice.reads_aux[1]);
-        aux_cols_factory.generate_write_aux(rd, &mut row_slice.writes_aux);
+        let &Instruction { a, d, .. } = instruction;
+
+        debug_assert_eq!(d.as_canonical_u32(), RV32_REGISTER_AS);
+
+        record.rd_ptr = a.as_canonical_u32();
+        tracing_write(
+            memory,
+            RV32_REGISTER_AS,
+            a.as_canonical_u32(),
+            data[0],
+            &mut record.writes_aux.prev_timestamp,
+            &mut record.writes_aux.prev_data,
+        )
     }
+}
+
+impl<F: PrimeField32> AdapterTraceFiller<F> for Rv32MultAdapterFiller {
+    const WIDTH: usize = size_of::<Rv32MultAdapterCols<u8>>();
+
+    #[inline(always)]
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, mut adapter_row: &mut [F]) {
+        let record: &Rv32MultAdapterRecord = unsafe { get_record_from_slice(&mut adapter_row, ()) };
+        let adapter_row: &mut Rv32MultAdapterCols<F> = adapter_row.borrow_mut();
+
+        let timestamp = record.from_timestamp;
+
+        adapter_row
+            .writes_aux
+            .set_prev_data(record.writes_aux.prev_data.map(F::from_canonical_u8));
+        mem_helper.fill(
+            record.writes_aux.prev_timestamp,
+            timestamp + 2,
+            adapter_row.writes_aux.as_mut(),
+        );
+
+        mem_helper.fill(
+            record.reads_aux[1].prev_timestamp,
+            timestamp + 1,
+            adapter_row.reads_aux[1].as_mut(),
+        );
+
+        mem_helper.fill(
+            record.reads_aux[0].prev_timestamp,
+            timestamp,
+            adapter_row.reads_aux[0].as_mut(),
+        );
+
+        adapter_row.rs2_ptr = F::from_canonical_u32(record.rs2_ptr);
+        adapter_row.rs1_ptr = F::from_canonical_u32(record.rs1_ptr);
+        adapter_row.rd_ptr = F::from_canonical_u32(record.rd_ptr);
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        adapter_row.from_state.timestamp = F::from_canonical_u32(record.from_timestamp);
+        adapter_row.from_state.pc = F::from_canonical_u32(record.from_pc);
     }
 }
diff --git a/extensions/rv32im/circuit/src/adapters/rdwrite.rs b/extensions/rv32im/circuit/src/adapters/rdwrite.rs
index abd4d8eb17..02d669a2d9 100644
--- a/extensions/rv32im/circuit/src/adapters/rdwrite.rs
+++ b/extensions/rv32im/circuit/src/adapters/rdwrite.rs
@@ -1,23 +1,17 @@
-use std::{
-    borrow::{Borrow, BorrowMut},
-    marker::PhantomData,
-};
+use std::borrow::{Borrow, BorrowMut};
 
 use openvm_circuit::{
     arch::{
-        AdapterAirContext, AdapterRuntimeContext, BasicAdapterInterface, ExecutionBridge,
-        ExecutionBus, ExecutionState, ImmInstruction, Result, VmAdapterAir, VmAdapterChip,
-        VmAdapterInterface,
+        get_record_from_slice, AdapterAirContext, AdapterTraceExecutor, AdapterTraceFiller,
+        BasicAdapterInterface, ExecutionBridge, ExecutionState, ImmInstruction, VmAdapterAir,
     },
-    system::{
-        memory::{
-            offline_checker::{MemoryBridge, MemoryWriteAuxCols},
-            MemoryAddress, MemoryController, OfflineMemory, RecordId,
-        },
-        program::ProgramBus,
+    system::memory::{
+        offline_checker::{MemoryBridge, MemoryWriteAuxCols, MemoryWriteBytesAuxRecord},
+        online::TracingMemory,
+        MemoryAddress, MemoryAuxColsFactory,
     },
 };
-use openvm_circuit_primitives::utils::not;
+use openvm_circuit_primitives::{utils::not, AlignedBytesBorrow};
 use openvm_circuit_primitives_derive::AlignedBorrow;
 use openvm_instructions::{
     instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_REGISTER_AS,
@@ -27,59 +21,9 @@ use openvm_stark_backend::{
     p3_air::{AirBuilder, BaseAir},
     p3_field::{Field, FieldAlgebra, PrimeField32},
 };
-use serde::{Deserialize, Serialize};
 
 use super::RV32_REGISTER_NUM_LIMBS;
-
-/// This adapter doesn't read anything, and writes to \[a:4\]_d, where d == 1
-#[derive(Debug)]
-pub struct Rv32RdWriteAdapterChip<F: Field> {
-    pub air: Rv32RdWriteAdapterAir,
-    _marker: PhantomData<F>,
-}
-
-/// This adapter doesn't read anything, and **maybe** writes to \[a:4\]_d, where d == 1
-#[derive(Debug)]
-pub struct Rv32CondRdWriteAdapterChip<F: Field> {
-    /// Do not use the inner air directly, use `air` instead.
-    inner: Rv32RdWriteAdapterChip<F>,
-    pub air: Rv32CondRdWriteAdapterAir,
-}
-
-impl<F: PrimeField32> Rv32RdWriteAdapterChip<F> {
-    pub fn new(
-        execution_bus: ExecutionBus,
-        program_bus: ProgramBus,
-        memory_bridge: MemoryBridge,
-    ) -> Self {
-        Self {
-            air: Rv32RdWriteAdapterAir {
-                execution_bridge: ExecutionBridge::new(execution_bus, program_bus),
-                memory_bridge,
-            },
-            _marker: PhantomData,
-        }
-    }
-}
-
-impl<F: PrimeField32> Rv32CondRdWriteAdapterChip<F> {
-    pub fn new(
-        execution_bus: ExecutionBus,
-        program_bus: ProgramBus,
-        memory_bridge: MemoryBridge,
-    ) -> Self {
-        let inner = Rv32RdWriteAdapterChip::new(execution_bus, program_bus, memory_bridge);
-        let air = Rv32CondRdWriteAdapterAir { inner: inner.air };
-        Self { inner, air }
-    }
-}
-
-#[repr(C)]
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct Rv32RdWriteWriteRecord {
-    pub from_state: ExecutionState<u32>,
-    pub rd_id: Option<RecordId>,
-}
+use crate::adapters::tracing_write;
 
 #[repr(C)]
 #[derive(Debug, Clone, AlignedBorrow)]
@@ -92,16 +36,18 @@ pub struct Rv32RdWriteAdapterCols<T> {
 #[repr(C)]
 #[derive(Debug, Clone, AlignedBorrow)]
 pub struct Rv32CondRdWriteAdapterCols<T> {
-    inner: Rv32RdWriteAdapterCols<T>,
+    pub inner: Rv32RdWriteAdapterCols<T>,
     pub needs_write: T,
 }
 
+/// This adapter doesn't read anything, and writes to \[a:4\]_d, where d == 1
 #[derive(Clone, Copy, Debug, derive_new::new)]
 pub struct Rv32RdWriteAdapterAir {
     pub(super) memory_bridge: MemoryBridge,
     pub(super) execution_bridge: ExecutionBridge,
 }
 
+/// This adapter doesn't read anything, and **maybe** writes to \[a:4\]_d, where d == 1
 #[derive(Clone, Copy, Debug, derive_new::new)]
 pub struct Rv32CondRdWriteAdapterAir {
     inner: Rv32RdWriteAdapterAir,
@@ -241,131 +187,187 @@ impl<AB: InteractionBuilder> VmAdapterAir<AB> for Rv32CondRdWriteAdapterAir {
     }
 }
 
-impl<F: PrimeField32> VmAdapterChip<F> for Rv32RdWriteAdapterChip<F> {
-    type ReadRecord = ();
-    type WriteRecord = Rv32RdWriteWriteRecord;
-    type Air = Rv32RdWriteAdapterAir;
-    type Interface = BasicAdapterInterface<F, ImmInstruction<F>, 0, 1, 0, RV32_REGISTER_NUM_LIMBS>;
-
-    fn preprocess(
-        &mut self,
-        _memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-    ) -> Result<(
-        <Self::Interface as VmAdapterInterface<F>>::Reads,
-        Self::ReadRecord,
-    )> {
-        let d = instruction.d;
-        debug_assert_eq!(d.as_canonical_u32(), RV32_REGISTER_AS);
+/// Record for the adapter that doesn't read anything and writes to \[a:4\]_d, where d == 1
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug, Clone)]
+pub struct Rv32RdWriteAdapterRecord {
+    pub from_pc: u32,
+    pub from_timestamp: u32,
+
+    // Set to u32::MAX by the conditional adapter to indicate that no write happened
+    pub rd_ptr: u32,
+    pub rd_aux_record: MemoryWriteBytesAuxRecord<RV32_REGISTER_NUM_LIMBS>,
+}
 
-        Ok(([], ()))
+#[derive(Clone, Copy, derive_new::new)]
+pub struct Rv32RdWriteAdapterExecutor;
+
+#[derive(Clone, Copy, derive_new::new)]
+pub struct Rv32RdWriteAdapterFiller;
+
+impl<F> AdapterTraceExecutor<F> for Rv32RdWriteAdapterExecutor
+where
+    F: PrimeField32,
+{
+    const WIDTH: usize = size_of::<Rv32RdWriteAdapterCols<u8>>();
+    type ReadData = ();
+    type WriteData = [u8; RV32_REGISTER_NUM_LIMBS];
+    type RecordMut<'a> = &'a mut Rv32RdWriteAdapterRecord;
+
+    #[inline(always)]
+    fn start(pc: u32, memory: &TracingMemory, record: &mut Self::RecordMut<'_>) {
+        record.from_pc = pc;
+        record.from_timestamp = memory.timestamp;
     }
 
-    fn postprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-        output: AdapterRuntimeContext<F, Self::Interface>,
-        _read_record: &Self::ReadRecord,
-    ) -> Result<(ExecutionState<u32>, Self::WriteRecord)> {
-        let Instruction { a, d, .. } = *instruction;
-        let (rd_id, _) = memory.write(d, a, output.writes[0]);
-
-        Ok((
-            ExecutionState {
-                pc: output.to_pc.unwrap_or(from_state.pc + DEFAULT_PC_STEP),
-                timestamp: memory.timestamp(),
-            },
-            Self::WriteRecord {
-                from_state,
-                rd_id: Some(rd_id),
-            },
-        ))
+    #[inline(always)]
+    fn read(
+        &self,
+        _memory: &mut TracingMemory,
+        _instruction: &Instruction<F>,
+        _record: &mut Self::RecordMut<'_>,
+    ) -> Self::ReadData {
+        // Rv32RdWriteAdapter doesn't read anything
     }
 
-    fn generate_trace_row(
+    #[inline(always)]
+    fn write(
         &self,
-        row_slice: &mut [F],
-        _read_record: Self::ReadRecord,
-        write_record: Self::WriteRecord,
-        memory: &OfflineMemory<F>,
+        memory: &mut TracingMemory,
+        instruction: &Instruction<F>,
+        data: Self::WriteData,
+        record: &mut Self::RecordMut<'_>,
     ) {
-        let aux_cols_factory = memory.aux_cols_factory();
-        let adapter_cols: &mut Rv32RdWriteAdapterCols<F> = row_slice.borrow_mut();
-        adapter_cols.from_state = write_record.from_state.map(F::from_canonical_u32);
-        let rd = memory.record_by_id(write_record.rd_id.unwrap());
-        adapter_cols.rd_ptr = rd.pointer;
-        aux_cols_factory.generate_write_aux(rd, &mut adapter_cols.rd_aux_cols);
+        let &Instruction { a, d, .. } = instruction;
+
+        debug_assert_eq!(d.as_canonical_u32(), RV32_REGISTER_AS);
+
+        record.rd_ptr = a.as_canonical_u32();
+        tracing_write(
+            memory,
+            RV32_REGISTER_AS,
+            record.rd_ptr,
+            data,
+            &mut record.rd_aux_record.prev_timestamp,
+            &mut record.rd_aux_record.prev_data,
+        );
     }
+}
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+impl<F: PrimeField32> AdapterTraceFiller<F> for Rv32RdWriteAdapterFiller {
+    const WIDTH: usize = size_of::<Rv32RdWriteAdapterCols<u8>>();
+
+    #[inline(always)]
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, mut adapter_row: &mut [F]) {
+        let record: &Rv32RdWriteAdapterRecord =
+            unsafe { get_record_from_slice(&mut adapter_row, ()) };
+        let adapter_row: &mut Rv32RdWriteAdapterCols<F> = adapter_row.borrow_mut();
+
+        adapter_row
+            .rd_aux_cols
+            .set_prev_data(record.rd_aux_record.prev_data.map(F::from_canonical_u8));
+        mem_helper.fill(
+            record.rd_aux_record.prev_timestamp,
+            record.from_timestamp,
+            adapter_row.rd_aux_cols.as_mut(),
+        );
+        adapter_row.rd_ptr = F::from_canonical_u32(record.rd_ptr);
+        adapter_row.from_state.timestamp = F::from_canonical_u32(record.from_timestamp);
+        adapter_row.from_state.pc = F::from_canonical_u32(record.from_pc);
     }
 }
 
-impl<F: PrimeField32> VmAdapterChip<F> for Rv32CondRdWriteAdapterChip<F> {
-    type ReadRecord = ();
-    type WriteRecord = Rv32RdWriteWriteRecord;
-    type Air = Rv32CondRdWriteAdapterAir;
-    type Interface = BasicAdapterInterface<F, ImmInstruction<F>, 0, 1, 0, RV32_REGISTER_NUM_LIMBS>;
+/// This adapter doesn't read anything, and **maybe** writes to \[a:4\]_d, where d == 1
+#[derive(Clone, Copy, derive_new::new)]
+pub struct Rv32CondRdWriteAdapterExecutor {
+    inner: Rv32RdWriteAdapterExecutor,
+}
 
-    fn preprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-    ) -> Result<(
-        <Self::Interface as VmAdapterInterface<F>>::Reads,
-        Self::ReadRecord,
-    )> {
-        self.inner.preprocess(memory, instruction)
+#[derive(Clone, Copy, derive_new::new)]
+pub struct Rv32CondRdWriteAdapterFiller {
+    inner: Rv32RdWriteAdapterFiller,
+}
+
+impl<F> AdapterTraceExecutor<F> for Rv32CondRdWriteAdapterExecutor
+where
+    F: PrimeField32,
+{
+    const WIDTH: usize = size_of::<Rv32CondRdWriteAdapterCols<u8>>();
+    type ReadData = ();
+    type WriteData = [u8; RV32_REGISTER_NUM_LIMBS];
+    type RecordMut<'a> = &'a mut Rv32RdWriteAdapterRecord;
+
+    #[inline(always)]
+    fn start(pc: u32, memory: &TracingMemory, record: &mut Self::RecordMut<'_>) {
+        record.from_pc = pc;
+        record.from_timestamp = memory.timestamp;
     }
 
-    fn postprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
+    #[inline(always)]
+    fn read(
+        &self,
+        memory: &mut TracingMemory,
         instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-        output: AdapterRuntimeContext<F, Self::Interface>,
-        _read_record: &Self::ReadRecord,
-    ) -> Result<(ExecutionState<u32>, Self::WriteRecord)> {
-        let Instruction { a, d, .. } = *instruction;
-        let rd_id = if instruction.f != F::ZERO {
-            let (rd_id, _) = memory.write(d, a, output.writes[0]);
-            Some(rd_id)
-        } else {
-            memory.increment_timestamp();
-            None
-        };
-
-        Ok((
-            ExecutionState {
-                pc: output.to_pc.unwrap_or(from_state.pc + DEFAULT_PC_STEP),
-                timestamp: memory.timestamp(),
-            },
-            Self::WriteRecord { from_state, rd_id },
-        ))
+        record: &mut Self::RecordMut<'_>,
+    ) -> Self::ReadData {
+        <Rv32RdWriteAdapterExecutor as AdapterTraceExecutor<F>>::read(
+            &self.inner,
+            memory,
+            instruction,
+            record,
+        )
     }
 
-    fn generate_trace_row(
+    #[inline(always)]
+    fn write(
         &self,
-        row_slice: &mut [F],
-        _read_record: Self::ReadRecord,
-        write_record: Self::WriteRecord,
-        memory: &OfflineMemory<F>,
+        memory: &mut TracingMemory,
+        instruction: &Instruction<F>,
+        data: Self::WriteData,
+        record: &mut Self::RecordMut<'_>,
     ) {
-        let aux_cols_factory = memory.aux_cols_factory();
-        let adapter_cols: &mut Rv32CondRdWriteAdapterCols<F> = row_slice.borrow_mut();
-        adapter_cols.inner.from_state = write_record.from_state.map(F::from_canonical_u32);
-        if let Some(rd_id) = write_record.rd_id {
-            let rd = memory.record_by_id(rd_id);
-            adapter_cols.inner.rd_ptr = rd.pointer;
-            aux_cols_factory.generate_write_aux(rd, &mut adapter_cols.inner.rd_aux_cols);
-            adapter_cols.needs_write = F::ONE;
+        let Instruction { f: enabled, .. } = instruction;
+
+        if enabled.is_one() {
+            <Rv32RdWriteAdapterExecutor as AdapterTraceExecutor<F>>::write(
+                &self.inner,
+                memory,
+                instruction,
+                data,
+                record,
+            );
+        } else {
+            memory.increment_timestamp();
+            record.rd_ptr = u32::MAX;
         }
     }
+}
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+impl<F: PrimeField32> AdapterTraceFiller<F> for Rv32CondRdWriteAdapterFiller {
+    const WIDTH: usize = size_of::<Rv32CondRdWriteAdapterCols<u8>>();
+
+    #[inline(always)]
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, mut adapter_row: &mut [F]) {
+        let record: &Rv32RdWriteAdapterRecord =
+            unsafe { get_record_from_slice(&mut adapter_row, ()) };
+        let adapter_cols: &mut Rv32CondRdWriteAdapterCols<F> = adapter_row.borrow_mut();
+
+        adapter_cols.needs_write = F::from_bool(record.rd_ptr != u32::MAX);
+
+        if record.rd_ptr != u32::MAX {
+            unsafe {
+                self.inner.fill_trace_row(
+                    mem_helper,
+                    adapter_row
+                        .split_at_mut_unchecked(size_of::<Rv32RdWriteAdapterCols<u8>>())
+                        .0,
+                )
+            };
+        } else {
+            adapter_cols.inner.rd_ptr = F::ZERO;
+            mem_helper.fill_zero(adapter_cols.inner.rd_aux_cols.as_mut());
+            adapter_cols.inner.from_state.timestamp = F::from_canonical_u32(record.from_timestamp);
+            adapter_cols.inner.from_state.pc = F::from_canonical_u32(record.from_pc);
+        }
     }
 }
diff --git a/extensions/rv32im/circuit/src/auipc/core.rs b/extensions/rv32im/circuit/src/auipc/core.rs
index 8ec9e274f6..42bd148776 100644
--- a/extensions/rv32im/circuit/src/auipc/core.rs
+++ b/extensions/rv32im/circuit/src/auipc/core.rs
@@ -1,17 +1,22 @@
 use std::{
-    array,
+    array::{self, from_fn},
     borrow::{Borrow, BorrowMut},
 };
 
-use openvm_circuit::arch::{
-    AdapterAirContext, AdapterRuntimeContext, ImmInstruction, Result, VmAdapterInterface,
-    VmCoreAir, VmCoreChip,
+use openvm_circuit::{
+    arch::*,
+    system::memory::{online::TracingMemory, MemoryAuxColsFactory},
 };
-use openvm_circuit_primitives::bitwise_op_lookup::{
-    BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
+use openvm_circuit_primitives::{
+    bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
+    AlignedBytesBorrow,
 };
 use openvm_circuit_primitives_derive::AlignedBorrow;
-use openvm_instructions::{instruction::Instruction, program::PC_BITS, LocalOpcode};
+use openvm_instructions::{
+    instruction::Instruction,
+    program::{DEFAULT_PC_STEP, PC_BITS},
+    LocalOpcode,
+};
 use openvm_rv32im_transpiler::Rv32AuipcOpcode::{self, *};
 use openvm_stark_backend::{
     interaction::InteractionBuilder,
@@ -19,11 +24,10 @@ use openvm_stark_backend::{
     p3_field::{Field, FieldAlgebra, PrimeField32},
     rap::BaseAirWithPublicValues,
 };
-use serde::{Deserialize, Serialize};
-
-use crate::adapters::{RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS};
 
-const RV32_LIMB_MAX: u32 = (1 << RV32_CELL_BITS) - 1;
+use crate::adapters::{
+    Rv32RdWriteAdapterExecutor, Rv32RdWriteAdapterFiller, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS,
+};
 
 #[repr(C)]
 #[derive(Debug, Clone, AlignedBorrow)]
@@ -36,7 +40,7 @@ pub struct Rv32AuipcCoreCols<T> {
     pub rd_data: [T; RV32_REGISTER_NUM_LIMBS],
 }
 
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, Copy, derive_new::new)]
 pub struct Rv32AuipcCoreAir {
     pub bus: BitwiseOperationLookupBus,
 }
@@ -186,116 +190,105 @@ where
 }
 
 #[repr(C)]
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct Rv32AuipcCoreRecord<F> {
-    pub imm_limbs: [F; RV32_REGISTER_NUM_LIMBS - 1],
-    pub pc_limbs: [F; RV32_REGISTER_NUM_LIMBS - 2],
-    pub rd_data: [F; RV32_REGISTER_NUM_LIMBS],
+#[derive(AlignedBytesBorrow, Debug, Clone)]
+pub struct Rv32AuipcCoreRecord {
+    pub from_pc: u32,
+    pub imm: u32,
 }
 
-pub struct Rv32AuipcCoreChip {
-    pub air: Rv32AuipcCoreAir,
-    pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+#[derive(Clone, Copy, derive_new::new)]
+pub struct Rv32AuipcExecutor<A = Rv32RdWriteAdapterExecutor> {
+    adapter: A,
 }
 
-impl Rv32AuipcCoreChip {
-    pub fn new(bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>) -> Self {
-        Self {
-            air: Rv32AuipcCoreAir {
-                bus: bitwise_lookup_chip.bus(),
-            },
-            bitwise_lookup_chip,
-        }
-    }
+#[derive(Clone, derive_new::new)]
+pub struct Rv32AuipcFiller<A = Rv32RdWriteAdapterFiller> {
+    adapter: A,
+    pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
 }
 
-impl<F: PrimeField32, I: VmAdapterInterface<F>> VmCoreChip<F, I> for Rv32AuipcCoreChip
+impl<F, A, RA> PreflightExecutor<F, RA> for Rv32AuipcExecutor<A>
 where
-    I::Writes: From<[[F; RV32_REGISTER_NUM_LIMBS]; 1]>,
+    F: PrimeField32,
+    A: 'static + AdapterTraceExecutor<F, ReadData = (), WriteData = [u8; RV32_REGISTER_NUM_LIMBS]>,
+    for<'buf> RA: RecordArena<
+        'buf,
+        EmptyAdapterCoreLayout<F, A>,
+        (A::RecordMut<'buf>, &'buf mut Rv32AuipcCoreRecord),
+    >,
 {
-    type Record = Rv32AuipcCoreRecord<F>;
-    type Air = Rv32AuipcCoreAir;
+    fn get_opcode_name(&self, _: usize) -> String {
+        format!("{:?}", AUIPC)
+    }
 
-    #[allow(clippy::type_complexity)]
-    fn execute_instruction(
+    fn execute(
         &self,
+        state: VmStateMut<F, TracingMemory, RA>,
         instruction: &Instruction<F>,
-        from_pc: u32,
-        _reads: I::Reads,
-    ) -> Result<(AdapterRuntimeContext<F, I>, Self::Record)> {
-        let local_opcode = Rv32AuipcOpcode::from_usize(
-            instruction
-                .opcode
-                .local_opcode_idx(Rv32AuipcOpcode::CLASS_OFFSET),
-        );
-        let imm = instruction.c.as_canonical_u32();
-        let rd_data = run_auipc(local_opcode, from_pc, imm);
-        let rd_data_field = rd_data.map(F::from_canonical_u32);
-
-        let output = AdapterRuntimeContext::without_pc([rd_data_field]);
-
-        let imm_limbs = array::from_fn(|i| (imm >> (i * RV32_CELL_BITS)) & RV32_LIMB_MAX);
-        let pc_limbs: [u32; RV32_REGISTER_NUM_LIMBS] =
-            array::from_fn(|i| (from_pc >> (i * RV32_CELL_BITS)) & RV32_LIMB_MAX);
+    ) -> Result<(), ExecutionError> {
+        let (mut adapter_record, core_record) = state.ctx.alloc(EmptyAdapterCoreLayout::new());
 
-        for i in 0..(RV32_REGISTER_NUM_LIMBS / 2) {
-            self.bitwise_lookup_chip
-                .request_range(rd_data[i * 2], rd_data[i * 2 + 1]);
-        }
+        A::start(*state.pc, state.memory, &mut adapter_record);
 
-        let mut need_range_check: Vec<u32> = Vec::new();
-        for limb in imm_limbs {
-            need_range_check.push(limb);
-        }
+        core_record.from_pc = *state.pc;
+        core_record.imm = instruction.c.as_canonical_u32();
 
-        for (i, limb) in pc_limbs.iter().enumerate().skip(1) {
-            if i == pc_limbs.len() - 1 {
-                need_range_check.push((*limb) << (pc_limbs.len() * RV32_CELL_BITS - PC_BITS));
-            } else {
-                need_range_check.push(*limb);
-            }
-        }
+        let rd = run_auipc(*state.pc, core_record.imm);
 
-        for pair in need_range_check.chunks(2) {
-            self.bitwise_lookup_chip.request_range(pair[0], pair[1]);
-        }
+        self.adapter
+            .write(state.memory, instruction, rd, &mut adapter_record);
 
-        Ok((
-            output,
-            Self::Record {
-                imm_limbs: imm_limbs.map(F::from_canonical_u32),
-                pc_limbs: array::from_fn(|i| F::from_canonical_u32(pc_limbs[i + 1])),
-                rd_data: rd_data.map(F::from_canonical_u32),
-            },
-        ))
-    }
+        *state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP);
 
-    fn get_opcode_name(&self, opcode: usize) -> String {
-        format!(
-            "{:?}",
-            Rv32AuipcOpcode::from_usize(opcode - Rv32AuipcOpcode::CLASS_OFFSET)
-        )
+        Ok(())
     }
+}
 
-    fn generate_trace_row(&self, row_slice: &mut [F], record: Self::Record) {
-        let core_cols: &mut Rv32AuipcCoreCols<F> = row_slice.borrow_mut();
-        core_cols.imm_limbs = record.imm_limbs;
-        core_cols.pc_limbs = record.pc_limbs;
-        core_cols.rd_data = record.rd_data;
-        core_cols.is_valid = F::ONE;
-    }
+impl<F, A> TraceFiller<F> for Rv32AuipcFiller<A>
+where
+    F: PrimeField32,
+    A: 'static + AdapterTraceFiller<F>,
+{
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, row_slice: &mut [F]) {
+        let (adapter_row, mut core_row) = unsafe { row_slice.split_at_mut_unchecked(A::WIDTH) };
+        self.adapter.fill_trace_row(mem_helper, adapter_row);
+
+        let record: &Rv32AuipcCoreRecord = unsafe { get_record_from_slice(&mut core_row, ()) };
+
+        let core_row: &mut Rv32AuipcCoreCols<F> = core_row.borrow_mut();
+
+        let imm_limbs = record.imm.to_le_bytes();
+        let pc_limbs = record.from_pc.to_le_bytes();
+        let rd_data = run_auipc(record.from_pc, record.imm);
+        debug_assert_eq!(imm_limbs[3], 0);
+
+        // range checks:
+        // hardcoding for performance: first 3 limbs of imm_limbs, last 3 limbs of pc_limbs where
+        // most significant limb of pc_limbs is shifted up
+        self.bitwise_lookup_chip
+            .request_range(imm_limbs[0] as u32, imm_limbs[1] as u32);
+        self.bitwise_lookup_chip
+            .request_range(imm_limbs[2] as u32, pc_limbs[1] as u32);
+        let msl_shift = RV32_REGISTER_NUM_LIMBS * RV32_CELL_BITS - PC_BITS;
+        self.bitwise_lookup_chip
+            .request_range(pc_limbs[2] as u32, (pc_limbs[3] as u32) << msl_shift);
+        for pair in rd_data.chunks_exact(2) {
+            self.bitwise_lookup_chip
+                .request_range(pair[0] as u32, pair[1] as u32);
+        }
+        // Writing the columns in reverse order, starting from the last field (rd_data)
+        core_row.rd_data = rd_data.map(F::from_canonical_u8);
+        // only the middle 2 limbs:
+        core_row.pc_limbs = from_fn(|i| F::from_canonical_u8(pc_limbs[i + 1]));
+        core_row.imm_limbs = from_fn(|i| F::from_canonical_u8(imm_limbs[i]));
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        core_row.is_valid = F::ONE;
     }
 }
 
 // returns rd_data
-pub(super) fn run_auipc(
-    _opcode: Rv32AuipcOpcode,
-    pc: u32,
-    imm: u32,
-) -> [u32; RV32_REGISTER_NUM_LIMBS] {
+#[inline(always)]
+pub(super) fn run_auipc(pc: u32, imm: u32) -> [u8; RV32_REGISTER_NUM_LIMBS] {
     let rd = pc.wrapping_add(imm << RV32_CELL_BITS);
-    array::from_fn(|i| (rd >> (RV32_CELL_BITS * i)) & RV32_LIMB_MAX)
+    rd.to_le_bytes()
 }
diff --git a/extensions/rv32im/circuit/src/auipc/execution.rs b/extensions/rv32im/circuit/src/auipc/execution.rs
new file mode 100644
index 0000000000..c9269613a1
--- /dev/null
+++ b/extensions/rv32im/circuit/src/auipc/execution.rs
@@ -0,0 +1,125 @@
+use std::{
+    borrow::{Borrow, BorrowMut},
+    mem::size_of,
+};
+
+use openvm_circuit::{
+    arch::{
+        E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait,
+        MeteredExecutor, StaticProgramError, VmExecState,
+    },
+    system::memory::online::GuestMemory,
+};
+use openvm_circuit_primitives_derive::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_REGISTER_AS,
+};
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::{run_auipc, Rv32AuipcExecutor};
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct AuiPcPreCompute {
+    imm: u32, // instruction operand `c` as a canonical u32 (the AUIPC immediate)
+    a: u8, // instruction operand `a` truncated to u8; later used as the register write pointer
+}
+
+impl<A> Rv32AuipcExecutor<A> {
+    /// Fills `data` from `inst`; fails with `InvalidInstruction(pc)` if `d != RV32_REGISTER_AS`.
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut AuiPcPreCompute,
+    ) -> Result<(), StaticProgramError> {
+        let Instruction { a, c: imm, d, .. } = inst;
+        if d.as_canonical_u32() != RV32_REGISTER_AS {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+        let imm = imm.as_canonical_u32();
+        *data = AuiPcPreCompute {
+            imm,
+            a: a.as_canonical_u32() as u8,
+        };
+        Ok(())
+    }
+}
+
+impl<F, A> Executor<F> for Rv32AuipcExecutor<A>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize {
+        size_of::<AuiPcPreCompute>() // byte size of `AuiPcPreCompute`
+    }
+
+    #[inline(always)]
+    fn pre_compute<Ctx: ExecutionCtxTrait>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let data: &mut AuiPcPreCompute = data.borrow_mut(); // typed view over the byte buffer
+        self.pre_compute_impl(pc, inst, data)?; // validates `inst` and fills the buffer
+        Ok(execute_e1_impl) // handler that decodes its `&[u8]` argument as `AuiPcPreCompute`
+    }
+}
+
+impl<F, A> MeteredExecutor<F> for Rv32AuipcExecutor<A>
+where
+    F: PrimeField32,
+{
+    fn metered_pre_compute_size(&self) -> usize {
+        size_of::<E2PreCompute<AuiPcPreCompute>>() // `AuiPcPreCompute` wrapped with a `chip_idx`
+    }
+
+    fn metered_pre_compute<Ctx>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        let data: &mut E2PreCompute<AuiPcPreCompute> = data.borrow_mut();
+        data.chip_idx = chip_idx as u32; // read back by `execute_e2_impl`
+        self.pre_compute_impl(pc, inst, &mut data.data)?; // same decoding as the unmetered path
+        Ok(execute_e2_impl)
+    }
+}
+
+#[inline(always)]
+unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait>(
+    pre_compute: &AuiPcPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let rd = run_auipc(vm_state.pc, pre_compute.imm); // AUIPC result bytes for the current pc
+    vm_state.vm_write(RV32_REGISTER_AS, pre_compute.a as u32, &rd);
+    // Step the pc by `DEFAULT_PC_STEP` (wrapping) and bump the instruction counter.
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+}
+
+unsafe fn execute_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &AuiPcPreCompute = pre_compute.borrow(); // typed view of the raw bytes
+    execute_e12_impl(pre_compute, vm_state);
+}
+
+unsafe fn execute_e2_impl<F: PrimeField32, CTX: MeteredExecutionCtxTrait>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &E2PreCompute<AuiPcPreCompute> = pre_compute.borrow();
+    vm_state
+        .ctx
+        .on_height_change(pre_compute.chip_idx as usize, 1); // height delta of 1 for `chip_idx`
+    execute_e12_impl(&pre_compute.data, vm_state);
+}
diff --git a/extensions/rv32im/circuit/src/auipc/mod.rs b/extensions/rv32im/circuit/src/auipc/mod.rs
index 6e2234bfbd..2276c108d1 100644
--- a/extensions/rv32im/circuit/src/auipc/mod.rs
+++ b/extensions/rv32im/circuit/src/auipc/mod.rs
@@ -1,11 +1,13 @@
-use openvm_circuit::arch::VmChipWrapper;
+use openvm_circuit::arch::{VmAirWrapper, VmChipWrapper};
 
-use crate::adapters::Rv32RdWriteAdapterChip;
+use crate::adapters::Rv32RdWriteAdapterAir;
 
 mod core;
+mod execution;
 pub use core::*;
 
 #[cfg(test)]
 mod tests;
 
-pub type Rv32AuipcChip<F> = VmChipWrapper<F, Rv32RdWriteAdapterChip<F>, Rv32AuipcCoreChip>;
+pub type Rv32AuipcAir = VmAirWrapper<Rv32RdWriteAdapterAir, Rv32AuipcCoreAir>;
+pub type Rv32AuipcChip<F> = VmChipWrapper<F, Rv32AuipcFiller>;
diff --git a/extensions/rv32im/circuit/src/auipc/tests.rs b/extensions/rv32im/circuit/src/auipc/tests.rs
index 2c8a399198..be80b756ba 100644
--- a/extensions/rv32im/circuit/src/auipc/tests.rs
+++ b/extensions/rv32im/circuit/src/auipc/tests.rs
@@ -1,52 +1,92 @@
-use std::borrow::BorrowMut;
+use std::{borrow::BorrowMut, sync::Arc};
 
-use openvm_circuit::arch::{testing::VmChipTestBuilder, VmAdapterChip};
+use openvm_circuit::arch::{
+    testing::{TestChipHarness, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS},
+    Arena, DenseRecordArena, EmptyAdapterCoreLayout, PreflightExecutor, VmAirWrapper,
+    VmChipWrapper,
+};
 use openvm_circuit_primitives::bitwise_op_lookup::{
-    BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
+    BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
+    SharedBitwiseOperationLookupChip,
 };
 use openvm_instructions::{instruction::Instruction, program::PC_BITS, LocalOpcode};
 use openvm_rv32im_transpiler::Rv32AuipcOpcode::{self, *};
 use openvm_stark_backend::{
-    interaction::BusIndex,
     p3_air::BaseAir,
     p3_field::{FieldAlgebra, PrimeField32},
-    p3_matrix::{dense::RowMajorMatrix, Matrix},
+    p3_matrix::{
+        dense::{DenseMatrix, RowMajorMatrix},
+        Matrix,
+    },
     utils::disable_debug_builder,
-    verifier::VerificationError,
-    Chip, ChipUsageGetter,
 };
 use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
 use rand::{rngs::StdRng, Rng};
 
-use super::{run_auipc, Rv32AuipcChip, Rv32AuipcCoreChip, Rv32AuipcCoreCols};
-use crate::adapters::{Rv32RdWriteAdapterChip, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS};
+use super::{run_auipc, Rv32AuipcChip, Rv32AuipcCoreAir, Rv32AuipcCoreCols, Rv32AuipcExecutor};
+use crate::{
+    adapters::{
+        Rv32RdWriteAdapterAir, Rv32RdWriteAdapterExecutor, Rv32RdWriteAdapterFiller,
+        Rv32RdWriteAdapterRecord, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS,
+    },
+    test_utils::get_verification_error,
+    Rv32AuipcAir, Rv32AuipcCoreRecord, Rv32AuipcFiller,
+};
 
 const IMM_BITS: usize = 24;
-const BITWISE_OP_LOOKUP_BUS: BusIndex = 9;
-
+const MAX_INS_CAPACITY: usize = 128;
 type F = BabyBear;
+type Harness<RA> = TestChipHarness<F, Rv32AuipcExecutor, Rv32AuipcAir, Rv32AuipcChip<F>, RA>;
+
+fn create_test_chip<RA: Arena>(
+    tester: &VmChipTestBuilder<F>,
+) -> (
+    Harness<RA>,
+    (
+        BitwiseOperationLookupAir<RV32_CELL_BITS>,
+        SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+    ),
+) {
+    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
+    let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+        bitwise_bus,
+    ));
+    // The core AIR and the filler below are both tied to this one lookup bus / chip.
+    let air = VmAirWrapper::new(
+        Rv32RdWriteAdapterAir::new(tester.memory_bridge(), tester.execution_bridge()),
+        Rv32AuipcCoreAir::new(bitwise_bus),
+    );
+    let executor = Rv32AuipcExecutor::new(Rv32RdWriteAdapterExecutor::new());
+    let chip = VmChipWrapper::<F, _>::new(
+        Rv32AuipcFiller::new(Rv32RdWriteAdapterFiller::new(), bitwise_chip.clone()),
+        tester.memory_helper(),
+    );
+    let harness = Harness::<RA>::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
+    // Return the lookup AIR and chip too, so the caller can load them as periphery.
+    (harness, (bitwise_chip.air, bitwise_chip))
+}
 
-fn set_and_execute(
+fn set_and_execute<RA: Arena>(
     tester: &mut VmChipTestBuilder<F>,
-    chip: &mut Rv32AuipcChip<F>,
+    harness: &mut Harness<RA>,
     rng: &mut StdRng,
     opcode: Rv32AuipcOpcode,
     imm: Option<u32>,
     initial_pc: Option<u32>,
-) {
+) where
+    Rv32AuipcExecutor: PreflightExecutor<F, RA>,
+{
     let imm = imm.unwrap_or(rng.gen_range(0..(1 << IMM_BITS))) as usize;
     let a = rng.gen_range(0..32) << 2;
 
     tester.execute_with_pc(
-        chip,
+        harness,
         &Instruction::from_usize(opcode.global_opcode(), [a, 0, imm, 1, 0]),
         initial_pc.unwrap_or(rng.gen_range(0..(1 << PC_BITS))),
     );
     let initial_pc = tester.execution.last_from_pc().as_canonical_u32();
-
-    let rd_data = run_auipc(opcode, initial_pc, imm as u32);
-
-    assert_eq!(rd_data.map(F::from_canonical_u32), tester.read::<4>(1, a));
+    let rd_data = run_auipc(initial_pc, imm as u32);
+    assert_eq!(rd_data.map(F::from_canonical_u8), tester.read::<4>(1, a));
 }
 
 ///////////////////////////////////////////////////////////////////////////////////////
@@ -59,24 +99,18 @@ fn set_and_execute(
 #[test]
 fn rand_auipc_test() {
     let mut rng = create_seeded_rng();
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-
     let mut tester = VmChipTestBuilder::default();
-    let adapter = Rv32RdWriteAdapterChip::<F>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-    );
-    let core = Rv32AuipcCoreChip::new(bitwise_chip.clone());
-    let mut chip = Rv32AuipcChip::<F>::new(adapter, core, tester.offline_memory_mutex_arc());
+    let (mut harness, bitwise) = create_test_chip(&tester);
 
     let num_tests: usize = 100;
     for _ in 0..num_tests {
-        set_and_execute(&mut tester, &mut chip, &mut rng, AUIPC, None, None);
+        set_and_execute(&mut tester, &mut harness, &mut rng, AUIPC, None, None);
     }
-
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
+    let tester = tester
+        .build()
+        .load(harness)
+        .load_periphery(bitwise)
+        .finalize();
     tester.simple_test().expect("Verification failed");
 }
 
@@ -84,75 +118,62 @@ fn rand_auipc_test() {
 // NEGATIVE TESTS
 //
 // Given a fake trace of a single operation, setup a chip and run the test. We replace
-// the write part of the trace and check that the core chip throws the expected error.
-// A dummy adaptor is used so memory interactions don't indirectly cause false passes.
+// part of the trace and check that the chip throws the expected error.
 //////////////////////////////////////////////////////////////////////////////////////
 
+#[derive(Clone, Copy, Default, PartialEq)]
+struct AuipcPrankValues {
+    pub rd_data: Option<[u32; RV32_REGISTER_NUM_LIMBS]>, // `Some` replaces `rd_data` in row 0
+    pub imm_limbs: Option<[u32; RV32_REGISTER_NUM_LIMBS - 1]>, // `Some` replaces `imm_limbs`
+    pub pc_limbs: Option<[u32; RV32_REGISTER_NUM_LIMBS - 2]>, // `Some` replaces `pc_limbs`
+}
+
 fn run_negative_auipc_test(
     opcode: Rv32AuipcOpcode,
     initial_imm: Option<u32>,
     initial_pc: Option<u32>,
-    rd_data: Option<[u32; RV32_REGISTER_NUM_LIMBS]>,
-    imm_limbs: Option<[u32; RV32_REGISTER_NUM_LIMBS - 1]>,
-    pc_limbs: Option<[u32; RV32_REGISTER_NUM_LIMBS - 2]>,
-    expected_error: VerificationError,
+    prank_vals: AuipcPrankValues,
+    interaction_error: bool,
 ) {
     let mut rng = create_seeded_rng();
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-
     let mut tester = VmChipTestBuilder::default();
-    let adapter = Rv32RdWriteAdapterChip::<F>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-    );
-    let adapter_width = BaseAir::<F>::width(adapter.air());
-    let core = Rv32AuipcCoreChip::new(bitwise_chip.clone());
-    let mut chip = Rv32AuipcChip::<F>::new(adapter, core, tester.offline_memory_mutex_arc());
+    let (mut harness, bitwise) = create_test_chip(&tester);
 
     set_and_execute(
         &mut tester,
-        &mut chip,
+        &mut harness,
         &mut rng,
         opcode,
         initial_imm,
         initial_pc,
     );
 
-    let tester = tester.build();
-
-    let auipc_trace_width = chip.trace_width();
-    let air = chip.air();
-    let mut chip_input = chip.generate_air_proof_input();
-    let auipc_trace = chip_input.raw.common_main.as_mut().unwrap();
-    {
-        let mut trace_row = auipc_trace.row_slice(0).to_vec();
-
+    let adapter_width = BaseAir::<F>::width(&harness.air.adapter);
+    let modify_trace = |trace: &mut DenseMatrix<F>| {
+        let mut trace_row = trace.row_slice(0).to_vec();
         let (_, core_row) = trace_row.split_at_mut(adapter_width);
-
         let core_cols: &mut Rv32AuipcCoreCols<F> = core_row.borrow_mut();
 
-        if let Some(data) = rd_data {
+        if let Some(data) = prank_vals.rd_data {
             core_cols.rd_data = data.map(F::from_canonical_u32);
         }
-
-        if let Some(data) = imm_limbs {
+        if let Some(data) = prank_vals.imm_limbs {
             core_cols.imm_limbs = data.map(F::from_canonical_u32);
         }
-
-        if let Some(data) = pc_limbs {
+        if let Some(data) = prank_vals.pc_limbs {
             core_cols.pc_limbs = data.map(F::from_canonical_u32);
         }
 
-        *auipc_trace = RowMajorMatrix::new(trace_row, auipc_trace_width);
-    }
+        *trace = RowMajorMatrix::new(trace_row, trace.width());
+    };
+
     disable_debug_builder();
     let tester = tester
-        .load_air_proof_input((air, chip_input))
-        .load(bitwise_chip)
+        .build()
+        .load_and_prank_trace(harness, modify_trace)
+        .load_periphery(bitwise)
         .finalize();
-    tester.simple_test_with_expected_error(expected_error);
+    tester.simple_test_with_expected_error(get_verification_error(interaction_error));
 }
 
 #[test]
@@ -161,47 +182,53 @@ fn invalid_limb_negative_tests() {
         AUIPC,
         Some(9722891),
         None,
-        None,
-        Some([107, 46, 81]),
-        None,
-        VerificationError::OodEvaluationMismatch,
+        AuipcPrankValues {
+            imm_limbs: Some([107, 46, 81]),
+            ..Default::default()
+        },
+        false,
     );
     run_negative_auipc_test(
         AUIPC,
         Some(0),
         Some(2110400),
-        Some([194, 51, 32, 240]),
-        None,
-        Some([51, 32]),
-        VerificationError::ChallengePhaseError,
+        AuipcPrankValues {
+            rd_data: Some([194, 51, 32, 240]),
+            pc_limbs: Some([51, 32]),
+            ..Default::default()
+        },
+        true,
     );
     run_negative_auipc_test(
         AUIPC,
         None,
         None,
-        None,
-        None,
-        Some([206, 166]),
-        VerificationError::OodEvaluationMismatch,
+        AuipcPrankValues {
+            pc_limbs: Some([206, 166]),
+            ..Default::default()
+        },
+        false,
     );
     run_negative_auipc_test(
         AUIPC,
         None,
         None,
-        Some([30, 92, 82, 132]),
-        None,
-        None,
-        VerificationError::OodEvaluationMismatch,
+        AuipcPrankValues {
+            rd_data: Some([30, 92, 82, 132]),
+            ..Default::default()
+        },
+        false,
     );
-
     run_negative_auipc_test(
         AUIPC,
         None,
         Some(876487877),
-        Some([197, 202, 49, 70]),
-        Some([166, 243, 17]),
-        Some([36, 62]),
-        VerificationError::ChallengePhaseError,
+        AuipcPrankValues {
+            rd_data: Some([197, 202, 49, 70]),
+            imm_limbs: Some([166, 243, 17]),
+            pc_limbs: Some([36, 62]),
+        },
+        true,
     );
 }
 
@@ -211,37 +238,42 @@ fn overflow_negative_tests() {
         AUIPC,
         Some(256264),
         None,
-        None,
-        Some([3592, 219, 3]),
-        None,
-        VerificationError::OodEvaluationMismatch,
+        AuipcPrankValues {
+            imm_limbs: Some([3592, 219, 3]),
+            ..Default::default()
+        },
+        false,
     );
     run_negative_auipc_test(
         AUIPC,
         None,
         None,
-        None,
-        None,
-        Some([0, 0]),
-        VerificationError::OodEvaluationMismatch,
+        AuipcPrankValues {
+            pc_limbs: Some([0, 0]),
+            ..Default::default()
+        },
+        false,
     );
     run_negative_auipc_test(
         AUIPC,
         Some(255),
         None,
-        None,
-        Some([F::NEG_ONE.as_canonical_u32(), 1, 0]),
-        None,
-        VerificationError::ChallengePhaseError,
+        AuipcPrankValues {
+            imm_limbs: Some([F::NEG_ONE.as_canonical_u32(), 1, 0]),
+            ..Default::default()
+        },
+        true,
     );
     run_negative_auipc_test(
         AUIPC,
         Some(0),
         Some(255),
-        Some([F::NEG_ONE.as_canonical_u32(), 1, 0, 0]),
-        Some([0, 0, 0]),
-        Some([1, 0]),
-        VerificationError::ChallengePhaseError,
+        AuipcPrankValues {
+            rd_data: Some([F::NEG_ONE.as_canonical_u32(), 1, 0, 0]),
+            imm_limbs: Some([0, 0, 0]),
+            pc_limbs: Some([1, 0]),
+        },
+        true,
     );
 }
 
@@ -251,33 +283,54 @@ fn overflow_negative_tests() {
 /// Ensure that solve functions produce the correct results.
 ///////////////////////////////////////////////////////////////////////////////////////
 
-#[test]
-fn execute_roundtrip_sanity_test() {
-    let mut rng = create_seeded_rng();
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-
-    let mut tester = VmChipTestBuilder::default();
-    let adapter = Rv32RdWriteAdapterChip::<F>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-    );
-    let inner = Rv32AuipcCoreChip::new(bitwise_chip);
-    let mut chip = Rv32AuipcChip::<F>::new(adapter, inner, tester.offline_memory_mutex_arc());
-
-    let num_tests: usize = 100;
-    for _ in 0..num_tests {
-        set_and_execute(&mut tester, &mut chip, &mut rng, AUIPC, None, None);
-    }
-}
-
 #[test]
 fn run_auipc_sanity_test() {
-    let opcode = AUIPC;
     let initial_pc = 234567890;
     let imm = 11302451;
-    let rd_data = run_auipc(opcode, initial_pc, imm);
+    let rd_data = run_auipc(initial_pc, imm);
 
     assert_eq!(rd_data, [210, 107, 113, 186]);
 }
+
+// ////////////////////////////////////////////////////////////////////////////////////
+// DENSE TESTS
+
+// Ensure that the chip works as expected with dense records.
+// We first execute some instructions with a [DenseRecordArena] and transfer the records
+// to a [MatrixRecordArena]. After transferring we generate the trace and make sure that
+// all the constraints pass.
+// ////////////////////////////////////////////////////////////////////////////////////
+
+#[test]
+fn dense_record_arena_test() {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default();
+    let (mut sparse_harness, bitwise) = create_test_chip(&tester);
+
+    {
+        let mut dense_harness = create_test_chip::<DenseRecordArena>(&tester).0;
+        // Execute only against the dense harness; the sparse one is not executed on.
+        let num_ops: usize = 100;
+        for _ in 0..num_ops {
+            set_and_execute(&mut tester, &mut dense_harness, &mut rng, AUIPC, None, None);
+        }
+
+        type Record<'a> = (
+            &'a mut Rv32RdWriteAdapterRecord,
+            &'a mut Rv32AuipcCoreRecord,
+        );
+        // Transfer the dense records into the sparse harness's arena.
+        let mut record_interpreter = dense_harness.arena.get_record_seeker::<Record, _>();
+        record_interpreter.transfer_to_matrix_arena(
+            &mut sparse_harness.arena,
+            EmptyAdapterCoreLayout::<F, Rv32RdWriteAdapterExecutor>::new(),
+        );
+    }
+    // Only the sparse harness is loaded, so the trace comes from the transferred records.
+    let tester = tester
+        .build()
+        .load(sparse_harness)
+        .load_periphery(bitwise)
+        .finalize();
+    tester.simple_test().expect("Verification failed");
+}
diff --git a/extensions/rv32im/circuit/src/base_alu/core.rs b/extensions/rv32im/circuit/src/base_alu/core.rs
index a87418cc91..145d364794 100644
--- a/extensions/rv32im/circuit/src/base_alu/core.rs
+++ b/extensions/rv32im/circuit/src/base_alu/core.rs
@@ -1,18 +1,20 @@
 use std::{
     array,
     borrow::{Borrow, BorrowMut},
+    iter::zip,
 };
 
-use openvm_circuit::arch::{
-    AdapterAirContext, AdapterRuntimeContext, MinimalInstruction, Result, VmAdapterInterface,
-    VmCoreAir, VmCoreChip,
+use openvm_circuit::{
+    arch::*,
+    system::memory::{online::TracingMemory, MemoryAuxColsFactory},
 };
 use openvm_circuit_primitives::{
     bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
     utils::not,
+    AlignedBytesBorrow,
 };
 use openvm_circuit_primitives_derive::AlignedBorrow;
-use openvm_instructions::{instruction::Instruction, LocalOpcode};
+use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP, LocalOpcode};
 use openvm_rv32im_transpiler::BaseAluOpcode;
 use openvm_stark_backend::{
     interaction::InteractionBuilder,
@@ -20,12 +22,10 @@ use openvm_stark_backend::{
     p3_field::{Field, FieldAlgebra, PrimeField32},
     rap::BaseAirWithPublicValues,
 };
-use serde::{de::DeserializeOwned, Deserialize, Serialize};
-use serde_big_array::BigArray;
 use strum::IntoEnumIterator;
 
 #[repr(C)]
-#[derive(AlignedBorrow)]
+#[derive(AlignedBorrow, Debug)]
 pub struct BaseAluCoreCols<T, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
     pub a: [T; NUM_LIMBS],
     pub b: [T; NUM_LIMBS],
@@ -38,10 +38,10 @@ pub struct BaseAluCoreCols<T, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
     pub opcode_and_flag: T,
 }
 
-#[derive(Copy, Clone, Debug)]
+#[derive(Copy, Clone, Debug, derive_new::new)]
 pub struct BaseAluCoreAir<const NUM_LIMBS: usize, const LIMB_BITS: usize> {
     pub bus: BitwiseOperationLookupBus,
-    offset: usize,
+    pub offset: usize,
 }
 
 impl<F: Field, const NUM_LIMBS: usize, const LIMB_BITS: usize> BaseAir<F>
@@ -165,175 +165,190 @@ where
     }
 }
 
-#[repr(C)]
-#[derive(Clone, Debug, Serialize, Deserialize)]
-#[serde(bound = "T: Serialize + DeserializeOwned")]
-pub struct BaseAluCoreRecord<T, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
-    pub opcode: BaseAluOpcode,
-    #[serde(with = "BigArray")]
-    pub a: [T; NUM_LIMBS],
-    #[serde(with = "BigArray")]
-    pub b: [T; NUM_LIMBS],
-    #[serde(with = "BigArray")]
-    pub c: [T; NUM_LIMBS],
+#[repr(C, align(4))]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct BaseAluCoreRecord<const NUM_LIMBS: usize> {
+    pub b: [u8; NUM_LIMBS], // first operand as read by the adapter
+    pub c: [u8; NUM_LIMBS], // second operand as read by the adapter
+    // Use u8 instead of usize for better packing
+    pub local_opcode: u8, // `BaseAluOpcode` discriminant, already relative to the chip offset
+}
 
-pub struct BaseAluCoreChip<const NUM_LIMBS: usize, const LIMB_BITS: usize> {
-    pub air: BaseAluCoreAir<NUM_LIMBS, LIMB_BITS>,
-    pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<LIMB_BITS>,
+#[derive(Clone, Copy, derive_new::new)]
+pub struct BaseAluExecutor<A, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
+    adapter: A, // performs the operand reads and the result write in `execute`
+    pub offset: usize, // opcode offset removed to obtain the local `BaseAluOpcode` index
+}
 
-impl<const NUM_LIMBS: usize, const LIMB_BITS: usize> BaseAluCoreChip<NUM_LIMBS, LIMB_BITS> {
-    pub fn new(
-        bitwise_lookup_chip: SharedBitwiseOperationLookupChip<LIMB_BITS>,
-        offset: usize,
-    ) -> Self {
-        Self {
-            air: BaseAluCoreAir {
-                bus: bitwise_lookup_chip.bus(),
-                offset,
-            },
-            bitwise_lookup_chip,
-        }
-    }
+#[derive(derive_new::new)]
+pub struct BaseAluFiller<A, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
+    adapter: A, // fills the adapter part of each trace row
+    pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<LIMB_BITS>, // receives XOR requests
+    pub offset: usize,
+}
 
-impl<F, I, const NUM_LIMBS: usize, const LIMB_BITS: usize> VmCoreChip<F, I>
-    for BaseAluCoreChip<NUM_LIMBS, LIMB_BITS>
+impl<F, A, RA, const NUM_LIMBS: usize, const LIMB_BITS: usize> PreflightExecutor<F, RA>
+    for BaseAluExecutor<A, NUM_LIMBS, LIMB_BITS>
 where
     F: PrimeField32,
-    I: VmAdapterInterface<F>,
-    I::Reads: Into<[[F; NUM_LIMBS]; 2]>,
-    I::Writes: From<[[F; NUM_LIMBS]; 1]>,
+    A: 'static
+        + AdapterTraceExecutor<
+            F,
+            ReadData: Into<[[u8; NUM_LIMBS]; 2]>,
+            WriteData: From<[[u8; NUM_LIMBS]; 1]>,
+        >,
+    for<'buf> RA: RecordArena<
+        'buf,
+        EmptyAdapterCoreLayout<F, A>,
+        (A::RecordMut<'buf>, &'buf mut BaseAluCoreRecord<NUM_LIMBS>),
+    >,
 {
-    type Record = BaseAluCoreRecord<F, NUM_LIMBS, LIMB_BITS>;
-    type Air = BaseAluCoreAir<NUM_LIMBS, LIMB_BITS>;
+    fn get_opcode_name(&self, opcode: usize) -> String {
+        format!("{:?}", BaseAluOpcode::from_usize(opcode - self.offset))
+    }
 
-    #[allow(clippy::type_complexity)]
-    fn execute_instruction(
+    fn execute(
         &self,
+        state: VmStateMut<F, TracingMemory, RA>,
         instruction: &Instruction<F>,
-        _from_pc: u32,
-        reads: I::Reads,
-    ) -> Result<(AdapterRuntimeContext<F, I>, Self::Record)> {
+    ) -> Result<(), ExecutionError> {
         let Instruction { opcode, .. } = instruction;
-        let local_opcode = BaseAluOpcode::from_usize(opcode.local_opcode_idx(self.air.offset));
 
-        let data: [[F; NUM_LIMBS]; 2] = reads.into();
-        let b = data[0].map(|x| x.as_canonical_u32());
-        let c = data[1].map(|y| y.as_canonical_u32());
-        let a = run_alu::<NUM_LIMBS, LIMB_BITS>(local_opcode, &b, &c);
+        let local_opcode = BaseAluOpcode::from_usize(opcode.local_opcode_idx(self.offset));
+        let (mut adapter_record, core_record) = state.ctx.alloc(EmptyAdapterCoreLayout::new());
 
-        let output = AdapterRuntimeContext {
-            to_pc: None,
-            writes: [a.map(F::from_canonical_u32)].into(),
-        };
+        A::start(*state.pc, state.memory, &mut adapter_record);
 
-        if local_opcode == BaseAluOpcode::ADD || local_opcode == BaseAluOpcode::SUB {
-            for a_val in a {
-                self.bitwise_lookup_chip.request_xor(a_val, a_val);
-            }
-        } else {
-            for (b_val, c_val) in b.iter().zip(c.iter()) {
-                self.bitwise_lookup_chip.request_xor(*b_val, *c_val);
-            }
-        }
+        [core_record.b, core_record.c] = self
+            .adapter
+            .read(state.memory, instruction, &mut adapter_record)
+            .into();
 
-        let record = Self::Record {
-            opcode: local_opcode,
-            a: a.map(F::from_canonical_u32),
-            b: data[0],
-            c: data[1],
-        };
+        let rd = run_alu::<NUM_LIMBS, LIMB_BITS>(local_opcode, &core_record.b, &core_record.c);
 
-        Ok((output, record))
-    }
+        core_record.local_opcode = local_opcode as u8;
 
-    fn get_opcode_name(&self, opcode: usize) -> String {
-        format!("{:?}", BaseAluOpcode::from_usize(opcode - self.air.offset))
-    }
+        self.adapter
+            .write(state.memory, instruction, [rd].into(), &mut adapter_record);
 
-    fn generate_trace_row(&self, row_slice: &mut [F], record: Self::Record) {
-        let row_slice: &mut BaseAluCoreCols<_, NUM_LIMBS, LIMB_BITS> = row_slice.borrow_mut();
-        row_slice.a = record.a;
-        row_slice.b = record.b;
-        row_slice.c = record.c;
-        row_slice.opcode_add_flag = F::from_bool(record.opcode == BaseAluOpcode::ADD);
-        row_slice.opcode_sub_flag = F::from_bool(record.opcode == BaseAluOpcode::SUB);
-        row_slice.opcode_xor_flag = F::from_bool(record.opcode == BaseAluOpcode::XOR);
-        row_slice.opcode_or_flag = F::from_bool(record.opcode == BaseAluOpcode::OR);
-        row_slice.opcode_and_flag = F::from_bool(record.opcode == BaseAluOpcode::AND);
+        *state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP);
+
+        Ok(())
     }
+}
+
+impl<F, A, const NUM_LIMBS: usize, const LIMB_BITS: usize> TraceFiller<F>
+    for BaseAluFiller<A, NUM_LIMBS, LIMB_BITS>
+where
+    F: PrimeField32,
+    A: 'static + AdapterTraceFiller<F>,
+{
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, row_slice: &mut [F]) {
+        let (adapter_row, mut core_row) = unsafe { row_slice.split_at_mut_unchecked(A::WIDTH) };
+        self.adapter.fill_trace_row(mem_helper, adapter_row);
+
+        let record: &BaseAluCoreRecord<NUM_LIMBS> =
+            unsafe { get_record_from_slice(&mut core_row, ()) };
+        let core_row: &mut BaseAluCoreCols<F, NUM_LIMBS, LIMB_BITS> = core_row.borrow_mut();
+        // SAFETY: the following is highly unsafe. We are going to cast `core_row` to a record
+        // buffer, and then do an _overlapping_ write to the `core_row` as a row of field elements.
+        // This requires:
+        // - Cols and Record structs should be repr(C) and we write in reverse order (to ensure
+        //   non-overlapping)
+        // - Do not overwrite any reference in `record` before it has already been used or moved
+        // - alignment of `F` must be >= alignment of Record (AlignedBytesBorrow will panic
+        //   otherwise)
+
+        let local_opcode = BaseAluOpcode::from_usize(record.local_opcode as usize);
+        let a = run_alu::<NUM_LIMBS, LIMB_BITS>(local_opcode, &record.b, &record.c);
+        // PERF: needless conversion
+        core_row.opcode_and_flag = F::from_bool(local_opcode == BaseAluOpcode::AND);
+        core_row.opcode_or_flag = F::from_bool(local_opcode == BaseAluOpcode::OR);
+        core_row.opcode_xor_flag = F::from_bool(local_opcode == BaseAluOpcode::XOR);
+        core_row.opcode_sub_flag = F::from_bool(local_opcode == BaseAluOpcode::SUB);
+        core_row.opcode_add_flag = F::from_bool(local_opcode == BaseAluOpcode::ADD);
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        if local_opcode == BaseAluOpcode::ADD || local_opcode == BaseAluOpcode::SUB {
+            for a_val in a {
+                self.bitwise_lookup_chip
+                    .request_xor(a_val as u32, a_val as u32);
+            }
+        } else {
+            for (b_val, c_val) in zip(record.b, record.c) {
+                self.bitwise_lookup_chip
+                    .request_xor(b_val as u32, c_val as u32);
+            }
+        }
+        core_row.c = record.c.map(F::from_canonical_u8);
+        core_row.b = record.b.map(F::from_canonical_u8);
+        core_row.a = a.map(F::from_canonical_u8);
     }
 }
 
+#[inline(always)]
 pub(super) fn run_alu<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
     opcode: BaseAluOpcode,
-    x: &[u32; NUM_LIMBS],
-    y: &[u32; NUM_LIMBS],
-) -> [u32; NUM_LIMBS] {
+    x: &[u8; NUM_LIMBS],
+    y: &[u8; NUM_LIMBS],
+) -> [u8; NUM_LIMBS] {
+    debug_assert!(LIMB_BITS <= 8, "specialize for bytes");
     match opcode {
         BaseAluOpcode::ADD => run_add::<NUM_LIMBS, LIMB_BITS>(x, y),
         BaseAluOpcode::SUB => run_subtract::<NUM_LIMBS, LIMB_BITS>(x, y),
-        BaseAluOpcode::XOR => run_xor::<NUM_LIMBS, LIMB_BITS>(x, y),
-        BaseAluOpcode::OR => run_or::<NUM_LIMBS, LIMB_BITS>(x, y),
-        BaseAluOpcode::AND => run_and::<NUM_LIMBS, LIMB_BITS>(x, y),
+        BaseAluOpcode::XOR => run_xor::<NUM_LIMBS>(x, y),
+        BaseAluOpcode::OR => run_or::<NUM_LIMBS>(x, y),
+        BaseAluOpcode::AND => run_and::<NUM_LIMBS>(x, y),
     }
 }
 
+#[inline(always)]
 fn run_add<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
-    x: &[u32; NUM_LIMBS],
-    y: &[u32; NUM_LIMBS],
-) -> [u32; NUM_LIMBS] {
-    let mut z = [0u32; NUM_LIMBS];
-    let mut carry = [0u32; NUM_LIMBS];
+    x: &[u8; NUM_LIMBS],
+    y: &[u8; NUM_LIMBS],
+) -> [u8; NUM_LIMBS] {
+    let mut z = [0u8; NUM_LIMBS];
+    let mut carry = [0u8; NUM_LIMBS];
     for i in 0..NUM_LIMBS {
-        z[i] = x[i] + y[i] + if i > 0 { carry[i - 1] } else { 0 };
-        carry[i] = z[i] >> LIMB_BITS;
-        z[i] &= (1 << LIMB_BITS) - 1;
+        let mut overflow =
+            (x[i] as u16) + (y[i] as u16) + if i > 0 { carry[i - 1] as u16 } else { 0 };
+        carry[i] = (overflow >> LIMB_BITS) as u8;
+        overflow &= (1u16 << LIMB_BITS) - 1;
+        z[i] = overflow as u8;
     }
     z
 }
 
+#[inline(always)]
 fn run_subtract<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
-    x: &[u32; NUM_LIMBS],
-    y: &[u32; NUM_LIMBS],
-) -> [u32; NUM_LIMBS] {
-    let mut z = [0u32; NUM_LIMBS];
-    let mut carry = [0u32; NUM_LIMBS];
+    x: &[u8; NUM_LIMBS],
+    y: &[u8; NUM_LIMBS],
+) -> [u8; NUM_LIMBS] {
+    let mut z = [0u8; NUM_LIMBS];
+    let mut carry = [0u8; NUM_LIMBS];
     for i in 0..NUM_LIMBS {
-        let rhs = y[i] + if i > 0 { carry[i - 1] } else { 0 };
-        if x[i] >= rhs {
-            z[i] = x[i] - rhs;
+        let rhs = y[i] as u16 + if i > 0 { carry[i - 1] as u16 } else { 0 };
+        if x[i] as u16 >= rhs {
+            z[i] = x[i] - rhs as u8;
             carry[i] = 0;
         } else {
-            z[i] = x[i] + (1 << LIMB_BITS) - rhs;
+            z[i] = (x[i] as u16 + (1u16 << LIMB_BITS) - rhs) as u8;
             carry[i] = 1;
         }
     }
     z
 }
 
-fn run_xor<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
-    x: &[u32; NUM_LIMBS],
-    y: &[u32; NUM_LIMBS],
-) -> [u32; NUM_LIMBS] {
+#[inline(always)]
+fn run_xor<const NUM_LIMBS: usize>(x: &[u8; NUM_LIMBS], y: &[u8; NUM_LIMBS]) -> [u8; NUM_LIMBS] {
     array::from_fn(|i| x[i] ^ y[i])
 }
 
-fn run_or<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
-    x: &[u32; NUM_LIMBS],
-    y: &[u32; NUM_LIMBS],
-) -> [u32; NUM_LIMBS] {
+#[inline(always)]
+fn run_or<const NUM_LIMBS: usize>(x: &[u8; NUM_LIMBS], y: &[u8; NUM_LIMBS]) -> [u8; NUM_LIMBS] {
     array::from_fn(|i| x[i] | y[i])
 }
 
-fn run_and<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
-    x: &[u32; NUM_LIMBS],
-    y: &[u32; NUM_LIMBS],
-) -> [u32; NUM_LIMBS] {
+#[inline(always)]
+fn run_and<const NUM_LIMBS: usize>(x: &[u8; NUM_LIMBS], y: &[u8; NUM_LIMBS]) -> [u8; NUM_LIMBS] {
     array::from_fn(|i| x[i] & y[i])
 }
diff --git a/extensions/rv32im/circuit/src/base_alu/execution.rs b/extensions/rv32im/circuit/src/base_alu/execution.rs
new file mode 100644
index 0000000000..acbbf12844
--- /dev/null
+++ b/extensions/rv32im/circuit/src/base_alu/execution.rs
@@ -0,0 +1,245 @@
+use std::{
+    borrow::{Borrow, BorrowMut},
+    mem::size_of,
+};
+
+use openvm_circuit::{
+    arch::{
+        E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait,
+        MeteredExecutor, StaticProgramError, VmExecState,
+    },
+    system::memory::online::GuestMemory,
+};
+use openvm_circuit_primitives_derive::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_IMM_AS, RV32_REGISTER_AS, RV32_REGISTER_NUM_LIMBS},
+    LocalOpcode,
+};
+use openvm_rv32im_transpiler::BaseAluOpcode;
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use crate::{adapters::imm_to_bytes, BaseAluExecutor};
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+pub(super) struct BaseAluPreCompute {
+    c: u32, // second operand: immediate value when `is_imm`, otherwise a register pointer
+    a: u8, // register pointer the result is written to
+    b: u8, // register pointer the first operand is read from
+}
+
+impl<A, const LIMB_BITS: usize> BaseAluExecutor<A, { RV32_REGISTER_NUM_LIMBS }, LIMB_BITS> {
+    /// Validates the `d`/`e` address spaces of `inst`, fills `data`, and returns `is_imm`,
+    /// which is true iff `e` is RV32_IMM_AS. Otherwise fails with `InvalidInstruction(pc)`.
+    #[inline(always)]
+    pub(super) fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut BaseAluPreCompute,
+    ) -> Result<bool, StaticProgramError> {
+        let dest_as = inst.d.as_canonical_u32();
+        let operand_as = inst.e.as_canonical_u32();
+        let is_imm = operand_as == RV32_IMM_AS;
+        // `d` must be the register address space; `e` may be immediate or register.
+        if dest_as != RV32_REGISTER_AS || (!is_imm && operand_as != RV32_REGISTER_AS) {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+        let raw_c = inst.c.as_canonical_u32();
+        // An immediate is expanded through `imm_to_bytes` once, up front.
+        let c = if is_imm {
+            u32::from_le_bytes(imm_to_bytes(raw_c))
+        } else {
+            raw_c
+        };
+        let a = inst.a.as_canonical_u32() as u8;
+        let b = inst.b.as_canonical_u32() as u8;
+        *data = BaseAluPreCompute { c, a, b };
+        Ok(is_imm)
+    }
+}
+
+impl<F, A, const LIMB_BITS: usize> Executor<F>
+    for BaseAluExecutor<A, { RV32_REGISTER_NUM_LIMBS }, LIMB_BITS>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize {
+        size_of::<BaseAluPreCompute>() // size of the struct `pre_compute` writes into `data`
+    }
+
+    #[inline(always)]
+    fn pre_compute<Ctx>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: ExecutionCtxTrait,
+    {
+        let data: &mut BaseAluPreCompute = data.borrow_mut(); // typed view of the raw bytes
+        let is_imm = self.pre_compute_impl(pc, inst, data)?;
+        let opcode = inst.opcode;
+        // Select the handler instantiated for this (is_imm, local opcode) pair.
+        let fn_ptr = match (
+            is_imm,
+            BaseAluOpcode::from_usize(opcode.local_opcode_idx(self.offset)),
+        ) {
+            (true, BaseAluOpcode::ADD) => execute_e1_impl::<_, _, true, AddOp>,
+            (false, BaseAluOpcode::ADD) => execute_e1_impl::<_, _, false, AddOp>,
+            (true, BaseAluOpcode::SUB) => execute_e1_impl::<_, _, true, SubOp>,
+            (false, BaseAluOpcode::SUB) => execute_e1_impl::<_, _, false, SubOp>,
+            (true, BaseAluOpcode::XOR) => execute_e1_impl::<_, _, true, XorOp>,
+            (false, BaseAluOpcode::XOR) => execute_e1_impl::<_, _, false, XorOp>,
+            (true, BaseAluOpcode::OR) => execute_e1_impl::<_, _, true, OrOp>,
+            (false, BaseAluOpcode::OR) => execute_e1_impl::<_, _, false, OrOp>,
+            (true, BaseAluOpcode::AND) => execute_e1_impl::<_, _, true, AndOp>,
+            (false, BaseAluOpcode::AND) => execute_e1_impl::<_, _, false, AndOp>,
+        };
+        Ok(fn_ptr)
+    }
+}
+
+impl<F, A, const LIMB_BITS: usize> MeteredExecutor<F>
+    for BaseAluExecutor<A, { RV32_REGISTER_NUM_LIMBS }, LIMB_BITS>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn metered_pre_compute_size(&self) -> usize {
+        size_of::<E2PreCompute<BaseAluPreCompute>>() // payload plus the `chip_idx` field
+    }
+
+    #[inline(always)]
+    fn metered_pre_compute<Ctx>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        let data: &mut E2PreCompute<BaseAluPreCompute> = data.borrow_mut(); // typed view
+        data.chip_idx = chip_idx as u32; // read back by `execute_e2_impl`
+        let is_imm = self.pre_compute_impl(pc, inst, &mut data.data)?;
+        let opcode = inst.opcode;
+        // Same dispatch as `pre_compute`, but onto the metered (e2) handlers.
+        let fn_ptr = match (
+            is_imm,
+            BaseAluOpcode::from_usize(opcode.local_opcode_idx(self.offset)),
+        ) {
+            (true, BaseAluOpcode::ADD) => execute_e2_impl::<_, _, true, AddOp>,
+            (false, BaseAluOpcode::ADD) => execute_e2_impl::<_, _, false, AddOp>,
+            (true, BaseAluOpcode::SUB) => execute_e2_impl::<_, _, true, SubOp>,
+            (false, BaseAluOpcode::SUB) => execute_e2_impl::<_, _, false, SubOp>,
+            (true, BaseAluOpcode::XOR) => execute_e2_impl::<_, _, true, XorOp>,
+            (false, BaseAluOpcode::XOR) => execute_e2_impl::<_, _, false, XorOp>,
+            (true, BaseAluOpcode::OR) => execute_e2_impl::<_, _, true, OrOp>,
+            (false, BaseAluOpcode::OR) => execute_e2_impl::<_, _, false, OrOp>,
+            (true, BaseAluOpcode::AND) => execute_e2_impl::<_, _, true, AndOp>,
+            (false, BaseAluOpcode::AND) => execute_e2_impl::<_, _, false, AndOp>,
+        };
+        Ok(fn_ptr)
+    }
+}
+
+#[inline(always)]
+unsafe fn execute_e12_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const IS_IMM: bool,
+    OP: AluOp,
+>(
+    pre_compute: &BaseAluPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    // Shared body of the e1/e2 handlers: rd <- OP(rs1, rs2 or imm), then step to the next pc.
+    let src1 = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.b as u32);
+    let lhs = u32::from_le_bytes(src1);
+    // `c` already holds the immediate value when IS_IMM; otherwise it is a register pointer.
+    let rhs = if IS_IMM {
+        pre_compute.c
+    } else {
+        u32::from_le_bytes(vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.c))
+    };
+    let result = OP::compute(lhs, rhs).to_le_bytes();
+    vm_state.vm_write::<u8, 4>(RV32_REGISTER_AS, pre_compute.a as u32, &result);
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+}
+
+#[inline(always)]
+unsafe fn execute_e1_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const IS_IMM: bool,
+    OP: AluOp,
+>(
+    pre_compute: &[u8], // bytes filled in by `Executor::pre_compute`
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &BaseAluPreCompute = pre_compute.borrow(); // typed view of the bytes
+    execute_e12_impl::<F, CTX, IS_IMM, OP>(pre_compute, vm_state);
+}
+
+#[inline(always)]
+unsafe fn execute_e2_impl<
+    F: PrimeField32,
+    CTX: MeteredExecutionCtxTrait,
+    const IS_IMM: bool,
+    OP: AluOp,
+>(
+    pre_compute: &[u8], // bytes filled in by `MeteredExecutor::metered_pre_compute`
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &E2PreCompute<BaseAluPreCompute> = pre_compute.borrow(); // typed view
+    vm_state
+        .ctx
+        .on_height_change(pre_compute.chip_idx as usize, 1); // report +1 for this chip
+    execute_e12_impl::<F, CTX, IS_IMM, OP>(&pre_compute.data, vm_state);
+}
+
+trait AluOp {
+    fn compute(rs1: u32, rs2: u32) -> u32; // combines the two 32-bit operands into the result
+}
+struct AddOp; // marker types: each one selects an operation through its `AluOp` impl
+struct SubOp;
+struct XorOp;
+struct OrOp;
+struct AndOp;
+impl AluOp for AddOp {
+    #[inline(always)]
+    fn compute(rs1: u32, rs2: u32) -> u32 {
+        rs1.wrapping_add(rs2) // wraps modulo 2^32 instead of panicking on overflow
+    }
+}
+impl AluOp for SubOp {
+    #[inline(always)]
+    fn compute(rs1: u32, rs2: u32) -> u32 {
+        rs1.wrapping_sub(rs2) // wraps modulo 2^32 instead of panicking on underflow
+    }
+}
+impl AluOp for XorOp {
+    #[inline(always)]
+    fn compute(rs1: u32, rs2: u32) -> u32 {
+        rs1 ^ rs2 // bitwise XOR
+    }
+}
+impl AluOp for OrOp {
+    #[inline(always)]
+    fn compute(rs1: u32, rs2: u32) -> u32 {
+        rs1 | rs2 // bitwise OR
+    }
+}
+impl AluOp for AndOp {
+    #[inline(always)]
+    fn compute(rs1: u32, rs2: u32) -> u32 {
+        rs1 & rs2 // bitwise AND
+    }
+}
diff --git a/extensions/rv32im/circuit/src/base_alu/mod.rs b/extensions/rv32im/circuit/src/base_alu/mod.rs
index cbda8ce555..48bf704f7d 100644
--- a/extensions/rv32im/circuit/src/base_alu/mod.rs
+++ b/extensions/rv32im/circuit/src/base_alu/mod.rs
@@ -1,16 +1,29 @@
-use openvm_circuit::arch::VmChipWrapper;
+use openvm_circuit::arch::{VmAirWrapper, VmChipWrapper};
 
-use super::adapters::{RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS};
-use crate::adapters::Rv32BaseAluAdapterChip;
+use super::adapters::{
+    Rv32BaseAluAdapterAir, Rv32BaseAluAdapterExecutor, Rv32BaseAluAdapterFiller, RV32_CELL_BITS,
+    RV32_REGISTER_NUM_LIMBS,
+};
 
 mod core;
+mod execution;
 pub use core::*;
 
 #[cfg(test)]
 mod tests;
 
+pub type Rv32BaseAluAir =
+    VmAirWrapper<Rv32BaseAluAdapterAir, BaseAluCoreAir<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>>;
+pub type Rv32BaseAluExecutor = BaseAluExecutor<
+    Rv32BaseAluAdapterExecutor<RV32_CELL_BITS>,
+    RV32_REGISTER_NUM_LIMBS,
+    RV32_CELL_BITS,
+>;
 pub type Rv32BaseAluChip<F> = VmChipWrapper<
     F,
-    Rv32BaseAluAdapterChip<F>,
-    BaseAluCoreChip<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>,
+    BaseAluFiller<
+        Rv32BaseAluAdapterFiller<RV32_CELL_BITS>,
+        RV32_REGISTER_NUM_LIMBS,
+        RV32_CELL_BITS,
+    >,
 >;
diff --git a/extensions/rv32im/circuit/src/base_alu/tests.rs b/extensions/rv32im/circuit/src/base_alu/tests.rs
index 165cd12526..bc1880953c 100644
--- a/extensions/rv32im/circuit/src/base_alu/tests.rs
+++ b/extensions/rv32im/circuit/src/base_alu/tests.rs
@@ -1,44 +1,119 @@
-use std::borrow::BorrowMut;
+use std::{array, borrow::BorrowMut, sync::Arc};
 
-use openvm_circuit::{
-    arch::{
-        testing::{TestAdapterChip, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS},
-        AdapterRuntimeContext, BasicAdapterInterface, ExecutionBridge, ExecutionState,
-        MinimalInstruction, Result, VmAdapterChip, VmAdapterInterface, VmChipWrapper,
-    },
-    system::memory::{MemoryController, OfflineMemory},
-    utils::generate_long_number,
-};
+use openvm_circuit::arch::testing::{TestChipHarness, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS};
 use openvm_circuit_primitives::bitwise_op_lookup::{
-    BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
+    BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
+    SharedBitwiseOperationLookupChip,
 };
-use openvm_instructions::{instruction::Instruction, LocalOpcode};
-use openvm_rv32im_transpiler::BaseAluOpcode;
+use openvm_instructions::LocalOpcode;
+use openvm_rv32im_transpiler::BaseAluOpcode::{self, *};
 use openvm_stark_backend::{
     p3_air::BaseAir,
-    p3_field::{Field, FieldAlgebra, PrimeField32},
+    p3_field::{FieldAlgebra, PrimeField32},
     p3_matrix::{
         dense::{DenseMatrix, RowMajorMatrix},
         Matrix,
     },
     utils::disable_debug_builder,
-    verifier::VerificationError,
-    ChipUsageGetter,
 };
 use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
-use rand::Rng;
+use rand::{rngs::StdRng, Rng};
+use test_case::test_case;
 
-use super::{core::run_alu, BaseAluCoreChip, Rv32BaseAluChip};
+use super::{core::run_alu, BaseAluCoreAir, Rv32BaseAluChip, Rv32BaseAluExecutor};
 use crate::{
     adapters::{
-        Rv32BaseAluAdapterAir, Rv32BaseAluAdapterChip, Rv32BaseAluReadRecord,
-        Rv32BaseAluWriteRecord, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS,
+        Rv32BaseAluAdapterAir, Rv32BaseAluAdapterExecutor, Rv32BaseAluAdapterFiller,
+        RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS,
     },
     base_alu::BaseAluCoreCols,
-    test_utils::{generate_rv32_is_type_immediate, rv32_rand_write_register_or_imm},
+    test_utils::{
+        generate_rv32_is_type_immediate, get_verification_error, rv32_rand_write_register_or_imm,
+    },
+    BaseAluFiller, Rv32BaseAluAir,
 };
 
+const MAX_INS_CAPACITY: usize = 128;
 type F = BabyBear;
+type Harness = TestChipHarness<F, Rv32BaseAluExecutor, Rv32BaseAluAir, Rv32BaseAluChip<F>>;
+
+fn create_test_chip(
+    tester: &VmChipTestBuilder<F>,
+) -> (
+    Harness,
+    (
+        BitwiseOperationLookupAir<RV32_CELL_BITS>,
+        SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+    ),
+) {
+    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
+    let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+        bitwise_bus,
+    ));
+    // The AIR, the executor and the chip (trace filler) share one bus and opcode offset.
+    let air = Rv32BaseAluAir::new(
+        Rv32BaseAluAdapterAir::new(
+            tester.execution_bridge(),
+            tester.memory_bridge(),
+            bitwise_bus,
+        ),
+        BaseAluCoreAir::new(bitwise_bus, BaseAluOpcode::CLASS_OFFSET),
+    );
+    let executor = Rv32BaseAluExecutor::new(
+        Rv32BaseAluAdapterExecutor::new(),
+        BaseAluOpcode::CLASS_OFFSET,
+    );
+    let chip = Rv32BaseAluChip::new(
+        BaseAluFiller::new(
+            Rv32BaseAluAdapterFiller::new(bitwise_chip.clone()),
+            bitwise_chip.clone(),
+            BaseAluOpcode::CLASS_OFFSET,
+        ),
+        tester.memory_helper(),
+    );
+    let harness = Harness::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
+    // Also hand back the lookup AIR and chip; the tests pass them to `load_periphery`.
+    (harness, (bitwise_chip.air, bitwise_chip))
+}
+
+fn set_and_execute(
+    tester: &mut VmChipTestBuilder<F>,
+    harness: &mut Harness,
+    rng: &mut StdRng,
+    opcode: BaseAluOpcode,
+    b: Option<[u8; RV32_REGISTER_NUM_LIMBS]>,
+    is_imm: Option<bool>,
+    c: Option<[u8; RV32_REGISTER_NUM_LIMBS]>,
+) {
+    // Runs one `opcode` instruction; `None` operands are drawn lazily from `rng`.
+    let b = b.unwrap_or_else(|| array::from_fn(|_| rng.gen_range(0..=u8::MAX)));
+    let (c_imm, c) = if is_imm.unwrap_or_else(|| rng.gen_bool(0.5)) {
+        let (imm, c) = match c {
+            Some(c) => ((u32::from_le_bytes(c) & 0xFFFFFF) as usize, c),
+            None => generate_rv32_is_type_immediate(rng),
+        };
+        (Some(imm), c)
+    } else {
+        (
+            None,
+            c.unwrap_or_else(|| array::from_fn(|_| rng.gen_range(0..=u8::MAX))),
+        )
+    };
+
+    let (instruction, rd) = rv32_rand_write_register_or_imm(
+        tester,
+        b,
+        c,
+        c_imm,
+        opcode.global_opcode().as_usize(),
+        rng,
+    );
+    tester.execute(harness, &instruction);
+    // The value written to `rd` must match the reference implementation `run_alu`.
+    let a = run_alu::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(opcode, &b, &c)
+        .map(F::from_canonical_u8);
+    assert_eq!(a, tester.read::<RV32_REGISTER_NUM_LIMBS>(1, rd))
+}
 
 //////////////////////////////////////////////////////////////////////////////////////
 // POSITIVE TESTS
@@ -47,227 +122,266 @@ type F = BabyBear;
 // passes all constraints.
 //////////////////////////////////////////////////////////////////////////////////////
 
-fn run_rv32_alu_rand_test(opcode: BaseAluOpcode, num_ops: usize) {
+#[test_case(ADD, 100)]
+#[test_case(SUB, 100)]
+#[test_case(XOR, 100)]
+#[test_case(OR, 100)]
+#[test_case(AND, 100)]
+fn rand_rv32_alu_test(opcode: BaseAluOpcode, num_ops: usize) {
     let mut rng = create_seeded_rng();
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
 
     let mut tester = VmChipTestBuilder::default();
-    let mut chip = Rv32BaseAluChip::<F>::new(
-        Rv32BaseAluAdapterChip::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-            bitwise_chip.clone(),
-        ),
-        BaseAluCoreChip::new(bitwise_chip.clone(), BaseAluOpcode::CLASS_OFFSET),
-        tester.offline_memory_mutex_arc(),
-    );
+    let (mut harness, bitwise) = create_test_chip(&tester);
 
-    for _ in 0..num_ops {
-        let b = generate_long_number::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(&mut rng);
-        let (c_imm, c) = if rng.gen_bool(0.5) {
-            (
-                None,
-                generate_long_number::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(&mut rng),
-            )
-        } else {
-            let (imm, c) = generate_rv32_is_type_immediate(&mut rng);
-            (Some(imm), c)
-        };
+    // TODO(AG): make a more meaningful test for memory accesses
+    tester.write(2, 1024, [F::ONE; 4]);
+    tester.write(2, 1028, [F::ONE; 4]);
+    let sm = tester.read(2, 1024);
+    assert_eq!(sm, [F::ONE; 8]);
 
-        let (instruction, rd) = rv32_rand_write_register_or_imm(
+    for _ in 0..num_ops {
+        set_and_execute(
             &mut tester,
-            b,
-            c,
-            c_imm,
-            opcode.global_opcode().as_usize(),
+            &mut harness,
             &mut rng,
+            opcode,
+            None,
+            None,
+            None,
         );
-        tester.execute(&mut chip, &instruction);
-
-        let a = run_alu::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(opcode, &b, &c)
-            .map(F::from_canonical_u32);
-        assert_eq!(a, tester.read::<RV32_REGISTER_NUM_LIMBS>(1, rd))
     }
 
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
+    let tester = tester
+        .build()
+        .load(harness)
+        .load_periphery(bitwise)
+        .finalize();
     tester.simple_test().expect("Verification failed");
 }
 
-#[test]
-fn rv32_alu_add_rand_test() {
-    run_rv32_alu_rand_test(BaseAluOpcode::ADD, 100);
-}
+#[test_case(ADD, 100)]
+#[test_case(SUB, 100)]
+#[test_case(XOR, 100)]
+#[test_case(OR, 100)]
+#[test_case(AND, 100)]
+fn rand_rv32_alu_test_persistent(opcode: BaseAluOpcode, num_ops: usize) {
+    let mut rng = create_seeded_rng();
 
-#[test]
-fn rv32_alu_sub_rand_test() {
-    run_rv32_alu_rand_test(BaseAluOpcode::SUB, 100);
-}
+    let mut tester = VmChipTestBuilder::default_persistent();
+    let (mut harness, bitwise) = create_test_chip(&tester);
 
-#[test]
-fn rv32_alu_xor_rand_test() {
-    run_rv32_alu_rand_test(BaseAluOpcode::XOR, 100);
-}
+    // TODO(AG): make a more meaningful test for memory accesses
+    tester.write(2, 1024, [F::ONE; 4]);
+    tester.write(2, 1028, [F::ONE; 4]);
+    let sm = tester.read(2, 1024);
+    assert_eq!(sm, [F::ONE; 8]);
 
-#[test]
-fn rv32_alu_or_rand_test() {
-    run_rv32_alu_rand_test(BaseAluOpcode::OR, 100);
-}
+    for _ in 0..num_ops {
+        set_and_execute(
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            opcode,
+            None,
+            None,
+            None,
+        );
+    }
 
-#[test]
-fn rv32_alu_and_rand_test() {
-    run_rv32_alu_rand_test(BaseAluOpcode::AND, 100);
+    let tester = tester
+        .build()
+        .load(harness)
+        .load_periphery(bitwise)
+        .finalize();
+    tester.simple_test().expect("Verification failed");
 }
 
 //////////////////////////////////////////////////////////////////////////////////////
 // NEGATIVE TESTS
 //
 // Given a fake trace of a single operation, setup a chip and run the test. We replace
-// the write part of the trace and check that the core chip throws the expected error.
-// A dummy adapter is used so memory interactions don't indirectly cause false passes.
+// part of the trace and check that the chip throws the expected error.
 //////////////////////////////////////////////////////////////////////////////////////
 
-type Rv32BaseAluTestChip<F> =
-    VmChipWrapper<F, TestAdapterChip<F>, BaseAluCoreChip<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>>;
-
 #[allow(clippy::too_many_arguments)]
-fn run_rv32_alu_negative_test(
+fn run_negative_alu_test(
     opcode: BaseAluOpcode,
-    a: [u32; RV32_REGISTER_NUM_LIMBS],
-    b: [u32; RV32_REGISTER_NUM_LIMBS],
-    c: [u32; RV32_REGISTER_NUM_LIMBS],
+    prank_a: [u32; RV32_REGISTER_NUM_LIMBS],
+    b: [u8; RV32_REGISTER_NUM_LIMBS],
+    c: [u8; RV32_REGISTER_NUM_LIMBS],
+    prank_c: Option<[u32; RV32_REGISTER_NUM_LIMBS]>,
+    prank_opcode_flags: Option<[bool; 5]>,
+    is_imm: Option<bool>,
     interaction_error: bool,
 ) {
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-
+    let mut rng = create_seeded_rng();
     let mut tester: VmChipTestBuilder<BabyBear> = VmChipTestBuilder::default();
-    let mut chip = Rv32BaseAluTestChip::<F>::new(
-        TestAdapterChip::new(
-            vec![[b.map(F::from_canonical_u32), c.map(F::from_canonical_u32)].concat()],
-            vec![None],
-            ExecutionBridge::new(tester.execution_bus(), tester.program_bus()),
-        ),
-        BaseAluCoreChip::new(bitwise_chip.clone(), BaseAluOpcode::CLASS_OFFSET),
-        tester.offline_memory_mutex_arc(),
-    );
+    let (mut chip, bitwise) = create_test_chip(&tester);
 
-    tester.execute(
+    set_and_execute(
+        &mut tester,
         &mut chip,
-        &Instruction::from_usize(opcode.global_opcode(), [0, 0, 0, 1, 1]),
+        &mut rng,
+        opcode,
+        Some(b),
+        is_imm,
+        Some(c),
     );
 
-    let trace_width = chip.trace_width();
-    let adapter_width = BaseAir::<F>::width(chip.adapter.air());
-
-    if (opcode == BaseAluOpcode::ADD || opcode == BaseAluOpcode::SUB)
-        && a.iter().all(|&a_val| a_val < (1 << RV32_CELL_BITS))
-    {
-        bitwise_chip.clear();
-        for a_val in a {
-            bitwise_chip.request_xor(a_val, a_val);
-        }
-    }
-
+    let adapter_width = BaseAir::<F>::width(&chip.air.adapter);
     let modify_trace = |trace: &mut DenseMatrix<BabyBear>| {
         let mut values = trace.row_slice(0).to_vec();
         let cols: &mut BaseAluCoreCols<F, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS> =
             values.split_at_mut(adapter_width).1.borrow_mut();
-        cols.a = a.map(F::from_canonical_u32);
-        *trace = RowMajorMatrix::new(values, trace_width);
+        cols.a = prank_a.map(F::from_canonical_u32);
+        if let Some(prank_c) = prank_c {
+            cols.c = prank_c.map(F::from_canonical_u32);
+        }
+        if let Some(prank_opcode_flags) = prank_opcode_flags {
+            cols.opcode_add_flag = F::from_bool(prank_opcode_flags[0]);
+            cols.opcode_and_flag = F::from_bool(prank_opcode_flags[1]);
+            cols.opcode_or_flag = F::from_bool(prank_opcode_flags[2]);
+            cols.opcode_sub_flag = F::from_bool(prank_opcode_flags[3]);
+            cols.opcode_xor_flag = F::from_bool(prank_opcode_flags[4]);
+        }
+        *trace = RowMajorMatrix::new(values, trace.width());
     };
 
     disable_debug_builder();
     let tester = tester
         .build()
         .load_and_prank_trace(chip, modify_trace)
-        .load(bitwise_chip)
+        .load_periphery(bitwise)
         .finalize();
-    tester.simple_test_with_expected_error(if interaction_error {
-        VerificationError::ChallengePhaseError
-    } else {
-        VerificationError::OodEvaluationMismatch
-    });
+    tester.simple_test_with_expected_error(get_verification_error(interaction_error));
 }
 
 #[test]
 fn rv32_alu_add_wrong_negative_test() {
-    run_rv32_alu_negative_test(
-        BaseAluOpcode::ADD,
+    run_negative_alu_test(
+        ADD,
         [246, 0, 0, 0],
         [250, 0, 0, 0],
         [250, 0, 0, 0],
+        None,
+        None,
+        None,
         false,
     );
 }
 
 #[test]
 fn rv32_alu_add_out_of_range_negative_test() {
-    run_rv32_alu_negative_test(
-        BaseAluOpcode::ADD,
+    run_negative_alu_test(
+        ADD,
         [500, 0, 0, 0],
         [250, 0, 0, 0],
         [250, 0, 0, 0],
+        None,
+        None,
+        None,
         true,
     );
 }
 
 #[test]
 fn rv32_alu_sub_wrong_negative_test() {
-    run_rv32_alu_negative_test(
-        BaseAluOpcode::SUB,
+    run_negative_alu_test(
+        SUB,
         [255, 0, 0, 0],
         [1, 0, 0, 0],
         [2, 0, 0, 0],
+        None,
+        None,
+        None,
         false,
     );
 }
 
 #[test]
 fn rv32_alu_sub_out_of_range_negative_test() {
-    run_rv32_alu_negative_test(
-        BaseAluOpcode::SUB,
+    run_negative_alu_test(
+        SUB,
         [F::NEG_ONE.as_canonical_u32(), 0, 0, 0],
         [1, 0, 0, 0],
         [2, 0, 0, 0],
+        None,
+        None,
+        None,
         true,
     );
 }
 
 #[test]
 fn rv32_alu_xor_wrong_negative_test() {
-    run_rv32_alu_negative_test(
-        BaseAluOpcode::XOR,
+    run_negative_alu_test(
+        XOR,
         [255, 255, 255, 255],
         [0, 0, 1, 0],
         [255, 255, 255, 255],
+        None,
+        None,
+        None,
         true,
     );
 }
 
 #[test]
 fn rv32_alu_or_wrong_negative_test() {
-    run_rv32_alu_negative_test(
-        BaseAluOpcode::OR,
+    run_negative_alu_test(
+        OR,
         [255, 255, 255, 255],
         [255, 255, 255, 254],
         [0, 0, 0, 0],
+        None,
+        None,
+        None,
         true,
     );
 }
 
 #[test]
 fn rv32_alu_and_wrong_negative_test() {
-    run_rv32_alu_negative_test(
-        BaseAluOpcode::AND,
+    run_negative_alu_test(
+        AND,
         [255, 255, 255, 255],
         [0, 0, 1, 0],
         [0, 0, 0, 0],
+        None,
+        None,
+        None,
         true,
     );
 }
 
+#[test]
+fn rv32_alu_adapter_unconstrained_imm_limb_test() {
+    run_negative_alu_test(
+        ADD,
+        [255, 7, 0, 0],
+        [0, 0, 0, 0],
+        [255, 7, 0, 0],
+        Some([511, 6, 0, 0]),
+        None,
+        Some(true),
+        true,
+    );
+}
+
+#[test]
+fn rv32_alu_adapter_unconstrained_rs2_read_test() {
+    run_negative_alu_test(
+        ADD,
+        [2, 2, 2, 2],
+        [1, 1, 1, 1],
+        [1, 1, 1, 1],
+        None,
+        Some([false, false, false, false, false]),
+        Some(false),
+        false,
+    );
+}
+
 ///////////////////////////////////////////////////////////////////////////////////////
 /// SANITY TESTS
 ///
@@ -276,10 +390,10 @@ fn rv32_alu_and_wrong_negative_test() {
 
 #[test]
 fn run_add_sanity_test() {
-    let x: [u32; RV32_REGISTER_NUM_LIMBS] = [229, 33, 29, 111];
-    let y: [u32; RV32_REGISTER_NUM_LIMBS] = [50, 171, 44, 194];
-    let z: [u32; RV32_REGISTER_NUM_LIMBS] = [23, 205, 73, 49];
-    let result = run_alu::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(BaseAluOpcode::ADD, &x, &y);
+    let x: [u8; RV32_REGISTER_NUM_LIMBS] = [229, 33, 29, 111];
+    let y: [u8; RV32_REGISTER_NUM_LIMBS] = [50, 171, 44, 194];
+    let z: [u8; RV32_REGISTER_NUM_LIMBS] = [23, 205, 73, 49];
+    let result = run_alu::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(ADD, &x, &y);
     for i in 0..RV32_REGISTER_NUM_LIMBS {
         assert_eq!(z[i], result[i])
     }
@@ -287,10 +401,10 @@ fn run_add_sanity_test() {
 
 #[test]
 fn run_sub_sanity_test() {
-    let x: [u32; RV32_REGISTER_NUM_LIMBS] = [229, 33, 29, 111];
-    let y: [u32; RV32_REGISTER_NUM_LIMBS] = [50, 171, 44, 194];
-    let z: [u32; RV32_REGISTER_NUM_LIMBS] = [179, 118, 240, 172];
-    let result = run_alu::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(BaseAluOpcode::SUB, &x, &y);
+    let x: [u8; RV32_REGISTER_NUM_LIMBS] = [229, 33, 29, 111];
+    let y: [u8; RV32_REGISTER_NUM_LIMBS] = [50, 171, 44, 194];
+    let z: [u8; RV32_REGISTER_NUM_LIMBS] = [179, 118, 240, 172];
+    let result = run_alu::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(SUB, &x, &y);
     for i in 0..RV32_REGISTER_NUM_LIMBS {
         assert_eq!(z[i], result[i])
     }
@@ -298,10 +412,10 @@ fn run_sub_sanity_test() {
 
 #[test]
 fn run_xor_sanity_test() {
-    let x: [u32; RV32_REGISTER_NUM_LIMBS] = [229, 33, 29, 111];
-    let y: [u32; RV32_REGISTER_NUM_LIMBS] = [50, 171, 44, 194];
-    let z: [u32; RV32_REGISTER_NUM_LIMBS] = [215, 138, 49, 173];
-    let result = run_alu::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(BaseAluOpcode::XOR, &x, &y);
+    let x: [u8; RV32_REGISTER_NUM_LIMBS] = [229, 33, 29, 111];
+    let y: [u8; RV32_REGISTER_NUM_LIMBS] = [50, 171, 44, 194];
+    let z: [u8; RV32_REGISTER_NUM_LIMBS] = [215, 138, 49, 173];
+    let result = run_alu::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(XOR, &x, &y);
     for i in 0..RV32_REGISTER_NUM_LIMBS {
         assert_eq!(z[i], result[i])
     }
@@ -309,10 +423,10 @@ fn run_xor_sanity_test() {
 
 #[test]
 fn run_or_sanity_test() {
-    let x: [u32; RV32_REGISTER_NUM_LIMBS] = [229, 33, 29, 111];
-    let y: [u32; RV32_REGISTER_NUM_LIMBS] = [50, 171, 44, 194];
-    let z: [u32; RV32_REGISTER_NUM_LIMBS] = [247, 171, 61, 239];
-    let result = run_alu::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(BaseAluOpcode::OR, &x, &y);
+    let x: [u8; RV32_REGISTER_NUM_LIMBS] = [229, 33, 29, 111];
+    let y: [u8; RV32_REGISTER_NUM_LIMBS] = [50, 171, 44, 194];
+    let z: [u8; RV32_REGISTER_NUM_LIMBS] = [247, 171, 61, 239];
+    let result = run_alu::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(OR, &x, &y);
     for i in 0..RV32_REGISTER_NUM_LIMBS {
         assert_eq!(z[i], result[i])
     }
@@ -320,195 +434,11 @@ fn run_or_sanity_test() {
 
 #[test]
 fn run_and_sanity_test() {
-    let x: [u32; RV32_REGISTER_NUM_LIMBS] = [229, 33, 29, 111];
-    let y: [u32; RV32_REGISTER_NUM_LIMBS] = [50, 171, 44, 194];
-    let z: [u32; RV32_REGISTER_NUM_LIMBS] = [32, 33, 12, 66];
-    let result = run_alu::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(BaseAluOpcode::AND, &x, &y);
+    let x: [u8; RV32_REGISTER_NUM_LIMBS] = [229, 33, 29, 111];
+    let y: [u8; RV32_REGISTER_NUM_LIMBS] = [50, 171, 44, 194];
+    let z: [u8; RV32_REGISTER_NUM_LIMBS] = [32, 33, 12, 66];
+    let result = run_alu::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(AND, &x, &y);
     for i in 0..RV32_REGISTER_NUM_LIMBS {
         assert_eq!(z[i], result[i])
     }
 }
-
-//////////////////////////////////////////////////////////////////////////////////////
-// ADAPTER TESTS
-//
-// Ensure that the adapter is correct.
-//////////////////////////////////////////////////////////////////////////////////////
-
-// A pranking chip where `preprocess` can have `rs2` limbs that overflow.
-struct Rv32BaseAluAdapterTestChip<F: Field>(Rv32BaseAluAdapterChip<F>);
-
-impl<F: PrimeField32> VmAdapterChip<F> for Rv32BaseAluAdapterTestChip<F> {
-    type ReadRecord = Rv32BaseAluReadRecord<F>;
-    type WriteRecord = Rv32BaseAluWriteRecord<F>;
-    type Air = Rv32BaseAluAdapterAir;
-    type Interface = BasicAdapterInterface<
-        F,
-        MinimalInstruction<F>,
-        2,
-        1,
-        RV32_REGISTER_NUM_LIMBS,
-        RV32_REGISTER_NUM_LIMBS,
-    >;
-
-    fn preprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-    ) -> Result<(
-        <Self::Interface as VmAdapterInterface<F>>::Reads,
-        Self::ReadRecord,
-    )> {
-        let Instruction { b, c, d, e, .. } = *instruction;
-
-        let rs1 = memory.read::<RV32_REGISTER_NUM_LIMBS>(d, b);
-        let (rs2, rs2_data, rs2_imm) = if e.is_zero() {
-            let c_u32 = c.as_canonical_u32();
-            memory.increment_timestamp();
-            let mask1 = (1 << 9) - 1;
-            let mask2 = (1 << 3) - 2;
-            (
-                None,
-                [
-                    (c_u32 & mask1) as u16,
-                    ((c_u32 >> 8) & mask2) as u16,
-                    (c_u32 >> 16) as u16,
-                    (c_u32 >> 16) as u16,
-                ]
-                .map(F::from_canonical_u16),
-                c,
-            )
-        } else {
-            let rs2_read = memory.read::<RV32_REGISTER_NUM_LIMBS>(e, c);
-            (Some(rs2_read.0), rs2_read.1, F::ZERO)
-        };
-
-        Ok((
-            [rs1.1, rs2_data],
-            Self::ReadRecord {
-                rs1: rs1.0,
-                rs2,
-                rs2_imm,
-            },
-        ))
-    }
-
-    fn postprocess(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-        output: AdapterRuntimeContext<F, Self::Interface>,
-        _read_record: &Self::ReadRecord,
-    ) -> Result<(ExecutionState<u32>, Self::WriteRecord)> {
-        self.0
-            .postprocess(memory, instruction, from_state, output, _read_record)
-    }
-
-    fn generate_trace_row(
-        &self,
-        row_slice: &mut [F],
-        read_record: Self::ReadRecord,
-        write_record: Self::WriteRecord,
-        memory: &OfflineMemory<F>,
-    ) {
-        self.0
-            .generate_trace_row(row_slice, read_record, write_record, memory)
-    }
-
-    fn air(&self) -> &Self::Air {
-        self.0.air()
-    }
-}
-
-#[test]
-fn rv32_alu_adapter_unconstrained_imm_limb_test() {
-    let mut rng = create_seeded_rng();
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-
-    let mut tester = VmChipTestBuilder::default();
-    let mut chip = VmChipWrapper::new(
-        Rv32BaseAluAdapterTestChip(Rv32BaseAluAdapterChip::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-            bitwise_chip.clone(),
-        )),
-        BaseAluCoreChip::new(bitwise_chip.clone(), BaseAluOpcode::CLASS_OFFSET),
-        tester.offline_memory_mutex_arc(),
-    );
-
-    let b = [0, 0, 0, 0];
-    let (c_imm, c) = {
-        let imm = (1 << 11) - 1;
-        let fake_c = [(1 << 9) - 1, (1 << 3) - 2, 0, 0];
-        (Some(imm), fake_c)
-    };
-
-    let (instruction, _rd) = rv32_rand_write_register_or_imm(
-        &mut tester,
-        b,
-        c,
-        c_imm,
-        BaseAluOpcode::ADD.global_opcode().as_usize(),
-        &mut rng,
-    );
-    tester.execute(&mut chip, &instruction);
-
-    disable_debug_builder();
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
-    tester.simple_test_with_expected_error(VerificationError::ChallengePhaseError);
-}
-
-#[test]
-fn rv32_alu_adapter_unconstrained_rs2_read_test() {
-    let mut rng = create_seeded_rng();
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-
-    let mut tester = VmChipTestBuilder::default();
-    let mut chip = Rv32BaseAluChip::<F>::new(
-        Rv32BaseAluAdapterChip::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-            bitwise_chip.clone(),
-        ),
-        BaseAluCoreChip::new(bitwise_chip.clone(), BaseAluOpcode::CLASS_OFFSET),
-        tester.offline_memory_mutex_arc(),
-    );
-
-    let b = [1, 1, 1, 1];
-    let c = [1, 1, 1, 1];
-    let (instruction, _rd) = rv32_rand_write_register_or_imm(
-        &mut tester,
-        b,
-        c,
-        None,
-        BaseAluOpcode::ADD.global_opcode().as_usize(),
-        &mut rng,
-    );
-    tester.execute(&mut chip, &instruction);
-
-    let trace_width = chip.trace_width();
-    let adapter_width = BaseAir::<F>::width(chip.adapter.air());
-
-    let modify_trace = |trace: &mut DenseMatrix<BabyBear>| {
-        let mut values = trace.row_slice(0).to_vec();
-        let mut dummy_values = values.clone();
-        let cols: &mut BaseAluCoreCols<F, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS> =
-            dummy_values.split_at_mut(adapter_width).1.borrow_mut();
-        cols.opcode_add_flag = F::ZERO;
-        values.extend(dummy_values);
-        *trace = RowMajorMatrix::new(values, trace_width);
-    };
-
-    disable_debug_builder();
-    let tester = tester
-        .build()
-        .load_and_prank_trace(chip, modify_trace)
-        .load(bitwise_chip)
-        .finalize();
-    tester.simple_test_with_expected_error(VerificationError::OodEvaluationMismatch);
-}
diff --git a/extensions/rv32im/circuit/src/branch_eq/core.rs b/extensions/rv32im/circuit/src/branch_eq/core.rs
index bb04d86ee5..44b227ec8e 100644
--- a/extensions/rv32im/circuit/src/branch_eq/core.rs
+++ b/extensions/rv32im/circuit/src/branch_eq/core.rs
@@ -1,14 +1,11 @@
-use std::{
-    array,
-    borrow::{Borrow, BorrowMut},
-};
+use std::borrow::{Borrow, BorrowMut};
 
-use openvm_circuit::arch::{
-    AdapterAirContext, AdapterRuntimeContext, ImmInstruction, Result, VmAdapterInterface,
-    VmCoreAir, VmCoreChip,
+use openvm_circuit::{
+    arch::*,
+    system::memory::{online::TracingMemory, MemoryAuxColsFactory},
 };
 use openvm_circuit_primitives::utils::not;
-use openvm_circuit_primitives_derive::AlignedBorrow;
+use openvm_circuit_primitives_derive::{AlignedBorrow, AlignedBytesBorrow};
 use openvm_instructions::{instruction::Instruction, LocalOpcode};
 use openvm_rv32im_transpiler::BranchEqualOpcode;
 use openvm_stark_backend::{
@@ -17,8 +14,6 @@ use openvm_stark_backend::{
     p3_field::{Field, FieldAlgebra, PrimeField32},
     rap::BaseAirWithPublicValues,
 };
-use serde::{Deserialize, Serialize};
-use serde_big_array::BigArray;
 use strum::IntoEnumIterator;
 
 #[repr(C)]
@@ -37,7 +32,7 @@ pub struct BranchEqualCoreCols<T, const NUM_LIMBS: usize> {
     pub diff_inv_marker: [T; NUM_LIMBS],
 }
 
-#[derive(Copy, Clone, Debug)]
+#[derive(Copy, Clone, Debug, derive_new::new)]
 pub struct BranchEqualCoreAir<const NUM_LIMBS: usize> {
     offset: usize,
     pc_step: u32,
@@ -135,117 +130,143 @@ where
 }
 
 #[repr(C)]
-#[derive(Clone, Debug, Serialize, Deserialize)]
-pub struct BranchEqualCoreRecord<T, const NUM_LIMBS: usize> {
-    #[serde(with = "BigArray")]
-    pub a: [T; NUM_LIMBS],
-    #[serde(with = "BigArray")]
-    pub b: [T; NUM_LIMBS],
-    pub cmp_result: T,
-    pub imm: T,
-    pub diff_inv_val: T,
-    pub diff_idx: usize,
-    pub opcode: BranchEqualOpcode,
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct BranchEqualCoreRecord<const NUM_LIMBS: usize> {
+    pub a: [u8; NUM_LIMBS],
+    pub b: [u8; NUM_LIMBS],
+    pub imm: u32,
+    pub local_opcode: u8,
 }
 
-#[derive(Debug)]
-pub struct BranchEqualCoreChip<const NUM_LIMBS: usize> {
-    pub air: BranchEqualCoreAir<NUM_LIMBS>,
+#[derive(Clone, Copy, derive_new::new)]
+pub struct BranchEqualExecutor<A, const NUM_LIMBS: usize> {
+    adapter: A,
+    pub offset: usize,
+    pub pc_step: u32,
 }
 
-impl<const NUM_LIMBS: usize> BranchEqualCoreChip<NUM_LIMBS> {
-    pub fn new(offset: usize, pc_step: u32) -> Self {
-        Self {
-            air: BranchEqualCoreAir { offset, pc_step },
-        }
-    }
+#[derive(Clone, Copy, derive_new::new)]
+pub struct BranchEqualFiller<A, const NUM_LIMBS: usize> {
+    adapter: A,
+    pub offset: usize,
+    pub pc_step: u32,
 }
 
-impl<F: PrimeField32, I: VmAdapterInterface<F>, const NUM_LIMBS: usize> VmCoreChip<F, I>
-    for BranchEqualCoreChip<NUM_LIMBS>
+impl<F, A, RA, const NUM_LIMBS: usize> PreflightExecutor<F, RA>
+    for BranchEqualExecutor<A, NUM_LIMBS>
 where
-    I::Reads: Into<[[F; NUM_LIMBS]; 2]>,
-    I::Writes: Default,
+    F: PrimeField32,
+    A: 'static + AdapterTraceExecutor<F, ReadData: Into<[[u8; NUM_LIMBS]; 2]>, WriteData = ()>,
+    for<'buf> RA: RecordArena<
+        'buf,
+        EmptyAdapterCoreLayout<F, A>,
+        (
+            A::RecordMut<'buf>,
+            &'buf mut BranchEqualCoreRecord<NUM_LIMBS>,
+        ),
+    >,
 {
-    type Record = BranchEqualCoreRecord<F, NUM_LIMBS>;
-    type Air = BranchEqualCoreAir<NUM_LIMBS>;
+    fn get_opcode_name(&self, opcode: usize) -> String {
+        format!("{:?}", BranchEqualOpcode::from_usize(opcode - self.offset))
+    }
 
-    #[allow(clippy::type_complexity)]
-    fn execute_instruction(
+    fn execute(
         &self,
+        state: VmStateMut<F, TracingMemory, RA>,
         instruction: &Instruction<F>,
-        from_pc: u32,
-        reads: I::Reads,
-    ) -> Result<(AdapterRuntimeContext<F, I>, Self::Record)> {
-        let Instruction { opcode, c: imm, .. } = *instruction;
-        let branch_eq_opcode =
-            BranchEqualOpcode::from_usize(opcode.local_opcode_idx(self.air.offset));
-
-        let data: [[F; NUM_LIMBS]; 2] = reads.into();
-        let x = data[0].map(|x| x.as_canonical_u32());
-        let y = data[1].map(|y| y.as_canonical_u32());
-        let (cmp_result, diff_idx, diff_inv_val) = run_eq::<F, NUM_LIMBS>(branch_eq_opcode, &x, &y);
-
-        let output = AdapterRuntimeContext {
-            to_pc: cmp_result.then_some((F::from_canonical_u32(from_pc) + imm).as_canonical_u32()),
-            writes: Default::default(),
-        };
-        let record = BranchEqualCoreRecord {
-            opcode: branch_eq_opcode,
-            a: data[0],
-            b: data[1],
-            cmp_result: F::from_bool(cmp_result),
-            imm,
-            diff_idx,
-            diff_inv_val,
-        };
-
-        Ok((output, record))
-    }
+    ) -> Result<(), ExecutionError> {
+        let &Instruction { opcode, c: imm, .. } = instruction;
 
-    fn get_opcode_name(&self, opcode: usize) -> String {
-        format!(
-            "{:?}",
-            BranchEqualOpcode::from_usize(opcode - self.air.offset)
-        )
-    }
+        let branch_eq_opcode = BranchEqualOpcode::from_usize(opcode.local_opcode_idx(self.offset));
 
-    fn generate_trace_row(&self, row_slice: &mut [F], record: Self::Record) {
-        let row_slice: &mut BranchEqualCoreCols<_, NUM_LIMBS> = row_slice.borrow_mut();
-        row_slice.a = record.a;
-        row_slice.b = record.b;
-        row_slice.cmp_result = record.cmp_result;
-        row_slice.imm = record.imm;
-        row_slice.opcode_beq_flag = F::from_bool(record.opcode == BranchEqualOpcode::BEQ);
-        row_slice.opcode_bne_flag = F::from_bool(record.opcode == BranchEqualOpcode::BNE);
-        row_slice.diff_inv_marker = array::from_fn(|i| {
-            if i == record.diff_idx {
-                record.diff_inv_val
-            } else {
-                F::ZERO
-            }
-        });
+        let (mut adapter_record, core_record) = state.ctx.alloc(EmptyAdapterCoreLayout::new());
+
+        A::start(*state.pc, state.memory, &mut adapter_record);
+
+        let [rs1, rs2] = self
+            .adapter
+            .read(state.memory, instruction, &mut adapter_record)
+            .into();
+
+        core_record.a = rs1;
+        core_record.b = rs2;
+        core_record.imm = imm.as_canonical_u32();
+        core_record.local_opcode = branch_eq_opcode as u8;
+
+        if fast_run_eq(branch_eq_opcode, &rs1, &rs2) {
+            *state.pc = (F::from_canonical_u32(*state.pc) + imm).as_canonical_u32();
+        } else {
+            *state.pc = state.pc.wrapping_add(self.pc_step);
+        }
+
+        Ok(())
     }
+}
+
+impl<F, A, const NUM_LIMBS: usize> TraceFiller<F> for BranchEqualFiller<A, NUM_LIMBS>
+where
+    F: PrimeField32,
+    A: 'static + AdapterTraceFiller<F>,
+{
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, row_slice: &mut [F]) {
+        let (adapter_row, mut core_row) = unsafe { row_slice.split_at_mut_unchecked(A::WIDTH) };
+        self.adapter.fill_trace_row(mem_helper, adapter_row);
+        let record: &BranchEqualCoreRecord<NUM_LIMBS> =
+            unsafe { get_record_from_slice(&mut core_row, ()) };
+        let core_row: &mut BranchEqualCoreCols<F, NUM_LIMBS> = core_row.borrow_mut();
+
+        let (cmp_result, diff_idx, diff_inv_val) = run_eq::<F, NUM_LIMBS>(
+            record.local_opcode == BranchEqualOpcode::BEQ as u8,
+            &record.a,
+            &record.b,
+        );
+        core_row.diff_inv_marker = [F::ZERO; NUM_LIMBS];
+        core_row.diff_inv_marker[diff_idx] = diff_inv_val;
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        core_row.opcode_bne_flag =
+            F::from_bool(record.local_opcode == BranchEqualOpcode::BNE as u8);
+        core_row.opcode_beq_flag =
+            F::from_bool(record.local_opcode == BranchEqualOpcode::BEQ as u8);
+
+        core_row.imm = F::from_canonical_u32(record.imm);
+        core_row.cmp_result = F::from_bool(cmp_result);
+
+        core_row.b = record.b.map(F::from_canonical_u8);
+        core_row.a = record.a.map(F::from_canonical_u8);
     }
 }
 
 // Returns (cmp_result, diff_idx, x[diff_idx] - y[diff_idx])
-pub(super) fn run_eq<F: PrimeField32, const NUM_LIMBS: usize>(
+#[inline(always)]
+pub(super) fn fast_run_eq<const NUM_LIMBS: usize>(
     local_opcode: BranchEqualOpcode,
-    x: &[u32; NUM_LIMBS],
-    y: &[u32; NUM_LIMBS],
-) -> (bool, usize, F) {
+    x: &[u8; NUM_LIMBS],
+    y: &[u8; NUM_LIMBS],
+) -> bool {
+    match local_opcode {
+        BranchEqualOpcode::BEQ => x == y,
+        BranchEqualOpcode::BNE => x != y,
+    }
+}
+
+// Returns (cmp_result, diff_idx, (x[diff_idx] - y[diff_idx])^-1); (is_beq, 0, 0) when x == y
+#[inline(always)]
+pub(super) fn run_eq<F, const NUM_LIMBS: usize>(
+    is_beq: bool,
+    x: &[u8; NUM_LIMBS],
+    y: &[u8; NUM_LIMBS],
+) -> (bool, usize, F)
+where
+    F: PrimeField32,
+{
     for i in 0..NUM_LIMBS {
         if x[i] != y[i] {
             return (
-                local_opcode == BranchEqualOpcode::BNE,
+                !is_beq,
                 i,
-                (F::from_canonical_u32(x[i]) - F::from_canonical_u32(y[i])).inverse(),
+                (F::from_canonical_u8(x[i]) - F::from_canonical_u8(y[i])).inverse(),
             );
         }
     }
-    (local_opcode == BranchEqualOpcode::BEQ, 0, F::ZERO)
+    (is_beq, 0, F::ZERO)
 }
diff --git a/extensions/rv32im/circuit/src/branch_eq/execution.rs b/extensions/rv32im/circuit/src/branch_eq/execution.rs
new file mode 100644
index 0000000000..dba0d8cddb
--- /dev/null
+++ b/extensions/rv32im/circuit/src/branch_eq/execution.rs
@@ -0,0 +1,151 @@
+use std::{
+    borrow::{Borrow, BorrowMut},
+    mem::size_of,
+};
+
+use openvm_circuit::{
+    arch::{
+        E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait,
+        MeteredExecutor, StaticProgramError, VmExecState,
+    },
+    system::memory::online::GuestMemory,
+};
+use openvm_circuit_primitives_derive::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_REGISTER_AS, LocalOpcode,
+};
+use openvm_rv32im_transpiler::BranchEqualOpcode;
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::BranchEqualExecutor;
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct BranchEqualPreCompute {
+    imm: isize,
+    a: u8,
+    b: u8,
+}
+
+impl<A, const NUM_LIMBS: usize> BranchEqualExecutor<A, NUM_LIMBS> {
+    /// Fills `data` from `inst`; returns `is_bne` (true iff the local opcode is BNE).
+    #[inline(always)]
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut BranchEqualPreCompute,
+    ) -> Result<bool, StaticProgramError> {
+        let data: &mut BranchEqualPreCompute = data.borrow_mut();
+        let &Instruction {
+            opcode, a, b, c, d, ..
+        } = inst;
+        let local_opcode = BranchEqualOpcode::from_usize(opcode.local_opcode_idx(self.offset));
+        let c = c.as_canonical_u32();
+        let imm = if F::ORDER_U32 - c < c {
+            -((F::ORDER_U32 - c) as isize)
+        } else {
+            c as isize
+        };
+        if d.as_canonical_u32() != RV32_REGISTER_AS {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+        *data = BranchEqualPreCompute {
+            imm,
+            a: a.as_canonical_u32() as u8,
+            b: b.as_canonical_u32() as u8,
+        };
+        Ok(local_opcode == BranchEqualOpcode::BNE)
+    }
+}
+
+impl<F, A, const NUM_LIMBS: usize> Executor<F> for BranchEqualExecutor<A, NUM_LIMBS>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize {
+        size_of::<BranchEqualPreCompute>()
+    }
+
+    #[inline(always)]
+    fn pre_compute<Ctx: ExecutionCtxTrait>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let data: &mut BranchEqualPreCompute = data.borrow_mut();
+        let is_bne = self.pre_compute_impl(pc, inst, data)?;
+        let fn_ptr = if is_bne {
+            execute_e1_impl::<_, _, true>
+        } else {
+            execute_e1_impl::<_, _, false>
+        };
+        Ok(fn_ptr)
+    }
+}
+
+impl<F, A, const NUM_LIMBS: usize> MeteredExecutor<F> for BranchEqualExecutor<A, NUM_LIMBS>
+where
+    F: PrimeField32,
+{
+    fn metered_pre_compute_size(&self) -> usize {
+        size_of::<E2PreCompute<BranchEqualPreCompute>>()
+    }
+
+    fn metered_pre_compute<Ctx>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        let data: &mut E2PreCompute<BranchEqualPreCompute> = data.borrow_mut();
+        data.chip_idx = chip_idx as u32;
+        let is_bne = self.pre_compute_impl(pc, inst, &mut data.data)?;
+        let fn_ptr = if is_bne {
+            execute_e2_impl::<_, _, true>
+        } else {
+            execute_e2_impl::<_, _, false>
+        };
+        Ok(fn_ptr)
+    }
+}
+
+#[inline(always)]
+unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait, const IS_NE: bool>(
+    pre_compute: &BranchEqualPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let rs1 = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.a as u32);
+    let rs2 = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.b as u32);
+    if (rs1 == rs2) ^ IS_NE {
+        vm_state.pc = (vm_state.pc as isize + pre_compute.imm) as u32;
+    } else {
+        vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    }
+    vm_state.instret += 1;
+}
+
+unsafe fn execute_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait, const IS_NE: bool>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &BranchEqualPreCompute = pre_compute.borrow();
+    execute_e12_impl::<F, CTX, IS_NE>(pre_compute, vm_state);
+}
+
+unsafe fn execute_e2_impl<F: PrimeField32, CTX: MeteredExecutionCtxTrait, const IS_NE: bool>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &E2PreCompute<BranchEqualPreCompute> = pre_compute.borrow();
+    vm_state
+        .ctx
+        .on_height_change(pre_compute.chip_idx as usize, 1);
+    execute_e12_impl::<F, CTX, IS_NE>(&pre_compute.data, vm_state);
+}
diff --git a/extensions/rv32im/circuit/src/branch_eq/mod.rs b/extensions/rv32im/circuit/src/branch_eq/mod.rs
index 7d53946a73..f6c142c1b7 100644
--- a/extensions/rv32im/circuit/src/branch_eq/mod.rs
+++ b/extensions/rv32im/circuit/src/branch_eq/mod.rs
@@ -1,13 +1,18 @@
-use openvm_circuit::arch::VmChipWrapper;
+use openvm_circuit::arch::{VmAirWrapper, VmChipWrapper};
 
 use super::adapters::RV32_REGISTER_NUM_LIMBS;
-use crate::adapters::Rv32BranchAdapterChip;
+use crate::adapters::{Rv32BranchAdapterAir, Rv32BranchAdapterExecutor, Rv32BranchAdapterFiller};
 
 mod core;
+mod execution;
 pub use core::*;
 
 #[cfg(test)]
 mod tests;
 
+pub type Rv32BranchEqualAir =
+    VmAirWrapper<Rv32BranchAdapterAir, BranchEqualCoreAir<RV32_REGISTER_NUM_LIMBS>>;
+pub type Rv32BranchEqualExecutor =
+    BranchEqualExecutor<Rv32BranchAdapterExecutor, RV32_REGISTER_NUM_LIMBS>;
 pub type Rv32BranchEqualChip<F> =
-    VmChipWrapper<F, Rv32BranchAdapterChip<F>, BranchEqualCoreChip<RV32_REGISTER_NUM_LIMBS>>;
+    VmChipWrapper<F, BranchEqualFiller<Rv32BranchAdapterFiller, RV32_REGISTER_NUM_LIMBS>>;
diff --git a/extensions/rv32im/circuit/src/branch_eq/tests.rs b/extensions/rv32im/circuit/src/branch_eq/tests.rs
index c16858b071..094a13f259 100644
--- a/extensions/rv32im/circuit/src/branch_eq/tests.rs
+++ b/extensions/rv32im/circuit/src/branch_eq/tests.rs
@@ -1,11 +1,11 @@
 use std::{array, borrow::BorrowMut};
 
-use openvm_circuit::arch::{
-    testing::{memory::gen_pointer, TestAdapterChip, VmChipTestBuilder},
-    BasicAdapterInterface, ExecutionBridge, ImmInstruction, InstructionExecutor, VmAdapterChip,
-    VmChipWrapper, VmCoreChip,
+use openvm_circuit::arch::testing::{memory::gen_pointer, TestChipHarness, VmChipTestBuilder};
+use openvm_instructions::{
+    instruction::Instruction,
+    program::{DEFAULT_PC_STEP, PC_BITS},
+    LocalOpcode,
 };
-use openvm_instructions::{instruction::Instruction, program::PC_BITS, LocalOpcode};
 use openvm_rv32im_transpiler::BranchEqualOpcode;
 use openvm_stark_backend::{
     p3_air::BaseAir,
@@ -15,44 +15,76 @@ use openvm_stark_backend::{
         Matrix,
     },
     utils::disable_debug_builder,
-    verifier::VerificationError,
-    ChipUsageGetter,
 };
 use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
 use rand::{rngs::StdRng, Rng};
+use test_case::test_case;
 
-use super::{
-    core::{run_eq, BranchEqualCoreChip},
-    BranchEqualCoreCols, Rv32BranchEqualChip,
+use super::{core::run_eq, BranchEqualCoreCols, Rv32BranchEqualChip};
+use crate::{
+    adapters::{
+        Rv32BranchAdapterAir, Rv32BranchAdapterExecutor, Rv32BranchAdapterFiller,
+        RV32_REGISTER_NUM_LIMBS, RV_B_TYPE_IMM_BITS,
+    },
+    branch_eq::fast_run_eq,
+    test_utils::get_verification_error,
+    BranchEqualCoreAir, BranchEqualFiller, Rv32BranchEqualAir, Rv32BranchEqualExecutor,
 };
-use crate::adapters::{Rv32BranchAdapterChip, RV32_REGISTER_NUM_LIMBS, RV_B_TYPE_IMM_BITS};
 
 type F = BabyBear;
+const MAX_INS_CAPACITY: usize = 128;
+const ABS_MAX_IMM: i32 = 1 << (RV_B_TYPE_IMM_BITS - 1);
+type Harness =
+    TestChipHarness<F, Rv32BranchEqualExecutor, Rv32BranchEqualAir, Rv32BranchEqualChip<F>>;
+
+fn create_test_chip(tester: &mut VmChipTestBuilder<F>) -> Harness {
+    let air = Rv32BranchEqualAir::new(
+        Rv32BranchAdapterAir::new(tester.execution_bridge(), tester.memory_bridge()),
+        BranchEqualCoreAir::new(BranchEqualOpcode::CLASS_OFFSET, DEFAULT_PC_STEP),
+    );
+    let executor = Rv32BranchEqualExecutor::new(
+        Rv32BranchAdapterExecutor,
+        BranchEqualOpcode::CLASS_OFFSET,
+        DEFAULT_PC_STEP,
+    );
+    let chip = Rv32BranchEqualChip::new(
+        BranchEqualFiller::new(
+            Rv32BranchAdapterFiller,
+            BranchEqualOpcode::CLASS_OFFSET,
+            DEFAULT_PC_STEP,
+        ),
+        tester.memory_helper(),
+    );
 
-//////////////////////////////////////////////////////////////////////////////////////
-// POSITIVE TESTS
-//
-// Randomly generate computations and execute, ensuring that the generated trace
-// passes all constraints.
-//////////////////////////////////////////////////////////////////////////////////////
+    Harness::with_capacity(executor, air, chip, MAX_INS_CAPACITY)
+}
 
 #[allow(clippy::too_many_arguments)]
-fn run_rv32_branch_eq_rand_execute<E: InstructionExecutor<F>>(
+fn set_and_execute(
     tester: &mut VmChipTestBuilder<F>,
-    chip: &mut E,
-    opcode: BranchEqualOpcode,
-    a: [u32; RV32_REGISTER_NUM_LIMBS],
-    b: [u32; RV32_REGISTER_NUM_LIMBS],
-    imm: i32,
+    harness: &mut Harness,
     rng: &mut StdRng,
+    opcode: BranchEqualOpcode,
+    a: Option<[u8; RV32_REGISTER_NUM_LIMBS]>,
+    b: Option<[u8; RV32_REGISTER_NUM_LIMBS]>,
+    imm: Option<i32>,
 ) {
+    let a = a.unwrap_or(array::from_fn(|_| rng.gen_range(0..=u8::MAX)));
+    let b = b.unwrap_or(if rng.gen_bool(0.5) {
+        a
+    } else {
+        array::from_fn(|_| rng.gen_range(0..=u8::MAX))
+    });
+
+    let imm = imm.unwrap_or(rng.gen_range((-ABS_MAX_IMM)..ABS_MAX_IMM));
     let rs1 = gen_pointer(rng, 4);
     let rs2 = gen_pointer(rng, 4);
-    tester.write::<RV32_REGISTER_NUM_LIMBS>(1, rs1, a.map(F::from_canonical_u32));
-    tester.write::<RV32_REGISTER_NUM_LIMBS>(1, rs2, b.map(F::from_canonical_u32));
+    tester.write::<RV32_REGISTER_NUM_LIMBS>(1, rs1, a.map(F::from_canonical_u8));
+    tester.write::<RV32_REGISTER_NUM_LIMBS>(1, rs2, b.map(F::from_canonical_u8));
 
+    let initial_pc = rng.gen_range(imm.unsigned_abs()..(1 << (PC_BITS - 1)));
     tester.execute_with_pc(
-        chip,
+        harness,
         &Instruction::from_isize(
             opcode.global_opcode(),
             rs1 as isize,
@@ -61,10 +93,10 @@ fn run_rv32_branch_eq_rand_execute<E: InstructionExecutor<F>>(
             1,
             1,
         ),
-        rng.gen_range(imm.unsigned_abs()..(1 << (PC_BITS - 1))),
+        initial_pc,
     );
 
-    let (cmp_result, _, _) = run_eq::<F, RV32_REGISTER_NUM_LIMBS>(opcode, &a, &b);
+    let cmp_result = fast_run_eq(opcode, &a, &b);
     let from_pc = tester.execution.last_from_pc().as_canonical_u32() as i32;
     let to_pc = tester.execution.last_to_pc().as_canonical_u32() as i32;
     let pc_inc = if cmp_result { imm } else { 4 };
@@ -72,183 +104,176 @@ fn run_rv32_branch_eq_rand_execute<E: InstructionExecutor<F>>(
     assert_eq!(to_pc, from_pc + pc_inc);
 }
 
-fn run_rv32_branch_eq_rand_test(opcode: BranchEqualOpcode, num_ops: usize) {
-    let mut rng = create_seeded_rng();
-    const ABS_MAX_BRANCH: i32 = 1 << (RV_B_TYPE_IMM_BITS - 1);
+//////////////////////////////////////////////////////////////////////////////////////
+// POSITIVE TESTS
+//
+// Randomly generate computations and execute, ensuring that the generated trace
+// passes all constraints.
+//////////////////////////////////////////////////////////////////////////////////////
 
+#[test_case(BranchEqualOpcode::BEQ, 100)] // arguments are (opcode, num_ops)
+#[test_case(BranchEqualOpcode::BNE, 100)]
+fn rand_rv32_branch_eq_test(opcode: BranchEqualOpcode, num_ops: usize) {
+    let mut rng = create_seeded_rng();
     let mut tester = VmChipTestBuilder::default();
-    let mut chip = Rv32BranchEqualChip::<F>::new(
-        Rv32BranchAdapterChip::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-        ),
-        BranchEqualCoreChip::new(BranchEqualOpcode::CLASS_OFFSET, 4),
-        tester.offline_memory_mutex_arc(),
-    );
+    let mut harness = create_test_chip(&mut tester);
 
     for _ in 0..num_ops {
-        let a = array::from_fn(|_| rng.gen_range(0..F::ORDER_U32));
-        let b = if rng.gen_bool(0.5) {
-            a
-        } else {
-            array::from_fn(|_| rng.gen_range(0..F::ORDER_U32))
-        };
-        let imm = rng.gen_range((-ABS_MAX_BRANCH)..ABS_MAX_BRANCH);
-        run_rv32_branch_eq_rand_execute(&mut tester, &mut chip, opcode, a, b, imm, &mut rng);
+        set_and_execute(
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            opcode,
+            None, // a: None => set_and_execute draws random u8 limbs
+            None, // b: None => copy of `a` half of the time, otherwise random limbs
+            None, // imm: None => random offset in [-ABS_MAX_IMM, ABS_MAX_IMM)
+        );
     }
 
-    let tester = tester.build().load(chip).finalize();
+    let tester = tester.build().load(harness).finalize();
     tester.simple_test().expect("Verification failed");
 }
 
-#[test]
-fn rv32_beq_rand_test() {
-    run_rv32_branch_eq_rand_test(BranchEqualOpcode::BEQ, 100);
-}
-
-#[test]
-fn rv32_bne_rand_test() {
-    run_rv32_branch_eq_rand_test(BranchEqualOpcode::BNE, 100);
-}
-
 //////////////////////////////////////////////////////////////////////////////////////
 // NEGATIVE TESTS
 //
 // Given a fake trace of a single operation, setup a chip and run the test. We replace
-// the write part of the trace and check that the core chip throws the expected error.
-// A dummy adapter is used so memory interactions don't indirectly cause false passes.
+// part of the trace and check that the chip throws the expected error.
 //////////////////////////////////////////////////////////////////////////////////////
 
-type Rv32BranchEqualTestChip<F> =
-    VmChipWrapper<F, TestAdapterChip<F>, BranchEqualCoreChip<RV32_REGISTER_NUM_LIMBS>>;
-
 #[allow(clippy::too_many_arguments)]
-fn run_rv32_beq_negative_test(
+fn run_negative_branch_eq_test(
     opcode: BranchEqualOpcode,
-    a: [u32; RV32_REGISTER_NUM_LIMBS],
-    b: [u32; RV32_REGISTER_NUM_LIMBS],
-    cmp_result: bool,
-    diff_inv_marker: Option<[u32; RV32_REGISTER_NUM_LIMBS]>,
+    a: [u8; RV32_REGISTER_NUM_LIMBS],
+    b: [u8; RV32_REGISTER_NUM_LIMBS],
+    prank_cmp_result: Option<bool>, // Some(v): overwrite cmp_result in trace row 0
+    prank_diff_inv_marker: Option<[u32; RV32_REGISTER_NUM_LIMBS]>, // Some(m): overwrite marker
+    interaction_error: bool, // passed to get_verification_error to pick the expected error
 ) {
-    let imm = 16u32;
+    let imm = 16i32;
+    let mut rng = create_seeded_rng();
     let mut tester = VmChipTestBuilder::default();
-    let mut chip = Rv32BranchEqualTestChip::<F>::new(
-        TestAdapterChip::new(
-            vec![[a.map(F::from_canonical_u32), b.map(F::from_canonical_u32)].concat()],
-            vec![if cmp_result { Some(imm) } else { None }],
-            ExecutionBridge::new(tester.execution_bus(), tester.program_bus()),
-        ),
-        BranchEqualCoreChip::new(BranchEqualOpcode::CLASS_OFFSET, 4),
-        tester.offline_memory_mutex_arc(),
+    let mut harness = create_test_chip(&mut tester);
+
+    set_and_execute(
+        &mut tester,
+        &mut harness,
+        &mut rng,
+        opcode,
+        Some(a),
+        Some(b),
+        Some(imm), // all operands fixed: one honest execution whose row is tampered with below
     );
 
-    tester.execute(
-        &mut chip,
-        &Instruction::from_usize(opcode.global_opcode(), [0, 0, imm as usize, 1, 1]),
-    );
-
-    let trace_width = chip.trace_width();
-    let adapter_width = BaseAir::<F>::width(chip.adapter.air());
-
+    let adapter_width = BaseAir::<F>::width(&harness.air.adapter); // core columns start here
     let modify_trace = |trace: &mut DenseMatrix<BabyBear>| {
         let mut values = trace.row_slice(0).to_vec();
         let cols: &mut BranchEqualCoreCols<F, RV32_REGISTER_NUM_LIMBS> =
             values.split_at_mut(adapter_width).1.borrow_mut();
-        cols.cmp_result = F::from_bool(cmp_result);
-        if let Some(diff_inv_marker) = diff_inv_marker {
+        if let Some(cmp_result) = prank_cmp_result {
+            cols.cmp_result = F::from_bool(cmp_result);
+        }
+        if let Some(diff_inv_marker) = prank_diff_inv_marker {
             cols.diff_inv_marker = diff_inv_marker.map(F::from_canonical_u32);
         }
-        *trace = RowMajorMatrix::new(values, trace_width);
+        *trace = RowMajorMatrix::new(values, trace.width()); // trace becomes just this row
     };
 
     disable_debug_builder();
     let tester = tester
         .build()
-        .load_and_prank_trace(chip, modify_trace)
+        .load_and_prank_trace(harness, modify_trace)
         .finalize();
-    tester.simple_test_with_expected_error(VerificationError::OodEvaluationMismatch);
+    tester.simple_test_with_expected_error(get_verification_error(interaction_error));
 }
 
 #[test]
 fn rv32_beq_wrong_cmp_negative_test() {
-    run_rv32_beq_negative_test(
+    run_negative_branch_eq_test(
         BranchEqualOpcode::BEQ,
         [0, 0, 7, 0],
         [0, 0, 0, 7],
-        true,
+        Some(true), // a != b, yet cmp_result is forced to 1
         None,
+        false, // interaction_error
     );
 
-    run_rv32_beq_negative_test(
+    run_negative_branch_eq_test(
         BranchEqualOpcode::BEQ,
         [0, 0, 7, 0],
         [0, 0, 7, 0],
-        false,
+        Some(false), // a == b, yet cmp_result is forced to 0
         None,
+        false, // interaction_error
     );
 }
 
 #[test]
 fn rv32_beq_zero_inv_marker_negative_test() {
-    run_rv32_beq_negative_test(
+    run_negative_branch_eq_test(
         BranchEqualOpcode::BEQ,
         [0, 0, 7, 0],
         [0, 0, 0, 7],
-        true,
+        Some(true), // a != b, yet cmp_result is forced to 1
         Some([0, 0, 0, 0]),
+        false, // interaction_error
     );
 }
 
 #[test]
 fn rv32_beq_invalid_inv_marker_negative_test() {
-    run_rv32_beq_negative_test(
+    run_negative_branch_eq_test(
         BranchEqualOpcode::BEQ,
         [0, 0, 7, 0],
         [0, 0, 7, 0],
-        false,
+        Some(false), // a == b, yet cmp_result is forced to 0
         Some([0, 0, 1, 0]),
+        false, // interaction_error
     );
 }
 
 #[test]
 fn rv32_bne_wrong_cmp_negative_test() {
-    run_rv32_beq_negative_test(
+    run_negative_branch_eq_test(
         BranchEqualOpcode::BNE,
         [0, 0, 7, 0],
         [0, 0, 0, 7],
-        false,
+        Some(false), // a != b, yet BNE's cmp_result is forced to 0
         None,
+        false, // interaction_error
     );
 
-    run_rv32_beq_negative_test(
+    run_negative_branch_eq_test(
         BranchEqualOpcode::BNE,
         [0, 0, 7, 0],
         [0, 0, 7, 0],
-        true,
+        Some(true), // a == b, yet BNE's cmp_result is forced to 1
         None,
+        false, // interaction_error
     );
 }
 
 #[test]
 fn rv32_bne_zero_inv_marker_negative_test() {
-    run_rv32_beq_negative_test(
+    run_negative_branch_eq_test(
         BranchEqualOpcode::BNE,
         [0, 0, 7, 0],
         [0, 0, 0, 7],
-        false,
+        Some(false), // a != b, yet BNE's cmp_result is forced to 0
         Some([0, 0, 0, 0]),
+        false, // interaction_error
     );
 }
 
 #[test]
 fn rv32_bne_invalid_inv_marker_negative_test() {
-    run_rv32_beq_negative_test(
+    run_negative_branch_eq_test(
         BranchEqualOpcode::BNE,
         [0, 0, 7, 0],
         [0, 0, 7, 0],
-        true,
+        Some(true), // a == b, yet BNE's cmp_result is forced to 1
         Some([0, 0, 1, 0]),
+        false, // interaction_error
     );
 }
 
@@ -259,66 +284,61 @@ fn rv32_bne_invalid_inv_marker_negative_test() {
 ///////////////////////////////////////////////////////////////////////////////////////
 
 #[test]
-fn execute_pc_increment_sanity_test() {
-    let core =
-        BranchEqualCoreChip::<RV32_REGISTER_NUM_LIMBS>::new(BranchEqualOpcode::CLASS_OFFSET, 4);
-
-    let mut instruction = Instruction::<F> {
-        opcode: BranchEqualOpcode::BEQ.global_opcode(),
-        c: F::from_canonical_u8(8),
-        ..Default::default()
-    };
-    let x: [F; RV32_REGISTER_NUM_LIMBS] = [19, 4, 1790, 60].map(F::from_canonical_u32);
-    let y: [F; RV32_REGISTER_NUM_LIMBS] = [19, 32, 1804, 60].map(F::from_canonical_u32);
-
-    let result = <BranchEqualCoreChip<RV32_REGISTER_NUM_LIMBS> as VmCoreChip<
-        F,
-        BasicAdapterInterface<F, ImmInstruction<F>, 2, 0, RV32_REGISTER_NUM_LIMBS, 0>,
-    >>::execute_instruction(&core, &instruction, 0, [x, y]);
-    let (output, _) = result.expect("execute_instruction failed");
-    assert!(output.to_pc.is_none());
-
-    instruction.opcode = BranchEqualOpcode::BNE.global_opcode();
-    let result = <BranchEqualCoreChip<RV32_REGISTER_NUM_LIMBS> as VmCoreChip<
-        F,
-        BasicAdapterInterface<F, ImmInstruction<F>, 2, 0, RV32_REGISTER_NUM_LIMBS, 0>,
-    >>::execute_instruction(&core, &instruction, 0, [x, y]);
-    let (output, _) = result.expect("execute_instruction failed");
-    assert!(output.to_pc.is_some());
-    assert_eq!(output.to_pc.unwrap(), 8);
+fn execute_roundtrip_sanity_test() {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default();
+    let mut harness = create_test_chip(&mut tester);
+
+    let x = [19, 4, 179, 60];
+    let y = [19, 32, 180, 60]; // differs from x in limbs 1 and 2
+    set_and_execute(
+        &mut tester,
+        &mut harness,
+        &mut rng,
+        BranchEqualOpcode::BEQ, // operands differ; the pc check happens inside set_and_execute
+        Some(x),
+        Some(y),
+        Some(8),
+    );
+
+    set_and_execute(
+        &mut tester,
+        &mut harness,
+        &mut rng,
+        BranchEqualOpcode::BNE, // same operands, opposite opcode
+        Some(x),
+        Some(y),
+        Some(8),
+    ); // the tester is never built here: only the assertions inside set_and_execute run
 }
 
 #[test]
 fn run_eq_sanity_test() {
-    let x: [u32; RV32_REGISTER_NUM_LIMBS] = [19, 4, 1790, 60];
-    let (cmp_result, _, diff_val) =
-        run_eq::<F, RV32_REGISTER_NUM_LIMBS>(BranchEqualOpcode::BEQ, &x, &x);
+    let x: [u8; RV32_REGISTER_NUM_LIMBS] = [19, 4, 17, 60]; // limbs are u8 now (1790 did not fit)
+    let (cmp_result, _, diff_val) = run_eq::<F, RV32_REGISTER_NUM_LIMBS>(true, &x, &x); // was BEQ
     assert!(cmp_result);
     assert_eq!(diff_val, F::ZERO);
 
-    let (cmp_result, _, diff_val) =
-        run_eq::<F, RV32_REGISTER_NUM_LIMBS>(BranchEqualOpcode::BNE, &x, &x);
+    let (cmp_result, _, diff_val) = run_eq::<F, RV32_REGISTER_NUM_LIMBS>(false, &x, &x); // was BNE
     assert!(!cmp_result);
     assert_eq!(diff_val, F::ZERO);
 }
 
 #[test]
 fn run_ne_sanity_test() {
-    let x: [u32; RV32_REGISTER_NUM_LIMBS] = [19, 4, 1790, 60];
-    let y: [u32; RV32_REGISTER_NUM_LIMBS] = [19, 32, 1804, 60];
-    let (cmp_result, diff_idx, diff_val) =
-        run_eq::<F, RV32_REGISTER_NUM_LIMBS>(BranchEqualOpcode::BEQ, &x, &y);
+    let x: [u8; RV32_REGISTER_NUM_LIMBS] = [19, 4, 17, 60];
+    let y: [u8; RV32_REGISTER_NUM_LIMBS] = [19, 32, 18, 60]; // differs from x in limbs 1 and 2
+    let (cmp_result, diff_idx, diff_val) = run_eq::<F, RV32_REGISTER_NUM_LIMBS>(true, &x, &y);
     assert!(!cmp_result);
     assert_eq!(
-        diff_val * (F::from_canonical_u32(x[diff_idx]) - F::from_canonical_u32(y[diff_idx])),
+        diff_val * (F::from_canonical_u8(x[diff_idx]) - F::from_canonical_u8(y[diff_idx])),
         F::ONE
     );
 
-    let (cmp_result, diff_idx, diff_val) =
-        run_eq::<F, RV32_REGISTER_NUM_LIMBS>(BranchEqualOpcode::BNE, &x, &y);
+    let (cmp_result, diff_idx, diff_val) = run_eq::<F, RV32_REGISTER_NUM_LIMBS>(false, &x, &y);
     assert!(cmp_result);
     assert_eq!(
-        diff_val * (F::from_canonical_u32(x[diff_idx]) - F::from_canonical_u32(y[diff_idx])),
+        diff_val * (F::from_canonical_u8(x[diff_idx]) - F::from_canonical_u8(y[diff_idx])),
         F::ONE
     );
 }
diff --git a/extensions/rv32im/circuit/src/branch_lt/core.rs b/extensions/rv32im/circuit/src/branch_lt/core.rs
index 3eebb02146..e02ea46888 100644
--- a/extensions/rv32im/circuit/src/branch_lt/core.rs
+++ b/extensions/rv32im/circuit/src/branch_lt/core.rs
@@ -1,15 +1,13 @@
-use std::{
-    array,
-    borrow::{Borrow, BorrowMut},
-};
+use std::borrow::{Borrow, BorrowMut};
 
-use openvm_circuit::arch::{
-    AdapterAirContext, AdapterRuntimeContext, ImmInstruction, Result, VmAdapterInterface,
-    VmCoreAir, VmCoreChip,
+use openvm_circuit::{
+    arch::*,
+    system::memory::{online::TracingMemory, MemoryAuxColsFactory},
 };
 use openvm_circuit_primitives::{
     bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
     utils::not,
+    AlignedBytesBorrow,
 };
 use openvm_circuit_primitives_derive::AlignedBorrow;
 use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP, LocalOpcode};
@@ -20,8 +18,6 @@ use openvm_stark_backend::{
     p3_field::{Field, FieldAlgebra, PrimeField32},
     rap::BaseAirWithPublicValues,
 };
-use serde::{Deserialize, Serialize};
-use serde_big_array::BigArray;
 use strum::IntoEnumIterator;
 
 #[repr(C)]
@@ -53,7 +49,7 @@ pub struct BranchLessThanCoreCols<T, const NUM_LIMBS: usize, const LIMB_BITS: us
     pub diff_val: T,
 }
 
-#[derive(Copy, Clone, Debug)]
+#[derive(Copy, Clone, Debug, derive_new::new)]
 pub struct BranchLessThanCoreAir<const NUM_LIMBS: usize, const LIMB_BITS: usize> {
     pub bus: BitwiseOperationLookupBus,
     offset: usize,
@@ -188,183 +184,184 @@ where
 }
 
 #[repr(C)]
-#[derive(Clone, Debug, Serialize, Deserialize)]
-pub struct BranchLessThanCoreRecord<T, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
-    #[serde(with = "BigArray")]
-    pub a: [T; NUM_LIMBS],
-    #[serde(with = "BigArray")]
-    pub b: [T; NUM_LIMBS],
-    pub cmp_result: T,
-    pub cmp_lt: T,
-    pub imm: T,
-    pub a_msb_f: T,
-    pub b_msb_f: T,
-    pub diff_val: T,
-    pub diff_idx: usize,
-    pub opcode: BranchLessThanOpcode,
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct BranchLessThanCoreRecord<const NUM_LIMBS: usize, const LIMB_BITS: usize> {
+    pub a: [u8; NUM_LIMBS], // first operand limbs, as returned by the adapter read
+    pub b: [u8; NUM_LIMBS], // second operand limbs, as returned by the adapter read
+    pub imm: u32, // canonical u32 form of instruction operand `c`
+    pub local_opcode: u8, // opcode index relative to the executor's `offset`
+}
+
+#[derive(Clone, Copy, derive_new::new)]
+pub struct BranchLessThanExecutor<A, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
+    adapter: A, // performs the operand reads during preflight execution
+    pub offset: usize, // subtracted from the global opcode to obtain the local opcode index
 }
 
-pub struct BranchLessThanCoreChip<const NUM_LIMBS: usize, const LIMB_BITS: usize> {
-    pub air: BranchLessThanCoreAir<NUM_LIMBS, LIMB_BITS>,
+#[derive(Clone, derive_new::new)]
+pub struct BranchLessThanFiller<A, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
+    adapter: A, // fills the adapter part of each trace row
     pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<LIMB_BITS>,
+    pub offset: usize, // NOTE(review): not read by fill_trace_row as shown in this hunk
 }
 
-impl<const NUM_LIMBS: usize, const LIMB_BITS: usize> BranchLessThanCoreChip<NUM_LIMBS, LIMB_BITS> {
-    pub fn new(
-        bitwise_lookup_chip: SharedBitwiseOperationLookupChip<LIMB_BITS>,
-        offset: usize,
-    ) -> Self {
-        Self {
-            air: BranchLessThanCoreAir {
-                bus: bitwise_lookup_chip.bus(),
-                offset,
-            },
-            bitwise_lookup_chip,
+impl<F, A, RA, const NUM_LIMBS: usize, const LIMB_BITS: usize> PreflightExecutor<F, RA>
+    for BranchLessThanExecutor<A, NUM_LIMBS, LIMB_BITS>
+where
+    F: PrimeField32,
+    A: 'static + AdapterTraceExecutor<F, ReadData: Into<[[u8; NUM_LIMBS]; 2]>, WriteData = ()>,
+    for<'buf> RA: RecordArena<
+        'buf,
+        EmptyAdapterCoreLayout<F, A>,
+        (
+            A::RecordMut<'buf>,
+            &'buf mut BranchLessThanCoreRecord<NUM_LIMBS, LIMB_BITS>,
+        ),
+    >,
+{
+    fn get_opcode_name(&self, opcode: usize) -> String {
+        format!(
+            "{:?}",
+            BranchLessThanOpcode::from_usize(opcode - self.offset) // global -> local opcode
+        )
+    }
+
+    fn execute(
+        &self,
+        state: VmStateMut<F, TracingMemory, RA>,
+        instruction: &Instruction<F>,
+    ) -> Result<(), ExecutionError> {
+        let &Instruction { opcode, c: imm, .. } = instruction;
+
+        let (mut adapter_record, core_record) = state.ctx.alloc(EmptyAdapterCoreLayout::new());
+
+        A::start(*state.pc, state.memory, &mut adapter_record);
+
+        let [rs1, rs2] = self
+            .adapter
+            .read(state.memory, instruction, &mut adapter_record)
+            .into();
+
+        core_record.a = rs1;
+        core_record.b = rs2;
+        core_record.imm = imm.as_canonical_u32(); // canonical field value of operand `c`
+        core_record.local_opcode = opcode.local_opcode_idx(self.offset) as u8;
+        // Tuple field 0 of run_cmp is cmp_result, i.e. whether the branch is taken.
+        if run_cmp::<NUM_LIMBS, LIMB_BITS>(core_record.local_opcode, &rs1, &rs2).0 {
+            *state.pc = (F::from_canonical_u32(*state.pc) + imm).as_canonical_u32();
+        } else {
+            *state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP); // not taken: next instruction
         }
+
+        Ok(())
     }
 }
 
-impl<F: PrimeField32, I: VmAdapterInterface<F>, const NUM_LIMBS: usize, const LIMB_BITS: usize>
-    VmCoreChip<F, I> for BranchLessThanCoreChip<NUM_LIMBS, LIMB_BITS>
+impl<F, A, const NUM_LIMBS: usize, const LIMB_BITS: usize> TraceFiller<F>
+    for BranchLessThanFiller<A, NUM_LIMBS, LIMB_BITS>
 where
-    I::Reads: Into<[[F; NUM_LIMBS]; 2]>,
-    I::Writes: Default,
+    F: PrimeField32,
+    A: 'static + AdapterTraceFiller<F>,
 {
-    type Record = BranchLessThanCoreRecord<F, NUM_LIMBS, LIMB_BITS>;
-    type Air = BranchLessThanCoreAir<NUM_LIMBS, LIMB_BITS>;
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, row_slice: &mut [F]) {
+        let (adapter_row, mut core_row) = unsafe { row_slice.split_at_mut_unchecked(A::WIDTH) };
+        // NOTE(review): `record` seems to alias `core_row`'s bytes; keep the write order below.
+        let record: &BranchLessThanCoreRecord<NUM_LIMBS, LIMB_BITS> =
+            unsafe { get_record_from_slice(&mut core_row, ()) };
+
+        self.adapter.fill_trace_row(mem_helper, adapter_row);
+        let core_row: &mut BranchLessThanCoreCols<F, NUM_LIMBS, LIMB_BITS> = core_row.borrow_mut();
+
+        let signed = record.local_opcode == BranchLessThanOpcode::BLT as u8
+            || record.local_opcode == BranchLessThanOpcode::BGE as u8; // BLT/BGE compare as signed
+        let ge_op = record.local_opcode == BranchLessThanOpcode::BGE as u8
+            || record.local_opcode == BranchLessThanOpcode::BGEU as u8; // BGE/BGEU are the >= forms
 
-    #[allow(clippy::type_complexity)]
-    fn execute_instruction(
-        &self,
-        instruction: &Instruction<F>,
-        from_pc: u32,
-        reads: I::Reads,
-    ) -> Result<(AdapterRuntimeContext<F, I>, Self::Record)> {
-        let Instruction { opcode, c: imm, .. } = *instruction;
-        let blt_opcode = BranchLessThanOpcode::from_usize(opcode.local_opcode_idx(self.air.offset));
-
-        let data: [[F; NUM_LIMBS]; 2] = reads.into();
-        let a = data[0].map(|x| x.as_canonical_u32());
-        let b = data[1].map(|y| y.as_canonical_u32());
         let (cmp_result, diff_idx, a_sign, b_sign) =
-            run_cmp::<NUM_LIMBS, LIMB_BITS>(blt_opcode, &a, &b);
+            run_cmp::<NUM_LIMBS, LIMB_BITS>(record.local_opcode, &record.a, &record.b);
 
-        let signed = matches!(
-            blt_opcode,
-            BranchLessThanOpcode::BLT | BranchLessThanOpcode::BGE
-        );
-        let ge_opcode = matches!(
-            blt_opcode,
-            BranchLessThanOpcode::BGE | BranchLessThanOpcode::BGEU
-        );
-        let cmp_lt = cmp_result ^ ge_opcode;
+        let cmp_lt = cmp_result ^ ge_op; // presumably the raw a < b outcome (>= inversion undone)
 
         // We range check (a_msb_f + 128) and (b_msb_f + 128) if signed,
         // a_msb_f and b_msb_f if not
         let (a_msb_f, a_msb_range) = if a_sign {
             (
-                -F::from_canonical_u32((1 << LIMB_BITS) - a[NUM_LIMBS - 1]),
-                a[NUM_LIMBS - 1] - (1 << (LIMB_BITS - 1)),
+                -F::from_canonical_u32((1 << LIMB_BITS) - record.a[NUM_LIMBS - 1] as u32),
+                record.a[NUM_LIMBS - 1] as u32 - (1 << (LIMB_BITS - 1)), // msb set: no underflow
             )
         } else {
             (
-                F::from_canonical_u32(a[NUM_LIMBS - 1]),
-                a[NUM_LIMBS - 1] + ((signed as u32) << (LIMB_BITS - 1)),
+                F::from_canonical_u32(record.a[NUM_LIMBS - 1] as u32),
+                record.a[NUM_LIMBS - 1] as u32 + ((signed as u32) << (LIMB_BITS - 1)),
             )
         };
         let (b_msb_f, b_msb_range) = if b_sign {
             (
-                -F::from_canonical_u32((1 << LIMB_BITS) - b[NUM_LIMBS - 1]),
-                b[NUM_LIMBS - 1] - (1 << (LIMB_BITS - 1)),
+                -F::from_canonical_u32((1 << LIMB_BITS) - record.b[NUM_LIMBS - 1] as u32),
+                record.b[NUM_LIMBS - 1] as u32 - (1 << (LIMB_BITS - 1)), // msb set: no underflow
             )
         } else {
             (
-                F::from_canonical_u32(b[NUM_LIMBS - 1]),
-                b[NUM_LIMBS - 1] + ((signed as u32) << (LIMB_BITS - 1)),
+                F::from_canonical_u32(record.b[NUM_LIMBS - 1] as u32),
+                record.b[NUM_LIMBS - 1] as u32 + ((signed as u32) << (LIMB_BITS - 1)),
             )
         };
-        self.bitwise_lookup_chip
-            .request_range(a_msb_range, b_msb_range);
 
-        let diff_val = if diff_idx == NUM_LIMBS {
-            0
+        core_row.diff_val = if diff_idx == NUM_LIMBS {
+            F::ZERO
         } else if diff_idx == (NUM_LIMBS - 1) {
             if cmp_lt {
                 b_msb_f - a_msb_f
             } else {
                 a_msb_f - b_msb_f
             }
-            .as_canonical_u32()
         } else if cmp_lt {
-            b[diff_idx] - a[diff_idx]
+            F::from_canonical_u8(record.b[diff_idx] - record.a[diff_idx]) // relies on b > a here
         } else {
-            a[diff_idx] - b[diff_idx]
-        };
-
-        if diff_idx != NUM_LIMBS {
-            self.bitwise_lookup_chip.request_range(diff_val - 1, 0);
-        }
-
-        let output = AdapterRuntimeContext {
-            to_pc: cmp_result.then_some((F::from_canonical_u32(from_pc) + imm).as_canonical_u32()),
-            writes: Default::default(),
-        };
-        let record = BranchLessThanCoreRecord {
-            opcode: blt_opcode,
-            a: data[0],
-            b: data[1],
-            cmp_result: F::from_bool(cmp_result),
-            cmp_lt: F::from_bool(cmp_lt),
-            imm,
-            a_msb_f,
-            b_msb_f,
-            diff_val: F::from_canonical_u32(diff_val),
-            diff_idx,
+            F::from_canonical_u8(record.a[diff_idx] - record.b[diff_idx]) // relies on a > b here
         };
 
-        Ok((output, record))
-    }
+        self.bitwise_lookup_chip
+            .request_range(a_msb_range, b_msb_range); // both shifted top limbs in one request
 
-    fn get_opcode_name(&self, opcode: usize) -> String {
-        format!(
-            "{:?}",
-            BranchLessThanOpcode::from_usize(opcode - self.air.offset)
-        )
-    }
+        core_row.diff_marker = [F::ZERO; NUM_LIMBS];
 
-    fn generate_trace_row(&self, row_slice: &mut [F], record: Self::Record) {
-        let row_slice: &mut BranchLessThanCoreCols<_, NUM_LIMBS, LIMB_BITS> =
-            row_slice.borrow_mut();
-        row_slice.a = record.a;
-        row_slice.b = record.b;
-        row_slice.cmp_result = record.cmp_result;
-        row_slice.cmp_lt = record.cmp_lt;
-        row_slice.imm = record.imm;
-        row_slice.a_msb_f = record.a_msb_f;
-        row_slice.b_msb_f = record.b_msb_f;
-        row_slice.diff_marker = array::from_fn(|i| F::from_bool(i == record.diff_idx));
-        row_slice.diff_val = record.diff_val;
-        row_slice.opcode_blt_flag = F::from_bool(record.opcode == BranchLessThanOpcode::BLT);
-        row_slice.opcode_bltu_flag = F::from_bool(record.opcode == BranchLessThanOpcode::BLTU);
-        row_slice.opcode_bge_flag = F::from_bool(record.opcode == BranchLessThanOpcode::BGE);
-        row_slice.opcode_bgeu_flag = F::from_bool(record.opcode == BranchLessThanOpcode::BGEU);
-    }
+        if diff_idx != NUM_LIMBS {
+            self.bitwise_lookup_chip
+                .request_range(core_row.diff_val.as_canonical_u32() - 1, 0); // needs diff_val > 0
+            core_row.diff_marker[diff_idx] = F::ONE;
+        }
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        core_row.cmp_lt = F::from_bool(cmp_lt);
+        core_row.b_msb_f = b_msb_f;
+        core_row.a_msb_f = a_msb_f;
+        core_row.opcode_bgeu_flag =
+            F::from_bool(record.local_opcode == BranchLessThanOpcode::BGEU as u8);
+        core_row.opcode_bge_flag =
+            F::from_bool(record.local_opcode == BranchLessThanOpcode::BGE as u8);
+        core_row.opcode_bltu_flag =
+            F::from_bool(record.local_opcode == BranchLessThanOpcode::BLTU as u8);
+        core_row.opcode_blt_flag =
+            F::from_bool(record.local_opcode == BranchLessThanOpcode::BLT as u8);
+        // NOTE(review): `a` and `b` are overwritten last, after all reads of `record`.
+        core_row.imm = F::from_canonical_u32(record.imm);
+        core_row.cmp_result = F::from_bool(cmp_result);
+        core_row.b = record.b.map(F::from_canonical_u8);
+        core_row.a = record.a.map(F::from_canonical_u8);
     }
 }
 
 // Returns (cmp_result, diff_idx, x_sign, y_sign)
+#[inline(always)]
 pub(super) fn run_cmp<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
-    local_opcode: BranchLessThanOpcode,
-    x: &[u32; NUM_LIMBS],
-    y: &[u32; NUM_LIMBS],
+    local_opcode: u8,
+    x: &[u8; NUM_LIMBS],
+    y: &[u8; NUM_LIMBS],
 ) -> (bool, usize, bool, bool) {
-    let signed =
-        local_opcode == BranchLessThanOpcode::BLT || local_opcode == BranchLessThanOpcode::BGE;
-    let ge_op =
-        local_opcode == BranchLessThanOpcode::BGE || local_opcode == BranchLessThanOpcode::BGEU;
+    let signed = local_opcode == BranchLessThanOpcode::BLT as u8
+        || local_opcode == BranchLessThanOpcode::BGE as u8;
+    let ge_op = local_opcode == BranchLessThanOpcode::BGE as u8
+        || local_opcode == BranchLessThanOpcode::BGEU as u8;
     let x_sign = (x[NUM_LIMBS - 1] >> (LIMB_BITS - 1) == 1) && signed;
     let y_sign = (y[NUM_LIMBS - 1] >> (LIMB_BITS - 1) == 1) && signed;
     for i in (0..NUM_LIMBS).rev() {
diff --git a/extensions/rv32im/circuit/src/branch_lt/execution.rs b/extensions/rv32im/circuit/src/branch_lt/execution.rs
new file mode 100644
index 0000000000..206a49e4a1
--- /dev/null
+++ b/extensions/rv32im/circuit/src/branch_lt/execution.rs
@@ -0,0 +1,197 @@
+use std::{
+    borrow::{Borrow, BorrowMut},
+    mem::size_of,
+};
+
+use openvm_circuit::{
+    arch::{
+        E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait,
+        MeteredExecutor, StaticProgramError, VmExecState,
+    },
+    system::memory::online::GuestMemory,
+};
+use openvm_circuit_primitives_derive::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_REGISTER_AS, LocalOpcode,
+};
+use openvm_rv32im_transpiler::BranchLessThanOpcode;
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::core::BranchLessThanExecutor;
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)] // borrowed in place from the raw `[u8]` pre-compute buffer
+struct BranchLePreCompute {
+    imm: isize, // signed branch offset decoded from instruction operand `c`
+    a: u8, // operand `a` truncated to u8; address of the first register read
+    b: u8, // operand `b` truncated to u8; address of the second register read
+}
+
+impl<A, const NUM_LIMBS: usize, const LIMB_BITS: usize>
+    BranchLessThanExecutor<A, NUM_LIMBS, LIMB_BITS>
+{
+    #[inline(always)]
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        pc: u32, // only used to build the InvalidInstruction error
+        inst: &Instruction<F>,
+        data: &mut BranchLePreCompute, // output; written only after validation succeeds
+    ) -> Result<BranchLessThanOpcode, StaticProgramError> {
+        let &Instruction {
+            opcode, a, b, c, d, ..
+        } = inst;
+        let local_opcode = BranchLessThanOpcode::from_usize(opcode.local_opcode_idx(self.offset));
+        let c = c.as_canonical_u32();
+        let imm = if F::ORDER_U32 - c < c {
+            -((F::ORDER_U32 - c) as isize) // c closer to ORDER than to 0: negative offset
+        } else {
+            c as isize
+        };
+        if d.as_canonical_u32() != RV32_REGISTER_AS {
+            return Err(StaticProgramError::InvalidInstruction(pc)); // operands must be registers
+        }
+        *data = BranchLePreCompute {
+            imm,
+            a: a.as_canonical_u32() as u8,
+            b: b.as_canonical_u32() as u8,
+        };
+        Ok(local_opcode)
+    }
+}
+
+impl<F, A, const NUM_LIMBS: usize, const LIMB_BITS: usize> Executor<F>
+    for BranchLessThanExecutor<A, NUM_LIMBS, LIMB_BITS>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize {
+        size_of::<BranchLePreCompute>() // size of the struct that pre_compute borrows from `data`
+    }
+
+    #[inline(always)]
+    fn pre_compute<Ctx: ExecutionCtxTrait>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let data: &mut BranchLePreCompute = data.borrow_mut(); // view the raw bytes as the struct
+        let local_opcode = self.pre_compute_impl(pc, inst, data)?;
+        let fn_ptr = match local_opcode {
+            BranchLessThanOpcode::BLT => execute_e1_impl::<_, _, BltOp>, // one handler per opcode
+            BranchLessThanOpcode::BLTU => execute_e1_impl::<_, _, BltuOp>,
+            BranchLessThanOpcode::BGE => execute_e1_impl::<_, _, BgeOp>,
+            BranchLessThanOpcode::BGEU => execute_e1_impl::<_, _, BgeuOp>,
+        };
+        Ok(fn_ptr)
+    }
+}
+
+impl<F, A, const NUM_LIMBS: usize, const LIMB_BITS: usize> MeteredExecutor<F>
+    for BranchLessThanExecutor<A, NUM_LIMBS, LIMB_BITS>
+where
+    F: PrimeField32,
+{
+    fn metered_pre_compute_size(&self) -> usize {
+        size_of::<E2PreCompute<BranchLePreCompute>>() // pre-compute data plus its E2 wrapper
+    }
+
+    fn metered_pre_compute<Ctx>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        let data: &mut E2PreCompute<BranchLePreCompute> = data.borrow_mut();
+        data.chip_idx = chip_idx as u32; // read back by execute_e2_impl for on_height_change
+        let local_opcode = self.pre_compute_impl(pc, inst, &mut data.data)?;
+        let fn_ptr = match local_opcode {
+            BranchLessThanOpcode::BLT => execute_e2_impl::<_, _, BltOp>, // one handler per opcode
+            BranchLessThanOpcode::BLTU => execute_e2_impl::<_, _, BltuOp>,
+            BranchLessThanOpcode::BGE => execute_e2_impl::<_, _, BgeOp>,
+            BranchLessThanOpcode::BGEU => execute_e2_impl::<_, _, BgeuOp>,
+        };
+        Ok(fn_ptr)
+    }
+}
+
+/// Shared body of the E1/E2 handlers: reads both registers, evaluates `OP` and updates the pc.
+#[inline(always)]
+unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait, OP: BranchLessThanOp>(
+    pre_compute: &BranchLePreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let lhs = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.a as u32);
+    let rhs = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.b as u32);
+    vm_state.pc = if <OP as BranchLessThanOp>::compute(lhs, rhs) {
+        (vm_state.pc as isize + pre_compute.imm) as u32
+    } else {
+        vm_state.pc.wrapping_add(DEFAULT_PC_STEP)
+    };
+    vm_state.instret += 1;
+}
+
+unsafe fn execute_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait, OP: BranchLessThanOp>(
+    pre_compute: &[u8], // raw bytes previously filled by Executor::pre_compute
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &BranchLePreCompute = pre_compute.borrow(); // view the bytes as the struct
+    execute_e12_impl::<F, CTX, OP>(pre_compute, vm_state);
+}
+
+unsafe fn execute_e2_impl<F: PrimeField32, CTX: MeteredExecutionCtxTrait, OP: BranchLessThanOp>(
+    pre_compute: &[u8], // raw bytes previously filled by MeteredExecutor::metered_pre_compute
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &E2PreCompute<BranchLePreCompute> = pre_compute.borrow();
+    vm_state
+        .ctx
+        .on_height_change(pre_compute.chip_idx as usize, 1); // reported before executing
+    execute_e12_impl::<F, CTX, OP>(&pre_compute.data, vm_state);
+}
+
+trait BranchLessThanOp {
+    fn compute(rs1: [u8; 4], rs2: [u8; 4]) -> bool; // true => the branch is taken
+}
+struct BltOp; // signed <
+struct BltuOp; // unsigned <
+struct BgeOp; // signed >=
+struct BgeuOp; // unsigned >=
+
+impl BranchLessThanOp for BltOp {
+    /// Signed less-than: both operands are decoded as little-endian `i32`
+    /// before comparing.
+    #[inline(always)]
+    fn compute(rs1: [u8; 4], rs2: [u8; 4]) -> bool {
+        i32::from_le_bytes(rs1) < i32::from_le_bytes(rs2)
+    }
+}
+impl BranchLessThanOp for BltuOp {
+    /// Unsigned less-than: both operands are decoded as little-endian `u32`
+    /// before comparing.
+    #[inline(always)]
+    fn compute(rs1: [u8; 4], rs2: [u8; 4]) -> bool {
+        u32::from_le_bytes(rs1) < u32::from_le_bytes(rs2)
+    }
+}
+impl BranchLessThanOp for BgeOp {
+    /// Signed greater-or-equal: both operands are decoded as little-endian
+    /// `i32` before comparing.
+    #[inline(always)]
+    fn compute(rs1: [u8; 4], rs2: [u8; 4]) -> bool {
+        i32::from_le_bytes(rs1) >= i32::from_le_bytes(rs2)
+    }
+}
+impl BranchLessThanOp for BgeuOp {
+    /// Unsigned greater-or-equal: both operands are decoded as little-endian
+    /// `u32` before comparing.
+    #[inline(always)]
+    fn compute(rs1: [u8; 4], rs2: [u8; 4]) -> bool {
+        u32::from_le_bytes(rs1) >= u32::from_le_bytes(rs2)
+    }
+}
diff --git a/extensions/rv32im/circuit/src/branch_lt/mod.rs b/extensions/rv32im/circuit/src/branch_lt/mod.rs
index b0bf8fc417..4bde4c6086 100644
--- a/extensions/rv32im/circuit/src/branch_lt/mod.rs
+++ b/extensions/rv32im/circuit/src/branch_lt/mod.rs
@@ -1,16 +1,22 @@
-use openvm_circuit::arch::VmChipWrapper;
+use openvm_circuit::arch::{VmAirWrapper, VmChipWrapper};
 
 use super::adapters::{RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS};
-use crate::adapters::Rv32BranchAdapterChip;
+use crate::adapters::{Rv32BranchAdapterAir, Rv32BranchAdapterExecutor, Rv32BranchAdapterFiller};
 
 mod core;
+mod execution;
 pub use core::*;
 
 #[cfg(test)]
 mod tests;
 
+pub type Rv32BranchLessThanAir = VmAirWrapper<
+    Rv32BranchAdapterAir,
+    BranchLessThanCoreAir<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>,
+>;
+pub type Rv32BranchLessThanExecutor =
+    BranchLessThanExecutor<Rv32BranchAdapterExecutor, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>;
 pub type Rv32BranchLessThanChip<F> = VmChipWrapper<
     F,
-    Rv32BranchAdapterChip<F>,
-    BranchLessThanCoreChip<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>,
+    BranchLessThanFiller<Rv32BranchAdapterFiller, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>,
 >;
diff --git a/extensions/rv32im/circuit/src/branch_lt/tests.rs b/extensions/rv32im/circuit/src/branch_lt/tests.rs
index 8c1d7f697a..6a64a6fc75 100644
--- a/extensions/rv32im/circuit/src/branch_lt/tests.rs
+++ b/extensions/rv32im/circuit/src/branch_lt/tests.rs
@@ -1,15 +1,14 @@
-use std::borrow::BorrowMut;
+use std::{array, borrow::BorrowMut, sync::Arc};
 
 use openvm_circuit::{
-    arch::{
-        testing::{memory::gen_pointer, TestAdapterChip, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS},
-        BasicAdapterInterface, ExecutionBridge, ImmInstruction, InstructionExecutor, VmAdapterChip,
-        VmChipWrapper, VmCoreChip,
+    arch::testing::{
+        memory::gen_pointer, TestChipHarness, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS,
     },
-    utils::{generate_long_number, i32_to_f},
+    utils::i32_to_f,
 };
 use openvm_circuit_primitives::bitwise_op_lookup::{
-    BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
+    BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
+    SharedBitwiseOperationLookupChip,
 };
 use openvm_instructions::{instruction::Instruction, program::PC_BITS, LocalOpcode};
 use openvm_rv32im_transpiler::BranchLessThanOpcode;
@@ -21,49 +20,92 @@ use openvm_stark_backend::{
         Matrix,
     },
     utils::disable_debug_builder,
-    verifier::VerificationError,
-    ChipUsageGetter,
 };
 use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
 use rand::{rngs::StdRng, Rng};
+use test_case::test_case;
 
-use super::{
-    core::{run_cmp, BranchLessThanCoreChip},
-    Rv32BranchLessThanChip,
-};
+use super::{run_cmp, Rv32BranchLessThanChip};
 use crate::{
     adapters::{
-        Rv32BranchAdapterChip, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS, RV_B_TYPE_IMM_BITS,
+        Rv32BranchAdapterAir, Rv32BranchAdapterExecutor, Rv32BranchAdapterFiller, RV32_CELL_BITS,
+        RV32_REGISTER_NUM_LIMBS, RV_B_TYPE_IMM_BITS,
     },
     branch_lt::BranchLessThanCoreCols,
+    test_utils::get_verification_error,
+    BranchLessThanCoreAir, BranchLessThanFiller, Rv32BranchLessThanAir, Rv32BranchLessThanExecutor,
 };
 
 type F = BabyBear;
+const MAX_INS_CAPACITY: usize = 128;
+const ABS_MAX_IMM: i32 = 1 << (RV_B_TYPE_IMM_BITS - 1);
+type Harness = TestChipHarness<
+    F,
+    Rv32BranchLessThanExecutor,
+    Rv32BranchLessThanAir,
+    Rv32BranchLessThanChip<F>,
+>;
 
-//////////////////////////////////////////////////////////////////////////////////////
-// POSITIVE TESTS
-//
-// Randomly generate computations and execute, ensuring that the generated trace
-// passes all constraints.
-//////////////////////////////////////////////////////////////////////////////////////
+fn create_test_chip(
+    tester: &mut VmChipTestBuilder<F>, // supplies the execution/memory bridges and memory helper
+) -> (
+    Harness,
+    (
+        BitwiseOperationLookupAir<RV32_CELL_BITS>,
+        SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+    ),
+) { // Builds the branch-lt harness plus the bitwise-lookup (air, chip) pair it depends on.
+    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
+    let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+        bitwise_bus,
+    )); // wrapped in `Arc` so the filler and the returned tuple share one chip
+
+    let air = Rv32BranchLessThanAir::new(
+        Rv32BranchAdapterAir::new(tester.execution_bridge(), tester.memory_bridge()),
+        BranchLessThanCoreAir::new(bitwise_bus, BranchLessThanOpcode::CLASS_OFFSET),
+    ); // AIR = adapter AIR + core AIR on the same bitwise bus
+    let executor = Rv32BranchLessThanExecutor::new(
+        Rv32BranchAdapterExecutor::new(),
+        BranchLessThanOpcode::CLASS_OFFSET,
+    ); // executor uses the same opcode offset as the core AIR
+    let chip = Rv32BranchLessThanChip::new(
+        BranchLessThanFiller::new(
+            Rv32BranchAdapterFiller,
+            bitwise_chip.clone(), // clone of the `Arc`, not of the chip
+            BranchLessThanOpcode::CLASS_OFFSET,
+        ),
+        tester.memory_helper(),
+    );
+    let harness = Harness::with_capacity(executor, air, chip, MAX_INS_CAPACITY); // capacity is the file-level constant (128)
+
+    (harness, (bitwise_chip.air, bitwise_chip)) // `air` field is read before the `Arc` is moved into the tuple
+}
 
 #[allow(clippy::too_many_arguments)]
-fn run_rv32_branch_lt_rand_execute<E: InstructionExecutor<F>>(
+fn set_and_execute(
     tester: &mut VmChipTestBuilder<F>,
-    chip: &mut E,
-    opcode: BranchLessThanOpcode,
-    a: [u32; RV32_REGISTER_NUM_LIMBS],
-    b: [u32; RV32_REGISTER_NUM_LIMBS],
-    imm: i32,
+    harness: &mut Harness,
     rng: &mut StdRng,
+    opcode: BranchLessThanOpcode,
+    a: Option<[u8; RV32_REGISTER_NUM_LIMBS]>,
+    b: Option<[u8; RV32_REGISTER_NUM_LIMBS]>,
+    imm: Option<i32>,
 ) {
+    let a = a.unwrap_or(array::from_fn(|_| rng.gen_range(0..=u8::MAX)));
+    let b = b.unwrap_or(if rng.gen_bool(0.5) {
+        a
+    } else {
+        array::from_fn(|_| rng.gen_range(0..=u8::MAX))
+    });
+
+    let imm = imm.unwrap_or(rng.gen_range((-ABS_MAX_IMM)..ABS_MAX_IMM));
     let rs1 = gen_pointer(rng, 4);
     let rs2 = gen_pointer(rng, 4);
-    tester.write::<RV32_REGISTER_NUM_LIMBS>(1, rs1, a.map(F::from_canonical_u32));
-    tester.write::<RV32_REGISTER_NUM_LIMBS>(1, rs2, b.map(F::from_canonical_u32));
+    tester.write::<RV32_REGISTER_NUM_LIMBS>(1, rs1, a.map(F::from_canonical_u8));
+    tester.write::<RV32_REGISTER_NUM_LIMBS>(1, rs2, b.map(F::from_canonical_u8));
 
     tester.execute_with_pc(
-        chip,
+        harness,
         &Instruction::from_isize(
             opcode.global_opcode(),
             rs1 as isize,
@@ -75,7 +117,8 @@ fn run_rv32_branch_lt_rand_execute<E: InstructionExecutor<F>>(
         rng.gen_range(imm.unsigned_abs()..(1 << (PC_BITS - 1))),
     );
 
-    let (cmp_result, _, _, _) = run_cmp::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(opcode, &a, &b);
+    let (cmp_result, _, _, _) =
+        run_cmp::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(opcode.local_usize() as u8, &a, &b);
     let from_pc = tester.execution.last_from_pc().as_canonical_u32() as i32;
     let to_pc = tester.execution.last_to_pc().as_canonical_u32() as i32;
     let pc_inc = if cmp_result { imm } else { 4 };
@@ -83,93 +126,69 @@ fn run_rv32_branch_lt_rand_execute<E: InstructionExecutor<F>>(
     assert_eq!(to_pc, from_pc + pc_inc);
 }
 
-fn run_rv32_branch_lt_rand_test(opcode: BranchLessThanOpcode, num_ops: usize) {
-    let mut rng = create_seeded_rng();
-    const ABS_MAX_BRANCH: i32 = 1 << (RV_B_TYPE_IMM_BITS - 1);
-
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
+//////////////////////////////////////////////////////////////////////////////////////
+// POSITIVE TESTS
+//
+// Randomly generate computations and execute, ensuring that the generated trace
+// passes all constraints.
+//////////////////////////////////////////////////////////////////////////////////////
 
+#[test_case(BranchLessThanOpcode::BLT, 100)]
+#[test_case(BranchLessThanOpcode::BLTU, 100)]
+#[test_case(BranchLessThanOpcode::BGE, 100)]
+#[test_case(BranchLessThanOpcode::BGEU, 100)]
+fn rand_branch_lt_test(opcode: BranchLessThanOpcode, num_ops: usize) {
+    let mut rng = create_seeded_rng();
     let mut tester = VmChipTestBuilder::default();
-    let mut chip = Rv32BranchLessThanChip::<F>::new(
-        Rv32BranchAdapterChip::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-        ),
-        BranchLessThanCoreChip::new(bitwise_chip.clone(), BranchLessThanOpcode::CLASS_OFFSET),
-        tester.offline_memory_mutex_arc(),
-    );
+    let (mut harness, bitwise_chip) = create_test_chip(&mut tester);
 
     for _ in 0..num_ops {
-        let a = generate_long_number::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(&mut rng);
-        let b = if rng.gen_bool(0.5) {
-            a
-        } else {
-            generate_long_number::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(&mut rng)
-        };
-        let imm = rng.gen_range((-ABS_MAX_BRANCH)..ABS_MAX_BRANCH);
-        run_rv32_branch_lt_rand_execute(&mut tester, &mut chip, opcode, a, b, imm, &mut rng);
+        set_and_execute(
+            &mut tester,
+            &mut harness,
+            &mut rng,
+            opcode,
+            None,
+            None,
+            None,
+        );
     }
 
     // Test special case where b = c
-    run_rv32_branch_lt_rand_execute(
+    set_and_execute(
         &mut tester,
-        &mut chip,
-        opcode,
-        [101, 128, 202, 255],
-        [101, 128, 202, 255],
-        24,
+        &mut harness,
         &mut rng,
+        opcode,
+        Some([101, 128, 202, 255]),
+        Some([101, 128, 202, 255]),
+        Some(24),
     );
-    run_rv32_branch_lt_rand_execute(
+    set_and_execute(
         &mut tester,
-        &mut chip,
-        opcode,
-        [36, 0, 0, 0],
-        [36, 0, 0, 0],
-        24,
+        &mut harness,
         &mut rng,
+        opcode,
+        Some([36, 0, 0, 0]),
+        Some([36, 0, 0, 0]),
+        Some(24),
     );
 
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
+    let tester = tester
+        .build()
+        .load(harness)
+        .load_periphery(bitwise_chip)
+        .finalize();
     tester.simple_test().expect("Verification failed");
 }
 
-#[test]
-fn rv32_blt_rand_test() {
-    run_rv32_branch_lt_rand_test(BranchLessThanOpcode::BLT, 10);
-}
-
-#[test]
-fn rv32_bltu_rand_test() {
-    run_rv32_branch_lt_rand_test(BranchLessThanOpcode::BLTU, 12);
-}
-
-#[test]
-fn rv32_bge_rand_test() {
-    run_rv32_branch_lt_rand_test(BranchLessThanOpcode::BGE, 12);
-}
-
-#[test]
-fn rv32_bgeu_rand_test() {
-    run_rv32_branch_lt_rand_test(BranchLessThanOpcode::BGEU, 12);
-}
-
 //////////////////////////////////////////////////////////////////////////////////////
 // NEGATIVE TESTS
 //
 // Given a fake trace of a single operation, setup a chip and run the test. We replace
-// the write part of the trace and check that the core chip throws the expected error.
-// A dummy adapter is used so memory interactions don't indirectly cause false passes.
+// part of the trace and check that the chip throws the expected error.
 //////////////////////////////////////////////////////////////////////////////////////
 
-type Rv32BranchLessThanTestChip<F> = VmChipWrapper<
-    F,
-    TestAdapterChip<F>,
-    BranchLessThanCoreChip<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>,
->;
-
 #[derive(Clone, Copy, Default, PartialEq)]
 struct BranchLessThanPrankValues<const NUM_LIMBS: usize> {
     pub a_msb: Option<i32>,
@@ -179,66 +198,31 @@ struct BranchLessThanPrankValues<const NUM_LIMBS: usize> {
 }
 
 #[allow(clippy::too_many_arguments)]
-fn run_rv32_blt_negative_test(
+fn run_negative_branch_lt_test(
     opcode: BranchLessThanOpcode,
-    a: [u32; RV32_REGISTER_NUM_LIMBS],
-    b: [u32; RV32_REGISTER_NUM_LIMBS],
-    cmp_result: bool,
+    a: [u8; RV32_REGISTER_NUM_LIMBS],
+    b: [u8; RV32_REGISTER_NUM_LIMBS],
+    prank_cmp_result: bool,
     prank_vals: BranchLessThanPrankValues<RV32_REGISTER_NUM_LIMBS>,
     interaction_error: bool,
 ) {
-    let imm = 16u32;
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-
-    let mut tester: VmChipTestBuilder<BabyBear> = VmChipTestBuilder::default();
-    let mut chip = Rv32BranchLessThanTestChip::<F>::new(
-        TestAdapterChip::new(
-            vec![[a.map(F::from_canonical_u32), b.map(F::from_canonical_u32)].concat()],
-            vec![if cmp_result { Some(imm) } else { None }],
-            ExecutionBridge::new(tester.execution_bus(), tester.program_bus()),
-        ),
-        BranchLessThanCoreChip::new(bitwise_chip.clone(), BranchLessThanOpcode::CLASS_OFFSET),
-        tester.offline_memory_mutex_arc(),
-    );
+    let imm = 16i32;
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default();
+    let (mut harness, bitwise) = create_test_chip(&mut tester);
 
-    tester.execute(
-        &mut chip,
-        &Instruction::from_usize(opcode.global_opcode(), [0, 0, imm as usize, 1, 1]),
+    set_and_execute(
+        &mut tester,
+        &mut harness,
+        &mut rng,
+        opcode,
+        Some(a),
+        Some(b),
+        Some(imm),
     );
 
-    let trace_width = chip.trace_width();
-    let adapter_width = BaseAir::<F>::width(chip.adapter.air());
+    let adapter_width = BaseAir::<F>::width(&harness.air.adapter);
     let ge_opcode = opcode == BranchLessThanOpcode::BGE || opcode == BranchLessThanOpcode::BGEU;
-    let (_, _, a_sign, b_sign) = run_cmp::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(opcode, &a, &b);
-
-    if prank_vals != BranchLessThanPrankValues::default() {
-        debug_assert!(prank_vals.diff_val.is_some());
-        let a_msb = prank_vals.a_msb.unwrap_or(
-            a[RV32_REGISTER_NUM_LIMBS - 1] as i32 - if a_sign { 1 << RV32_CELL_BITS } else { 0 },
-        );
-        let b_msb = prank_vals.b_msb.unwrap_or(
-            b[RV32_REGISTER_NUM_LIMBS - 1] as i32 - if b_sign { 1 << RV32_CELL_BITS } else { 0 },
-        );
-        let signed_offset = match opcode {
-            BranchLessThanOpcode::BLT | BranchLessThanOpcode::BGE => 1 << (RV32_CELL_BITS - 1),
-            _ => 0,
-        };
-
-        bitwise_chip.clear();
-        bitwise_chip.request_range(
-            (a_msb + signed_offset) as u8 as u32,
-            (b_msb + signed_offset) as u8 as u32,
-        );
-
-        let diff_val = prank_vals
-            .diff_val
-            .unwrap()
-            .clamp(0, (1 << RV32_CELL_BITS) - 1);
-        if diff_val > 0 {
-            bitwise_chip.request_range(diff_val - 1, 0);
-        }
-    }
 
     let modify_trace = |trace: &mut DenseMatrix<BabyBear>| {
         let mut values = trace.row_slice(0).to_vec();
@@ -257,23 +241,19 @@ fn run_rv32_blt_negative_test(
         if let Some(diff_val) = prank_vals.diff_val {
             cols.diff_val = F::from_canonical_u32(diff_val);
         }
-        cols.cmp_result = F::from_bool(cmp_result);
-        cols.cmp_lt = F::from_bool(ge_opcode ^ cmp_result);
+        cols.cmp_result = F::from_bool(prank_cmp_result);
+        cols.cmp_lt = F::from_bool(ge_opcode ^ prank_cmp_result);
 
-        *trace = RowMajorMatrix::new(values, trace_width);
+        *trace = RowMajorMatrix::new(values, trace.width());
     };
 
     disable_debug_builder();
     let tester = tester
         .build()
-        .load_and_prank_trace(chip, modify_trace)
-        .load(bitwise_chip)
+        .load_and_prank_trace(harness, modify_trace)
+        .load_periphery(bitwise)
         .finalize();
-    tester.simple_test_with_expected_error(if interaction_error {
-        VerificationError::ChallengePhaseError
-    } else {
-        VerificationError::OodEvaluationMismatch
-    });
+    tester.simple_test_with_expected_error(get_verification_error(interaction_error));
 }
 
 #[test]
@@ -281,10 +261,10 @@ fn rv32_blt_wrong_lt_cmp_negative_test() {
     let a = [145, 34, 25, 205];
     let b = [73, 35, 25, 205];
     let prank_vals = Default::default();
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BLT, a, b, false, prank_vals, false);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BLTU, a, b, false, prank_vals, false);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BGE, a, b, true, prank_vals, false);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BGEU, a, b, true, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BLT, a, b, false, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BLTU, a, b, false, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BGE, a, b, true, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BGEU, a, b, true, prank_vals, false);
 }
 
 #[test]
@@ -292,10 +272,10 @@ fn rv32_blt_wrong_ge_cmp_negative_test() {
     let a = [73, 35, 25, 205];
     let b = [145, 34, 25, 205];
     let prank_vals = Default::default();
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BLT, a, b, true, prank_vals, false);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BLTU, a, b, true, prank_vals, false);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BGE, a, b, false, prank_vals, false);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BGEU, a, b, false, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BLT, a, b, true, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BLTU, a, b, true, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BGE, a, b, false, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BGEU, a, b, false, prank_vals, false);
 }
 
 #[test]
@@ -303,10 +283,10 @@ fn rv32_blt_wrong_eq_cmp_negative_test() {
     let a = [73, 35, 25, 205];
     let b = [73, 35, 25, 205];
     let prank_vals = Default::default();
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BLT, a, b, true, prank_vals, false);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BLTU, a, b, true, prank_vals, false);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BGE, a, b, false, prank_vals, false);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BGEU, a, b, false, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BLT, a, b, true, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BLTU, a, b, true, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BGE, a, b, false, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BGEU, a, b, false, prank_vals, false);
 }
 
 #[test]
@@ -317,10 +297,10 @@ fn rv32_blt_fake_diff_val_negative_test() {
         diff_val: Some(F::NEG_ONE.as_canonical_u32()),
         ..Default::default()
     };
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BLT, a, b, false, prank_vals, true);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BLTU, a, b, false, prank_vals, true);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BGE, a, b, true, prank_vals, true);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BGEU, a, b, true, prank_vals, true);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BLT, a, b, false, prank_vals, true);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BLTU, a, b, false, prank_vals, true);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BGE, a, b, true, prank_vals, true);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BGEU, a, b, true, prank_vals, true);
 }
 
 #[test]
@@ -332,10 +312,10 @@ fn rv32_blt_zero_diff_val_negative_test() {
         diff_val: Some(0),
         ..Default::default()
     };
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BLT, a, b, false, prank_vals, true);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BLTU, a, b, false, prank_vals, true);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BGE, a, b, true, prank_vals, true);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BGEU, a, b, true, prank_vals, true);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BLT, a, b, false, prank_vals, true);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BLTU, a, b, false, prank_vals, true);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BGE, a, b, true, prank_vals, true);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BGEU, a, b, true, prank_vals, true);
 }
 
 #[test]
@@ -347,10 +327,10 @@ fn rv32_blt_fake_diff_marker_negative_test() {
         diff_val: Some(72),
         ..Default::default()
     };
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BLT, a, b, false, prank_vals, false);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BLTU, a, b, false, prank_vals, false);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BGE, a, b, true, prank_vals, false);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BGEU, a, b, true, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BLT, a, b, false, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BLTU, a, b, false, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BGE, a, b, true, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BGEU, a, b, true, prank_vals, false);
 }
 
 #[test]
@@ -362,10 +342,10 @@ fn rv32_blt_zero_diff_marker_negative_test() {
         diff_val: Some(0),
         ..Default::default()
     };
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BLT, a, b, false, prank_vals, false);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BLTU, a, b, false, prank_vals, false);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BGE, a, b, true, prank_vals, false);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BGEU, a, b, true, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BLT, a, b, false, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BLTU, a, b, false, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BGE, a, b, true, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BGEU, a, b, true, prank_vals, false);
 }
 
 #[test]
@@ -378,8 +358,8 @@ fn rv32_blt_signed_wrong_a_msb_negative_test() {
         diff_val: Some(1),
         ..Default::default()
     };
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BLT, a, b, false, prank_vals, false);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BGE, a, b, true, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BLT, a, b, false, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BGE, a, b, true, prank_vals, false);
 }
 
 #[test]
@@ -392,8 +372,8 @@ fn rv32_blt_signed_wrong_a_msb_sign_negative_test() {
         diff_val: Some(256),
         ..Default::default()
     };
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BLT, a, b, false, prank_vals, true);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BGE, a, b, true, prank_vals, true);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BLT, a, b, false, prank_vals, true);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BGE, a, b, true, prank_vals, true);
 }
 
 #[test]
@@ -406,8 +386,8 @@ fn rv32_blt_signed_wrong_b_msb_negative_test() {
         diff_val: Some(1),
         ..Default::default()
     };
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BLT, a, b, true, prank_vals, false);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BGE, a, b, false, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BLT, a, b, true, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BGE, a, b, false, prank_vals, false);
 }
 
 #[test]
@@ -420,8 +400,8 @@ fn rv32_blt_signed_wrong_b_msb_sign_negative_test() {
         diff_val: Some(256),
         ..Default::default()
     };
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BLT, a, b, true, prank_vals, true);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BGE, a, b, false, prank_vals, true);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BLT, a, b, true, prank_vals, true);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BGE, a, b, false, prank_vals, true);
 }
 
 #[test]
@@ -434,8 +414,8 @@ fn rv32_blt_unsigned_wrong_a_msb_negative_test() {
         diff_val: Some(1),
         ..Default::default()
     };
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BLTU, a, b, true, prank_vals, false);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BGEU, a, b, false, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BLTU, a, b, true, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BGEU, a, b, false, prank_vals, false);
 }
 
 #[test]
@@ -448,8 +428,8 @@ fn rv32_blt_unsigned_wrong_a_msb_sign_negative_test() {
         diff_val: Some(256),
         ..Default::default()
     };
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BLTU, a, b, true, prank_vals, true);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BGEU, a, b, false, prank_vals, true);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BLTU, a, b, true, prank_vals, true);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BGEU, a, b, false, prank_vals, true);
 }
 
 #[test]
@@ -462,8 +442,8 @@ fn rv32_blt_unsigned_wrong_b_msb_negative_test() {
         diff_val: Some(1),
         ..Default::default()
     };
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BLTU, a, b, false, prank_vals, false);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BGEU, a, b, true, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BLTU, a, b, false, prank_vals, false);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BGEU, a, b, true, prank_vals, false);
 }
 
 #[test]
@@ -476,8 +456,8 @@ fn rv32_blt_unsigned_wrong_b_msb_sign_negative_test() {
         diff_val: Some(256),
         ..Default::default()
     };
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BLTU, a, b, false, prank_vals, true);
-    run_rv32_blt_negative_test(BranchLessThanOpcode::BGEU, a, b, true, prank_vals, true);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BLTU, a, b, false, prank_vals, true);
+    run_negative_branch_lt_test(BranchLessThanOpcode::BGEU, a, b, true, prank_vals, true);
 }
 
 ///////////////////////////////////////////////////////////////////////////////////////
@@ -487,51 +467,52 @@ fn rv32_blt_unsigned_wrong_b_msb_sign_negative_test() {
 ///////////////////////////////////////////////////////////////////////////////////////
 
 #[test]
-fn execute_pc_increment_sanity_test() {
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-    let core = BranchLessThanCoreChip::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>::new(
-        bitwise_chip,
-        BranchLessThanOpcode::CLASS_OFFSET,
+fn execute_roundtrip_sanity_test() {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default();
+    let (mut chip, _) = create_test_chip(&mut tester);
+
+    let x = [145, 34, 25, 205];
+    set_and_execute(
+        &mut tester,
+        &mut chip,
+        &mut rng,
+        BranchLessThanOpcode::BLT,
+        Some(x),
+        Some(x),
+        Some(8),
     );
 
-    let mut instruction = Instruction::<F> {
-        opcode: BranchLessThanOpcode::BLT.global_opcode(),
-        c: F::from_canonical_u8(8),
-        ..Default::default()
-    };
-    let x: [F; RV32_REGISTER_NUM_LIMBS] = [145, 34, 25, 205].map(F::from_canonical_u32);
-
-    let result = <BranchLessThanCoreChip<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS> as VmCoreChip<
-        F,
-        BasicAdapterInterface<F, ImmInstruction<F>, 2, 0, RV32_REGISTER_NUM_LIMBS, 0>,
-    >>::execute_instruction(&core, &instruction, 0, [x, x]);
-    let (output, _) = result.expect("execute_instruction failed");
-    assert!(output.to_pc.is_none());
-
-    instruction.opcode = BranchLessThanOpcode::BGE.global_opcode();
-    let result = <BranchLessThanCoreChip<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS> as VmCoreChip<
-        F,
-        BasicAdapterInterface<F, ImmInstruction<F>, 2, 0, RV32_REGISTER_NUM_LIMBS, 0>,
-    >>::execute_instruction(&core, &instruction, 0, [x, x]);
-    let (output, _) = result.expect("execute_instruction failed");
-    assert!(output.to_pc.is_some());
-    assert_eq!(output.to_pc.unwrap(), 8);
+    set_and_execute(
+        &mut tester,
+        &mut chip,
+        &mut rng,
+        BranchLessThanOpcode::BGE,
+        Some(x),
+        Some(x),
+        Some(8),
+    );
 }
 
 #[test]
 fn run_cmp_unsigned_sanity_test() {
-    let x: [u32; RV32_REGISTER_NUM_LIMBS] = [145, 34, 25, 205];
-    let y: [u32; RV32_REGISTER_NUM_LIMBS] = [73, 35, 25, 205];
-    let (cmp_result, diff_idx, x_sign, y_sign) =
-        run_cmp::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(BranchLessThanOpcode::BLTU, &x, &y);
+    let x: [u8; RV32_REGISTER_NUM_LIMBS] = [145, 34, 25, 205];
+    let y: [u8; RV32_REGISTER_NUM_LIMBS] = [73, 35, 25, 205];
+    let (cmp_result, diff_idx, x_sign, y_sign) = run_cmp::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(
+        BranchLessThanOpcode::BLTU as u8,
+        &x,
+        &y,
+    );
     assert!(cmp_result);
     assert_eq!(diff_idx, 1);
     assert!(!x_sign); // unsigned
     assert!(!y_sign); // unsigned
 
-    let (cmp_result, diff_idx, x_sign, y_sign) =
-        run_cmp::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(BranchLessThanOpcode::BGEU, &x, &y);
+    let (cmp_result, diff_idx, x_sign, y_sign) = run_cmp::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(
+        BranchLessThanOpcode::BGEU as u8,
+        &x,
+        &y,
+    );
     assert!(!cmp_result);
     assert_eq!(diff_idx, 1);
     assert!(!x_sign); // unsigned
@@ -540,17 +521,17 @@ fn run_cmp_unsigned_sanity_test() {
 
 #[test]
 fn run_cmp_same_sign_sanity_test() {
-    let x: [u32; RV32_REGISTER_NUM_LIMBS] = [145, 34, 25, 205];
-    let y: [u32; RV32_REGISTER_NUM_LIMBS] = [73, 35, 25, 205];
+    let x: [u8; RV32_REGISTER_NUM_LIMBS] = [145, 34, 25, 205];
+    let y: [u8; RV32_REGISTER_NUM_LIMBS] = [73, 35, 25, 205];
     let (cmp_result, diff_idx, x_sign, y_sign) =
-        run_cmp::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(BranchLessThanOpcode::BLT, &x, &y);
+        run_cmp::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(BranchLessThanOpcode::BLT as u8, &x, &y);
     assert!(cmp_result);
     assert_eq!(diff_idx, 1);
     assert!(x_sign); // negative
     assert!(y_sign); // negative
 
     let (cmp_result, diff_idx, x_sign, y_sign) =
-        run_cmp::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(BranchLessThanOpcode::BGE, &x, &y);
+        run_cmp::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(BranchLessThanOpcode::BGE as u8, &x, &y);
     assert!(!cmp_result);
     assert_eq!(diff_idx, 1);
     assert!(x_sign); // negative
@@ -559,17 +540,17 @@ fn run_cmp_same_sign_sanity_test() {
 
 #[test]
 fn run_cmp_diff_sign_sanity_test() {
-    let x: [u32; RV32_REGISTER_NUM_LIMBS] = [45, 35, 25, 55];
-    let y: [u32; RV32_REGISTER_NUM_LIMBS] = [173, 34, 25, 205];
+    let x: [u8; RV32_REGISTER_NUM_LIMBS] = [45, 35, 25, 55];
+    let y: [u8; RV32_REGISTER_NUM_LIMBS] = [173, 34, 25, 205];
     let (cmp_result, diff_idx, x_sign, y_sign) =
-        run_cmp::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(BranchLessThanOpcode::BLT, &x, &y);
+        run_cmp::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(BranchLessThanOpcode::BLT as u8, &x, &y);
     assert!(!cmp_result);
     assert_eq!(diff_idx, 3);
     assert!(!x_sign); // positive
     assert!(y_sign); // negative
 
     let (cmp_result, diff_idx, x_sign, y_sign) =
-        run_cmp::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(BranchLessThanOpcode::BGE, &x, &y);
+        run_cmp::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(BranchLessThanOpcode::BGE as u8, &x, &y);
     assert!(cmp_result);
     assert_eq!(diff_idx, 3);
     assert!(!x_sign); // positive
@@ -578,27 +559,33 @@ fn run_cmp_diff_sign_sanity_test() {
 
 #[test]
 fn run_cmp_eq_sanity_test() {
-    let x: [u32; RV32_REGISTER_NUM_LIMBS] = [45, 35, 25, 55];
+    let x: [u8; RV32_REGISTER_NUM_LIMBS] = [45, 35, 25, 55];
     let (cmp_result, diff_idx, x_sign, y_sign) =
-        run_cmp::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(BranchLessThanOpcode::BLT, &x, &x);
+        run_cmp::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(BranchLessThanOpcode::BLT as u8, &x, &x);
     assert!(!cmp_result);
     assert_eq!(diff_idx, RV32_REGISTER_NUM_LIMBS);
     assert_eq!(x_sign, y_sign);
 
-    let (cmp_result, diff_idx, x_sign, y_sign) =
-        run_cmp::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(BranchLessThanOpcode::BLTU, &x, &x);
+    let (cmp_result, diff_idx, x_sign, y_sign) = run_cmp::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(
+        BranchLessThanOpcode::BLTU as u8,
+        &x,
+        &x,
+    );
     assert!(!cmp_result);
     assert_eq!(diff_idx, RV32_REGISTER_NUM_LIMBS);
     assert_eq!(x_sign, y_sign);
 
     let (cmp_result, diff_idx, x_sign, y_sign) =
-        run_cmp::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(BranchLessThanOpcode::BGE, &x, &x);
+        run_cmp::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(BranchLessThanOpcode::BGE as u8, &x, &x);
     assert!(cmp_result);
     assert_eq!(diff_idx, RV32_REGISTER_NUM_LIMBS);
     assert_eq!(x_sign, y_sign);
 
-    let (cmp_result, diff_idx, x_sign, y_sign) =
-        run_cmp::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(BranchLessThanOpcode::BGEU, &x, &x);
+    let (cmp_result, diff_idx, x_sign, y_sign) = run_cmp::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(
+        BranchLessThanOpcode::BGEU as u8,
+        &x,
+        &x,
+    );
     assert!(cmp_result);
     assert_eq!(diff_idx, RV32_REGISTER_NUM_LIMBS);
     assert_eq!(x_sign, y_sign);
diff --git a/extensions/rv32im/circuit/src/divrem/core.rs b/extensions/rv32im/circuit/src/divrem/core.rs
index b21c32345e..b024e1350f 100644
--- a/extensions/rv32im/circuit/src/divrem/core.rs
+++ b/extensions/rv32im/circuit/src/divrem/core.rs
@@ -5,17 +5,18 @@ use std::{
 
 use num_bigint::BigUint;
 use num_integer::Integer;
-use openvm_circuit::arch::{
-    AdapterAirContext, AdapterRuntimeContext, MinimalInstruction, Result, VmAdapterInterface,
-    VmCoreAir, VmCoreChip,
+use openvm_circuit::{
+    arch::*,
+    system::memory::{online::TracingMemory, MemoryAuxColsFactory},
 };
 use openvm_circuit_primitives::{
     bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
     range_tuple::{RangeTupleCheckerBus, SharedRangeTupleCheckerChip},
     utils::{not, select},
+    AlignedBytesBorrow,
 };
 use openvm_circuit_primitives_derive::AlignedBorrow;
-use openvm_instructions::{instruction::Instruction, LocalOpcode};
+use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP, LocalOpcode};
 use openvm_rv32im_transpiler::DivRemOpcode;
 use openvm_stark_backend::{
     interaction::InteractionBuilder,
@@ -23,8 +24,6 @@ use openvm_stark_backend::{
     p3_field::{Field, FieldAlgebra, PrimeField32},
     rap::BaseAirWithPublicValues,
 };
-use serde::{de::DeserializeOwned, Deserialize, Serialize};
-use serde_big_array::BigArray;
 use strum::IntoEnumIterator;
 
 #[repr(C)]
@@ -67,7 +66,7 @@ pub struct DivRemCoreCols<T, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
     pub opcode_remu_flag: T,
 }
 
-#[derive(Copy, Clone, Debug)]
+#[derive(Copy, Clone, Debug, derive_new::new)]
 pub struct DivRemCoreAir<const NUM_LIMBS: usize, const LIMB_BITS: usize> {
     pub bitwise_lookup_bus: BitwiseOperationLookupBus,
     pub range_tuple_bus: RangeTupleCheckerBus<2>,
@@ -342,14 +341,38 @@ where
     }
 }
 
-pub struct DivRemCoreChip<const NUM_LIMBS: usize, const LIMB_BITS: usize> {
-    pub air: DivRemCoreAir<NUM_LIMBS, LIMB_BITS>,
+#[derive(Debug, Eq, PartialEq)]
+#[repr(u8)]
+pub(super) enum DivRemCoreSpecialCase {
+    None,
+    ZeroDivisor,
+    SignedOverflow,
+}
+
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct DivRemCoreRecord<const NUM_LIMBS: usize> {
+    pub b: [u8; NUM_LIMBS],
+    pub c: [u8; NUM_LIMBS],
+    pub local_opcode: u8,
+}
+
+#[derive(Clone, Copy, derive_new::new)]
+pub struct DivRemExecutor<A, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
+    adapter: A,
+    pub offset: usize,
+}
+
+pub struct DivRemFiller<A, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
+    adapter: A,
+    pub offset: usize,
     pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<LIMB_BITS>,
     pub range_tuple_chip: SharedRangeTupleCheckerChip<2>,
 }
 
-impl<const NUM_LIMBS: usize, const LIMB_BITS: usize> DivRemCoreChip<NUM_LIMBS, LIMB_BITS> {
+impl<A, const NUM_LIMBS: usize, const LIMB_BITS: usize> DivRemFiller<A, NUM_LIMBS, LIMB_BITS> {
     pub fn new(
+        adapter: A,
         bitwise_lookup_chip: SharedBitwiseOperationLookupChip<LIMB_BITS>,
         range_tuple_chip: SharedRangeTupleCheckerChip<2>,
         offset: usize,
@@ -369,83 +392,105 @@ impl<const NUM_LIMBS: usize, const LIMB_BITS: usize> DivRemCoreChip<NUM_LIMBS, L
         );
 
         Self {
-            air: DivRemCoreAir {
-                bitwise_lookup_bus: bitwise_lookup_chip.bus(),
-                range_tuple_bus: *range_tuple_chip.bus(),
-                offset,
-            },
+            adapter,
+            offset,
             bitwise_lookup_chip,
             range_tuple_chip,
         }
     }
 }
 
-#[repr(C)]
-#[derive(Clone, Debug, Serialize, Deserialize)]
-#[serde(bound = "T: Serialize + DeserializeOwned")]
-pub struct DivRemCoreRecord<T, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
-    #[serde(with = "BigArray")]
-    pub b: [T; NUM_LIMBS],
-    #[serde(with = "BigArray")]
-    pub c: [T; NUM_LIMBS],
-    #[serde(with = "BigArray")]
-    pub q: [T; NUM_LIMBS],
-    #[serde(with = "BigArray")]
-    pub r: [T; NUM_LIMBS],
-    pub zero_divisor: T,
-    pub r_zero: T,
-    pub b_sign: T,
-    pub c_sign: T,
-    pub q_sign: T,
-    pub sign_xor: T,
-    pub c_sum_inv: T,
-    pub r_sum_inv: T,
-    #[serde(with = "BigArray")]
-    pub r_prime: [T; NUM_LIMBS],
-    #[serde(with = "BigArray")]
-    pub r_inv: [T; NUM_LIMBS],
-    pub lt_diff_val: T,
-    pub lt_diff_idx: usize,
-    pub opcode: DivRemOpcode,
-}
-
-#[derive(Debug, Eq, PartialEq)]
-#[repr(u8)]
-pub(super) enum DivRemCoreSpecialCase {
-    None,
-    ZeroDivisor,
-    SignedOverflow,
-}
-
-impl<F: PrimeField32, I: VmAdapterInterface<F>, const NUM_LIMBS: usize, const LIMB_BITS: usize>
-    VmCoreChip<F, I> for DivRemCoreChip<NUM_LIMBS, LIMB_BITS>
+impl<F, A, RA, const NUM_LIMBS: usize, const LIMB_BITS: usize> PreflightExecutor<F, RA>
+    for DivRemExecutor<A, NUM_LIMBS, LIMB_BITS>
 where
-    I::Reads: Into<[[F; NUM_LIMBS]; 2]>,
-    I::Writes: From<[[F; NUM_LIMBS]; 1]>,
+    F: PrimeField32,
+    A: 'static
+        + AdapterTraceExecutor<
+            F,
+            ReadData: Into<[[u8; NUM_LIMBS]; 2]>,
+            WriteData: From<[[u8; NUM_LIMBS]; 1]>,
+        >,
+    for<'buf> RA: RecordArena<
+        'buf,
+        EmptyAdapterCoreLayout<F, A>,
+        (A::RecordMut<'buf>, &'buf mut DivRemCoreRecord<NUM_LIMBS>),
+    >,
 {
-    type Record = DivRemCoreRecord<F, NUM_LIMBS, LIMB_BITS>;
-    type Air = DivRemCoreAir<NUM_LIMBS, LIMB_BITS>;
+    fn get_opcode_name(&self, opcode: usize) -> String {
+        format!("{:?}", DivRemOpcode::from_usize(opcode - self.offset))
+    }
 
-    #[allow(clippy::type_complexity)]
-    fn execute_instruction(
+    fn execute(
         &self,
+        state: VmStateMut<F, TracingMemory, RA>,
         instruction: &Instruction<F>,
-        _from_pc: u32,
-        reads: I::Reads,
-    ) -> Result<(AdapterRuntimeContext<F, I>, Self::Record)> {
+    ) -> Result<(), ExecutionError> {
         let Instruction { opcode, .. } = instruction;
-        let divrem_opcode = DivRemOpcode::from_usize(opcode.local_opcode_idx(self.air.offset));
 
-        let is_div = divrem_opcode == DivRemOpcode::DIV || divrem_opcode == DivRemOpcode::DIVU;
-        let is_signed = divrem_opcode == DivRemOpcode::DIV || divrem_opcode == DivRemOpcode::REM;
+        let (mut adapter_record, core_record) = state.ctx.alloc(EmptyAdapterCoreLayout::new());
+
+        A::start(*state.pc, state.memory, &mut adapter_record);
 
-        let data: [[F; NUM_LIMBS]; 2] = reads.into();
-        let b = data[0].map(|x| x.as_canonical_u32());
-        let c = data[1].map(|y| y.as_canonical_u32());
-        let (q, r, b_sign, c_sign, q_sign, case) =
-            run_divrem::<NUM_LIMBS, LIMB_BITS>(is_signed, &b, &c);
+        core_record.local_opcode = opcode.local_opcode_idx(self.offset) as u8;
 
-        let carries = run_mul_carries::<NUM_LIMBS, LIMB_BITS>(is_signed, &c, &q, &r, q_sign);
+        let is_signed = core_record.local_opcode == DivRemOpcode::DIV as u8
+            || core_record.local_opcode == DivRemOpcode::REM as u8;
+        let is_div = core_record.local_opcode == DivRemOpcode::DIV as u8
+            || core_record.local_opcode == DivRemOpcode::DIVU as u8;
+
+        [core_record.b, core_record.c] = self
+            .adapter
+            .read(state.memory, instruction, &mut adapter_record)
+            .into();
+
+        let b = core_record.b.map(u32::from);
+        let c = core_record.c.map(u32::from);
+        let (q, r, _, _, _, _) = run_divrem::<NUM_LIMBS, LIMB_BITS>(is_signed, &b, &c);
+
+        let rd = if is_div {
+            q.map(|x| x as u8)
+        } else {
+            r.map(|x| x as u8)
+        };
+
+        self.adapter
+            .write(state.memory, instruction, [rd].into(), &mut adapter_record);
+
+        *state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP);
+
+        Ok(())
+    }
+}
+
+impl<F, A, const NUM_LIMBS: usize, const LIMB_BITS: usize> TraceFiller<F>
+    for DivRemFiller<A, NUM_LIMBS, LIMB_BITS>
+where
+    F: PrimeField32,
+    A: 'static + AdapterTraceFiller<F>,
+{
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, row_slice: &mut [F]) {
+        let (adapter_row, mut core_row) = unsafe { row_slice.split_at_mut_unchecked(A::WIDTH) };
+        self.adapter.fill_trace_row(mem_helper, adapter_row);
+        let record: &DivRemCoreRecord<NUM_LIMBS> =
+            unsafe { get_record_from_slice(&mut core_row, ()) };
+        let core_row: &mut DivRemCoreCols<F, NUM_LIMBS, LIMB_BITS> = core_row.borrow_mut();
+
+        let opcode = DivRemOpcode::from_usize(record.local_opcode as usize);
+        let is_signed = opcode == DivRemOpcode::DIV || opcode == DivRemOpcode::REM;
+
+        let (q, r, b_sign, c_sign, q_sign, case) = run_divrem::<NUM_LIMBS, LIMB_BITS>(
+            is_signed,
+            &record.b.map(u32::from),
+            &record.c.map(u32::from),
+        );
+
+        let carries = run_mul_carries::<NUM_LIMBS, LIMB_BITS>(
+            is_signed,
+            &record.c.map(u32::from),
+            &q,
+            &r,
+            q_sign,
+        );
         for i in 0..NUM_LIMBS {
             self.range_tuple_chip.add_count(&[q[i], carries[i]]);
             self.range_tuple_chip
@@ -464,94 +509,61 @@ where
             let b_sign_mask = if b_sign { 1 << (LIMB_BITS - 1) } else { 0 };
             let c_sign_mask = if c_sign { 1 << (LIMB_BITS - 1) } else { 0 };
             self.bitwise_lookup_chip.request_range(
-                (b[NUM_LIMBS - 1] - b_sign_mask) << 1,
-                (c[NUM_LIMBS - 1] - c_sign_mask) << 1,
+                (record.b[NUM_LIMBS - 1] as u32 - b_sign_mask) << 1,
+                (record.c[NUM_LIMBS - 1] as u32 - c_sign_mask) << 1,
             );
         }
 
-        let c_sum_f = data[1].iter().fold(F::ZERO, |acc, c| acc + *c);
-        let c_sum_inv_f = c_sum_f.try_inverse().unwrap_or(F::ZERO);
+        // Write fields in reverse order, since `record` is read from this same row buffer
+        core_row.opcode_remu_flag = F::from_bool(opcode == DivRemOpcode::REMU);
+        core_row.opcode_rem_flag = F::from_bool(opcode == DivRemOpcode::REM);
+        core_row.opcode_divu_flag = F::from_bool(opcode == DivRemOpcode::DIVU);
+        core_row.opcode_div_flag = F::from_bool(opcode == DivRemOpcode::DIV);
 
-        let r_sum_f = r
-            .iter()
-            .fold(F::ZERO, |acc, r| acc + F::from_canonical_u32(*r));
-        let r_sum_inv_f = r_sum_f.try_inverse().unwrap_or(F::ZERO);
-
-        let (lt_diff_idx, lt_diff_val) = if case == DivRemCoreSpecialCase::None && !r_zero {
-            let idx = run_sltu_diff_idx(&c, &r_prime, c_sign);
+        core_row.lt_diff = F::ZERO;
+        core_row.lt_marker = [F::ZERO; NUM_LIMBS];
+        if case == DivRemCoreSpecialCase::None && !r_zero {
+            let idx = run_sltu_diff_idx(&record.c.map(u32::from), &r_prime, c_sign);
             let val = if c_sign {
-                r_prime[idx] - c[idx]
+                r_prime[idx] - record.c[idx] as u32
             } else {
-                c[idx] - r_prime[idx]
+                record.c[idx] as u32 - r_prime[idx]
             };
             self.bitwise_lookup_chip.request_range(val - 1, 0);
-            (idx, val)
-        } else {
-            (NUM_LIMBS, 0)
-        };
+            core_row.lt_diff = F::from_canonical_u32(val);
+            core_row.lt_marker[idx] = F::ONE;
+        }
 
         let r_prime_f = r_prime.map(F::from_canonical_u32);
-        let output = AdapterRuntimeContext::without_pc([
-            (if is_div { &q } else { &r }).map(F::from_canonical_u32)
-        ]);
-        let record = DivRemCoreRecord {
-            opcode: divrem_opcode,
-            b: data[0],
-            c: data[1],
-            q: q.map(F::from_canonical_u32),
-            r: r.map(F::from_canonical_u32),
-            zero_divisor: F::from_bool(case == DivRemCoreSpecialCase::ZeroDivisor),
-            r_zero: F::from_bool(r_zero),
-            b_sign: F::from_bool(b_sign),
-            c_sign: F::from_bool(c_sign),
-            q_sign: F::from_bool(q_sign),
-            sign_xor: F::from_bool(sign_xor),
-            c_sum_inv: c_sum_inv_f,
-            r_sum_inv: r_sum_inv_f,
-            r_prime: r_prime_f,
-            r_inv: r_prime_f.map(|r| (r - F::from_canonical_u32(256)).inverse()),
-            lt_diff_val: F::from_canonical_u32(lt_diff_val),
-            lt_diff_idx,
-        };
+        core_row.r_inv = r_prime_f.map(|r| (r - F::from_canonical_u32(256)).inverse());
+        core_row.r_prime = r_prime_f;
 
-        Ok((output, record))
-    }
+        let r_sum_f = r
+            .iter()
+            .fold(F::ZERO, |acc, r| acc + F::from_canonical_u32(*r));
+        core_row.r_sum_inv = r_sum_f.try_inverse().unwrap_or(F::ZERO);
 
-    fn get_opcode_name(&self, opcode: usize) -> String {
-        format!("{:?}", DivRemOpcode::from_usize(opcode - self.air.offset))
-    }
+        let c_sum_f = F::from_canonical_u32(record.c.iter().fold(0, |acc, c| acc + *c as u32));
+        core_row.c_sum_inv = c_sum_f.try_inverse().unwrap_or(F::ZERO);
 
-    fn generate_trace_row(&self, row_slice: &mut [F], record: Self::Record) {
-        let row_slice: &mut DivRemCoreCols<_, NUM_LIMBS, LIMB_BITS> = row_slice.borrow_mut();
-        row_slice.b = record.b;
-        row_slice.c = record.c;
-        row_slice.q = record.q;
-        row_slice.r = record.r;
-        row_slice.zero_divisor = record.zero_divisor;
-        row_slice.r_zero = record.r_zero;
-        row_slice.b_sign = record.b_sign;
-        row_slice.c_sign = record.c_sign;
-        row_slice.q_sign = record.q_sign;
-        row_slice.sign_xor = record.sign_xor;
-        row_slice.c_sum_inv = record.c_sum_inv;
-        row_slice.r_sum_inv = record.r_sum_inv;
-        row_slice.r_prime = record.r_prime;
-        row_slice.r_inv = record.r_inv;
-        row_slice.lt_marker = array::from_fn(|i| F::from_bool(i == record.lt_diff_idx));
-        row_slice.lt_diff = record.lt_diff_val;
-        row_slice.opcode_div_flag = F::from_bool(record.opcode == DivRemOpcode::DIV);
-        row_slice.opcode_divu_flag = F::from_bool(record.opcode == DivRemOpcode::DIVU);
-        row_slice.opcode_rem_flag = F::from_bool(record.opcode == DivRemOpcode::REM);
-        row_slice.opcode_remu_flag = F::from_bool(record.opcode == DivRemOpcode::REMU);
-    }
+        core_row.sign_xor = F::from_bool(sign_xor);
+        core_row.q_sign = F::from_bool(q_sign);
+        core_row.c_sign = F::from_bool(c_sign);
+        core_row.b_sign = F::from_bool(b_sign);
+
+        core_row.r_zero = F::from_bool(r_zero);
+        core_row.zero_divisor = F::from_bool(case == DivRemCoreSpecialCase::ZeroDivisor);
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        core_row.r = r.map(F::from_canonical_u32);
+        core_row.q = q.map(F::from_canonical_u32);
+        core_row.c = record.c.map(F::from_canonical_u8);
+        core_row.b = record.b.map(F::from_canonical_u8);
     }
 }
 
 // Returns (quotient, remainder, x_sign, y_sign, q_sign, case) where case = 0 for normal, 1
 // for zero divisor, and 2 for signed overflow
+#[inline(always)]
 pub(super) fn run_divrem<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
     signed: bool,
     x: &[u32; NUM_LIMBS],
@@ -628,6 +640,7 @@ pub(super) fn run_divrem<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
     (q, r, x_sign, y_sign, q_sign, DivRemCoreSpecialCase::None)
 }
 
+#[inline(always)]
 pub(super) fn run_sltu_diff_idx<const NUM_LIMBS: usize>(
     x: &[u32; NUM_LIMBS],
     y: &[u32; NUM_LIMBS],
@@ -644,6 +657,7 @@ pub(super) fn run_sltu_diff_idx<const NUM_LIMBS: usize>(
 }
 
 // returns carries of d * q + r
+#[inline(always)]
 pub(super) fn run_mul_carries<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
     signed: bool,
     d: &[u32; NUM_LIMBS],
@@ -684,6 +698,7 @@ pub(super) fn run_mul_carries<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
     carry
 }
 
+#[inline(always)]
 fn limbs_to_biguint<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
     x: &[u32; NUM_LIMBS],
 ) -> BigUint {
@@ -696,6 +711,7 @@ fn limbs_to_biguint<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
     res
 }
 
+#[inline(always)]
 fn biguint_to_limbs<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
     x: &BigUint,
 ) -> [u32; NUM_LIMBS] {
@@ -711,6 +727,7 @@ fn biguint_to_limbs<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
     res
 }
 
+#[inline(always)]
 fn negate<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
     x: &[u32; NUM_LIMBS],
 ) -> [u32; NUM_LIMBS] {
diff --git a/extensions/rv32im/circuit/src/divrem/execution.rs b/extensions/rv32im/circuit/src/divrem/execution.rs
new file mode 100644
index 0000000000..dd87de540b
--- /dev/null
+++ b/extensions/rv32im/circuit/src/divrem/execution.rs
@@ -0,0 +1,208 @@
+use std::{
+    borrow::{Borrow, BorrowMut},
+    mem::size_of,
+};
+
+use openvm_circuit::{
+    arch::{
+        E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait,
+        MeteredExecutor, StaticProgramError, VmExecState,
+    },
+    system::memory::online::GuestMemory,
+};
+use openvm_circuit_primitives_derive::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_REGISTER_AS, RV32_REGISTER_NUM_LIMBS},
+    LocalOpcode,
+};
+use openvm_rv32im_transpiler::DivRemOpcode;
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::core::DivRemExecutor;
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct DivRemPreCompute {
+    a: u8,
+    b: u8,
+    c: u8,
+}
+
+impl<A, const LIMB_BITS: usize> DivRemExecutor<A, { RV32_REGISTER_NUM_LIMBS }, LIMB_BITS> {
+    #[inline(always)]
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut DivRemPreCompute,
+    ) -> Result<DivRemOpcode, StaticProgramError> {
+        let &Instruction {
+            opcode, a, b, c, d, ..
+        } = inst;
+        let local_opcode = DivRemOpcode::from_usize(opcode.local_opcode_idx(self.offset));
+        if d.as_canonical_u32() != RV32_REGISTER_AS {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+        let pre_compute: &mut DivRemPreCompute = data.borrow_mut();
+        *pre_compute = DivRemPreCompute {
+            a: a.as_canonical_u32() as u8,
+            b: b.as_canonical_u32() as u8,
+            c: c.as_canonical_u32() as u8,
+        };
+        Ok(local_opcode)
+    }
+}
+
+impl<F, A, const LIMB_BITS: usize> Executor<F>
+    for DivRemExecutor<A, { RV32_REGISTER_NUM_LIMBS }, LIMB_BITS>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize {
+        size_of::<DivRemPreCompute>()
+    }
+
+    #[inline(always)]
+    fn pre_compute<Ctx: ExecutionCtxTrait>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let data: &mut DivRemPreCompute = data.borrow_mut();
+        let local_opcode = self.pre_compute_impl(pc, inst, data)?;
+        let fn_ptr = match local_opcode {
+            DivRemOpcode::DIV => execute_e1_impl::<_, _, DivOp>,
+            DivRemOpcode::DIVU => execute_e1_impl::<_, _, DivuOp>,
+            DivRemOpcode::REM => execute_e1_impl::<_, _, RemOp>,
+            DivRemOpcode::REMU => execute_e1_impl::<_, _, RemuOp>,
+        };
+        Ok(fn_ptr)
+    }
+}
+
+impl<F, A, const LIMB_BITS: usize> MeteredExecutor<F>
+    for DivRemExecutor<A, { RV32_REGISTER_NUM_LIMBS }, LIMB_BITS>
+where
+    F: PrimeField32,
+{
+    fn metered_pre_compute_size(&self) -> usize {
+        size_of::<E2PreCompute<DivRemPreCompute>>()
+    }
+
+    fn metered_pre_compute<Ctx>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        let data: &mut E2PreCompute<DivRemPreCompute> = data.borrow_mut();
+        data.chip_idx = chip_idx as u32;
+        let local_opcode = self.pre_compute_impl(pc, inst, &mut data.data)?;
+        let fn_ptr = match local_opcode {
+            DivRemOpcode::DIV => execute_e2_impl::<_, _, DivOp>,
+            DivRemOpcode::DIVU => execute_e2_impl::<_, _, DivuOp>,
+            DivRemOpcode::REM => execute_e2_impl::<_, _, RemOp>,
+            DivRemOpcode::REMU => execute_e2_impl::<_, _, RemuOp>,
+        };
+        Ok(fn_ptr)
+    }
+}
+
+unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait, OP: DivRemOp>(
+    pre_compute: &DivRemPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let rs1 = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.b as u32);
+    let rs2 = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.c as u32);
+    let result = <OP as DivRemOp>::compute(rs1, rs2);
+    vm_state.vm_write::<u8, 4>(RV32_REGISTER_AS, pre_compute.a as u32, &result);
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+}
+
+unsafe fn execute_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait, OP: DivRemOp>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &DivRemPreCompute = pre_compute.borrow();
+    execute_e12_impl::<F, CTX, OP>(pre_compute, vm_state);
+}
+
+unsafe fn execute_e2_impl<F: PrimeField32, CTX: MeteredExecutionCtxTrait, OP: DivRemOp>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &E2PreCompute<DivRemPreCompute> = pre_compute.borrow();
+    vm_state
+        .ctx
+        .on_height_change(pre_compute.chip_idx as usize, 1);
+    execute_e12_impl::<F, CTX, OP>(&pre_compute.data, vm_state);
+}
+
+trait DivRemOp {
+    fn compute(rs1: [u8; 4], rs2: [u8; 4]) -> [u8; 4];
+}
+struct DivOp;
+struct DivuOp;
+struct RemOp;
+struct RemuOp;
+
+impl DivRemOp for DivOp {
+    #[inline(always)]
+    fn compute(rs1: [u8; 4], rs2: [u8; 4]) -> [u8; 4] {
+        let rs1_i32 = i32::from_le_bytes(rs1);
+        let rs2_i32 = i32::from_le_bytes(rs2);
+        match (rs1_i32, rs2_i32) {
+            (_, 0) => [u8::MAX; 4],
+            (i32::MIN, -1) => rs1,
+            _ => (rs1_i32 / rs2_i32).to_le_bytes(),
+        }
+    }
+}
+
+impl DivRemOp for DivuOp {
+    #[inline(always)]
+    fn compute(rs1: [u8; 4], rs2: [u8; 4]) -> [u8; 4] {
+        if rs2 == [0; 4] {
+            [u8::MAX; 4]
+        } else {
+            let rs1 = u32::from_le_bytes(rs1);
+            let rs2 = u32::from_le_bytes(rs2);
+            (rs1 / rs2).to_le_bytes()
+        }
+    }
+}
+
+impl DivRemOp for RemOp {
+    #[inline(always)]
+    fn compute(rs1: [u8; 4], rs2: [u8; 4]) -> [u8; 4] {
+        let rs1_i32 = i32::from_le_bytes(rs1);
+        let rs2_i32 = i32::from_le_bytes(rs2);
+        match (rs1_i32, rs2_i32) {
+            (_, 0) => rs1,
+            (i32::MIN, -1) => [0; 4],
+            _ => (rs1_i32 % rs2_i32).to_le_bytes(),
+        }
+    }
+}
+
+impl DivRemOp for RemuOp {
+    #[inline(always)]
+    fn compute(rs1: [u8; 4], rs2: [u8; 4]) -> [u8; 4] {
+        if rs2 == [0; 4] {
+            rs1
+        } else {
+            let rs1 = u32::from_le_bytes(rs1);
+            let rs2 = u32::from_le_bytes(rs2);
+            (rs1 % rs2).to_le_bytes()
+        }
+    }
+}
diff --git a/extensions/rv32im/circuit/src/divrem/mod.rs b/extensions/rv32im/circuit/src/divrem/mod.rs
index 979ab38dc3..507cdeaf60 100644
--- a/extensions/rv32im/circuit/src/divrem/mod.rs
+++ b/extensions/rv32im/circuit/src/divrem/mod.rs
@@ -1,15 +1,18 @@
-use openvm_circuit::arch::VmChipWrapper;
+use openvm_circuit::arch::{VmAirWrapper, VmChipWrapper};
 
-use super::adapters::{Rv32MultAdapterChip, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS};
+use super::adapters::{RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS};
+use crate::adapters::{Rv32MultAdapterAir, Rv32MultAdapterExecutor, Rv32MultAdapterFiller};
 
 mod core;
+mod execution;
 pub use core::*;
 
 #[cfg(test)]
 mod tests;
 
-pub type Rv32DivRemChip<F> = VmChipWrapper<
-    F,
-    Rv32MultAdapterChip<F>,
-    DivRemCoreChip<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>,
->;
+pub type Rv32DivRemAir =
+    VmAirWrapper<Rv32MultAdapterAir, DivRemCoreAir<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>>;
+pub type Rv32DivRemExecutor =
+    DivRemExecutor<Rv32MultAdapterExecutor, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>;
+pub type Rv32DivRemChip<F> =
+    VmChipWrapper<F, DivRemFiller<Rv32MultAdapterFiller, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>>;
diff --git a/extensions/rv32im/circuit/src/divrem/tests.rs b/extensions/rv32im/circuit/src/divrem/tests.rs
index 41d8a9cc46..806f6e9483 100644
--- a/extensions/rv32im/circuit/src/divrem/tests.rs
+++ b/extensions/rv32im/circuit/src/divrem/tests.rs
@@ -1,21 +1,24 @@
-use std::{array, borrow::BorrowMut};
+use std::{array, borrow::BorrowMut, sync::Arc};
 
 use openvm_circuit::{
-    arch::{
-        testing::{
-            memory::gen_pointer, TestAdapterChip, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS,
-            RANGE_TUPLE_CHECKER_BUS,
-        },
-        ExecutionBridge, InstructionExecutor, VmAdapterChip, VmChipWrapper,
+    arch::testing::{
+        memory::gen_pointer, TestChipHarness, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS,
+        RANGE_TUPLE_CHECKER_BUS,
     },
     utils::generate_long_number,
 };
 use openvm_circuit_primitives::{
-    bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
-    range_tuple::{RangeTupleCheckerBus, SharedRangeTupleCheckerChip},
+    bitwise_op_lookup::{
+        BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
+        SharedBitwiseOperationLookupChip,
+    },
+    range_tuple::{
+        RangeTupleCheckerAir, RangeTupleCheckerBus, RangeTupleCheckerChip,
+        SharedRangeTupleCheckerChip,
+    },
 };
 use openvm_instructions::{instruction::Instruction, LocalOpcode};
-use openvm_rv32im_transpiler::DivRemOpcode;
+use openvm_rv32im_transpiler::DivRemOpcode::{self, *};
 use openvm_stark_backend::{
     p3_air::BaseAir,
     p3_field::{Field, FieldAlgebra},
@@ -24,29 +27,29 @@ use openvm_stark_backend::{
         Matrix,
     },
     utils::disable_debug_builder,
-    verifier::VerificationError,
-    ChipUsageGetter,
 };
 use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
 use rand::{rngs::StdRng, Rng};
+use test_case::test_case;
 
 use super::core::run_divrem;
 use crate::{
-    adapters::{Rv32MultAdapterChip, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS},
+    adapters::{
+        Rv32MultAdapterAir, Rv32MultAdapterExecutor, Rv32MultAdapterFiller, RV32_CELL_BITS,
+        RV32_REGISTER_NUM_LIMBS,
+    },
     divrem::{
-        run_mul_carries, run_sltu_diff_idx, DivRemCoreChip, DivRemCoreCols, DivRemCoreSpecialCase,
-        Rv32DivRemChip,
+        run_mul_carries, run_sltu_diff_idx, DivRemCoreCols, DivRemCoreSpecialCase, Rv32DivRemChip,
     },
+    test_utils::get_verification_error,
+    DivRemCoreAir, DivRemFiller, Rv32DivRemAir, Rv32DivRemExecutor,
 };
 
 type F = BabyBear;
-
-//////////////////////////////////////////////////////////////////////////////////////
-// POSITIVE TESTS
-//
-// Randomly generate computations and execute, ensuring that the generated trace
-// passes all constraints.
-//////////////////////////////////////////////////////////////////////////////////////
+const MAX_INS_CAPACITY: usize = 128;
+// the max number of limbs we currently support MUL for is 32 (i.e. for U256s)
+const MAX_NUM_LIMBS: u32 = 32;
+type Harness = TestChipHarness<F, Rv32DivRemExecutor, Rv32DivRemAir, Rv32DivRemChip<F>>;
 
 fn limb_sra<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
     x: [u32; NUM_LIMBS],
@@ -57,15 +60,70 @@ fn limb_sra<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
     array::from_fn(|i| if i + shift < NUM_LIMBS { x[i] } else { ext })
 }
 
+fn create_test_chip(
+    tester: &mut VmChipTestBuilder<F>,
+) -> (
+    Harness,
+    (
+        BitwiseOperationLookupAir<RV32_CELL_BITS>,
+        SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+    ),
+    (RangeTupleCheckerAir<2>, SharedRangeTupleCheckerChip<2>),
+) {
+    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
+    let range_tuple_bus = RangeTupleCheckerBus::new(
+        RANGE_TUPLE_CHECKER_BUS,
+        [1 << RV32_CELL_BITS, MAX_NUM_LIMBS * (1 << RV32_CELL_BITS)],
+    );
+
+    let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+        bitwise_bus,
+    ));
+    let range_tuple_chip =
+        SharedRangeTupleCheckerChip::new(RangeTupleCheckerChip::<2>::new(range_tuple_bus));
+
+    let air = Rv32DivRemAir::new(
+        Rv32MultAdapterAir::new(tester.execution_bridge(), tester.memory_bridge()),
+        DivRemCoreAir::new(bitwise_bus, range_tuple_bus, DivRemOpcode::CLASS_OFFSET),
+    );
+    let executor = Rv32DivRemExecutor::new(Rv32MultAdapterExecutor, DivRemOpcode::CLASS_OFFSET);
+    let chip = Rv32DivRemChip::<F>::new(
+        DivRemFiller::new(
+            Rv32MultAdapterFiller,
+            bitwise_chip.clone(),
+            range_tuple_chip.clone(),
+            DivRemOpcode::CLASS_OFFSET,
+        ),
+        tester.memory_helper(),
+    );
+
+    let harness = Harness::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
+
+    (
+        harness,
+        (bitwise_chip.air, bitwise_chip),
+        (range_tuple_chip.air, range_tuple_chip),
+    )
+}
+
 #[allow(clippy::too_many_arguments)]
-fn run_rv32_divrem_rand_write_execute<E: InstructionExecutor<F>>(
-    opcode: DivRemOpcode,
+fn set_and_execute(
     tester: &mut VmChipTestBuilder<F>,
-    chip: &mut E,
-    b: [u32; RV32_REGISTER_NUM_LIMBS],
-    c: [u32; RV32_REGISTER_NUM_LIMBS],
+    harness: &mut Harness,
     rng: &mut StdRng,
+    opcode: DivRemOpcode,
+    b: Option<[u32; RV32_REGISTER_NUM_LIMBS]>,
+    c: Option<[u32; RV32_REGISTER_NUM_LIMBS]>,
 ) {
+    let b = b.unwrap_or(generate_long_number::<
+        RV32_REGISTER_NUM_LIMBS,
+        RV32_CELL_BITS,
+    >(rng));
+    let c = c.unwrap_or(limb_sra::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(
+        generate_long_number::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(rng),
+        rng.gen_range(0..(RV32_REGISTER_NUM_LIMBS - 1)),
+    ));
+
     let rs1 = gen_pointer(rng, 4);
     let rs2 = gen_pointer(rng, 4);
     let rd = gen_pointer(rng, 4);
@@ -73,13 +131,13 @@ fn run_rv32_divrem_rand_write_execute<E: InstructionExecutor<F>>(
     tester.write::<RV32_REGISTER_NUM_LIMBS>(1, rs1, b.map(F::from_canonical_u32));
     tester.write::<RV32_REGISTER_NUM_LIMBS>(1, rs2, c.map(F::from_canonical_u32));
 
-    let is_div = opcode == DivRemOpcode::DIV || opcode == DivRemOpcode::DIVU;
-    let is_signed = opcode == DivRemOpcode::DIV || opcode == DivRemOpcode::REM;
+    let is_div = opcode == DIV || opcode == DIVU;
+    let is_signed = opcode == DIV || opcode == REM;
 
     let (q, r, _, _, _, _) =
         run_divrem::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(is_signed, &b, &c);
     tester.execute(
-        chip,
+        harness,
         &Instruction::from_usize(opcode.global_opcode(), [rd, rs1, rs2, 1, 0]),
     );
 
@@ -89,136 +147,101 @@ fn run_rv32_divrem_rand_write_execute<E: InstructionExecutor<F>>(
     );
 }
 
-fn run_rv32_divrem_rand_test(opcode: DivRemOpcode, num_ops: usize) {
-    // the max number of limbs we currently support MUL for is 32 (i.e. for U256s)
-    const MAX_NUM_LIMBS: u32 = 32;
-    let mut rng = create_seeded_rng();
-
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let range_tuple_bus = RangeTupleCheckerBus::new(
-        RANGE_TUPLE_CHECKER_BUS,
-        [1 << RV32_CELL_BITS, MAX_NUM_LIMBS * (1 << RV32_CELL_BITS)],
-    );
-
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-    let range_tuple_checker = SharedRangeTupleCheckerChip::new(range_tuple_bus);
+//////////////////////////////////////////////////////////////////////////////////////
+// POSITIVE TESTS
+//
+// Randomly generate computations and execute, ensuring that the generated trace
+// passes all constraints.
+//////////////////////////////////////////////////////////////////////////////////////
 
+/// Executes `num_ops` random `opcode` operations followed by fixed special cases, then
+/// checks that the resulting trace passes verification.
+#[test_case(DIV, 100)]
+#[test_case(DIVU, 100)]
+#[test_case(REM, 100)]
+#[test_case(REMU, 100)]
+fn rand_divrem_test(opcode: DivRemOpcode, num_ops: usize) {
+    let mut rng = create_seeded_rng();
     let mut tester = VmChipTestBuilder::default();
-    let mut chip = Rv32DivRemChip::<F>::new(
-        Rv32MultAdapterChip::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-        ),
-        DivRemCoreChip::new(
-            bitwise_chip.clone(),
-            range_tuple_checker.clone(),
-            DivRemOpcode::CLASS_OFFSET,
-        ),
-        tester.offline_memory_mutex_arc(),
-    );
+    let (mut harness, bitwise, range_tuple) = create_test_chip(&mut tester);
 
     for _ in 0..num_ops {
-        let b = generate_long_number::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(&mut rng);
-        let leading_zeros = rng.gen_range(0..(RV32_REGISTER_NUM_LIMBS - 1));
-        let c = limb_sra::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(
-            generate_long_number::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(&mut rng),
-            leading_zeros,
-        );
-        run_rv32_divrem_rand_write_execute(opcode, &mut tester, &mut chip, b, c, &mut rng);
+        set_and_execute(&mut tester, &mut harness, &mut rng, opcode, None, None);
     }
 
     // Test special cases in addition to random cases (i.e. zero divisor with b > 0,
     // zero divisor with b < 0, r = 0 (3 cases), and signed overflow).
-    run_rv32_divrem_rand_write_execute(
-        opcode,
+    // Zero divisor, b > 0.
+    set_and_execute(
         &mut tester,
-        &mut chip,
-        [98, 188, 163, 127],
-        [0, 0, 0, 0],
+        &mut harness,
         &mut rng,
-    );
-    run_rv32_divrem_rand_write_execute(
         opcode,
+        Some([98, 188, 163, 127]),
+        Some([0, 0, 0, 0]),
+    );
+    // Zero divisor, b < 0 when interpreted as signed.
+    set_and_execute(
         &mut tester,
-        &mut chip,
-        [98, 188, 163, 229],
-        [0, 0, 0, 0],
+        &mut harness,
         &mut rng,
-    );
-    run_rv32_divrem_rand_write_execute(
         opcode,
+        Some([98, 188, 163, 229]),
+        Some([0, 0, 0, 0]),
+    );
+    // r = 0, sign bit of b set.
+    set_and_execute(
         &mut tester,
-        &mut chip,
-        [0, 0, 0, 128],
-        [0, 1, 0, 0],
+        &mut harness,
         &mut rng,
-    );
-    run_rv32_divrem_rand_write_execute(
         opcode,
+        Some([0, 0, 0, 128]),
+        Some([0, 1, 0, 0]),
+    );
+    // r = 0, sign bit of b clear.
+    set_and_execute(
         &mut tester,
-        &mut chip,
-        [0, 0, 0, 127],
-        [0, 1, 0, 0],
+        &mut harness,
         &mut rng,
-    );
-    run_rv32_divrem_rand_write_execute(
         opcode,
+        Some([0, 0, 0, 127]),
+        Some([0, 1, 0, 0]),
+    );
+    // r = 0, b = c = 0.
+    set_and_execute(
         &mut tester,
-        &mut chip,
-        [0, 0, 0, 0],
-        [0, 0, 0, 0],
+        &mut harness,
         &mut rng,
+        opcode,
+        Some([0, 0, 0, 0]),
+        Some([0, 0, 0, 0]),
     );
-    run_rv32_divrem_rand_write_execute(
-        opcode,
+    // Signed overflow: b has only the sign bit set, c is all ones.
+    set_and_execute(
         &mut tester,
-        &mut chip,
-        [0, 0, 0, 128],
-        [255, 255, 255, 255],
+        &mut harness,
         &mut rng,
+        opcode,
+        Some([0, 0, 0, 128]),
+        Some([255, 255, 255, 255]),
     );
 
     let tester = tester
         .build()
-        .load(chip)
-        .load(bitwise_chip)
-        .load(range_tuple_checker)
+        .load(harness)
+        .load_periphery(bitwise)
+        .load_periphery(range_tuple)
         .finalize();
     tester.simple_test().expect("Verification failed");
 }
 
-#[test]
-fn rv32_div_rand_test() {
-    run_rv32_divrem_rand_test(DivRemOpcode::DIV, 100);
-}
-
-#[test]
-fn rv32_divu_rand_test() {
-    run_rv32_divrem_rand_test(DivRemOpcode::DIVU, 100);
-}
-
-#[test]
-fn rv32_rem_rand_test() {
-    run_rv32_divrem_rand_test(DivRemOpcode::REM, 100);
-}
-
-#[test]
-fn rv32_remu_rand_test() {
-    run_rv32_divrem_rand_test(DivRemOpcode::REMU, 100);
-}
-
 //////////////////////////////////////////////////////////////////////////////////////
 // NEGATIVE TESTS
 //
 // Given a fake trace of a single operation, setup a chip and run the test. We replace
-// the write part of the trace and check that the core chip throws the expected error.
-// A dummy adapter is used so memory interactions don't indirectly cause false passes.
+// part of the trace and check that the chip throws the expected error.
 //////////////////////////////////////////////////////////////////////////////////////
 
-type Rv32DivRemTestChip<F> =
-    VmChipWrapper<F, TestAdapterChip<F>, DivRemCoreChip<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>>;
-
 #[derive(Default, Clone, Copy)]
 struct DivRemPrankValues<const NUM_LIMBS: usize> {
     pub q: Option<[u32; NUM_LIMBS]>,
@@ -229,84 +252,27 @@ struct DivRemPrankValues<const NUM_LIMBS: usize> {
     pub r_zero: Option<bool>,
 }
 
-fn run_rv32_divrem_negative_test(
-    signed: bool,
+fn run_negative_divrem_test(
+    opcode: DivRemOpcode,
     b: [u32; RV32_REGISTER_NUM_LIMBS],
     c: [u32; RV32_REGISTER_NUM_LIMBS],
-    prank_vals: &DivRemPrankValues<RV32_REGISTER_NUM_LIMBS>,
+    prank_vals: DivRemPrankValues<RV32_REGISTER_NUM_LIMBS>,
     interaction_error: bool,
 ) {
-    // the max number of limbs we currently support MUL for is 32 (i.e. for U256s)
-    const MAX_NUM_LIMBS: u32 = 32;
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let range_tuple_bus = RangeTupleCheckerBus::new(
-        RANGE_TUPLE_CHECKER_BUS,
-        [1 << RV32_CELL_BITS, MAX_NUM_LIMBS * (1 << RV32_CELL_BITS)],
-    );
-
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-    let range_tuple_chip = SharedRangeTupleCheckerChip::new(range_tuple_bus);
-
+    let mut rng = create_seeded_rng();
     let mut tester = VmChipTestBuilder::default();
-    let mut chip = Rv32DivRemTestChip::<F>::new(
-        TestAdapterChip::new(
-            vec![[b.map(F::from_canonical_u32), c.map(F::from_canonical_u32)].concat(); 2],
-            vec![None],
-            ExecutionBridge::new(tester.execution_bus(), tester.program_bus()),
-        ),
-        DivRemCoreChip::new(
-            bitwise_chip.clone(),
-            range_tuple_chip.clone(),
-            DivRemOpcode::CLASS_OFFSET,
-        ),
-        tester.offline_memory_mutex_arc(),
-    );
+    let (mut harness, bitwise, range_tuple) = create_test_chip(&mut tester);
 
-    let (div_opcode, rem_opcode) = if signed {
-        (DivRemOpcode::DIV, DivRemOpcode::REM)
-    } else {
-        (DivRemOpcode::DIVU, DivRemOpcode::REMU)
-    };
-    tester.execute(
-        &mut chip,
-        &Instruction::from_usize(div_opcode.global_opcode(), [0, 0, 0, 1, 1]),
-    );
-    tester.execute(
-        &mut chip,
-        &Instruction::from_usize(rem_opcode.global_opcode(), [0, 0, 0, 1, 1]),
+    set_and_execute(
+        &mut tester,
+        &mut harness,
+        &mut rng,
+        opcode,
+        Some(b),
+        Some(c),
     );
 
-    let (q, r, b_sign, c_sign, q_sign, case) =
-        run_divrem::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(signed, &b, &c);
-    let q = prank_vals.q.unwrap_or(q);
-    let r = prank_vals.r.unwrap_or(r);
-    let carries =
-        run_mul_carries::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(signed, &c, &q, &r, q_sign);
-
-    range_tuple_chip.clear();
-    for i in 0..RV32_REGISTER_NUM_LIMBS {
-        range_tuple_chip.add_count(&[q[i], carries[i]]);
-        range_tuple_chip.add_count(&[r[i], carries[i + RV32_REGISTER_NUM_LIMBS]]);
-    }
-
-    if let Some(diff_val) = prank_vals.diff_val {
-        bitwise_chip.clear();
-        if signed {
-            let b_sign_mask = if b_sign { 1 << (RV32_CELL_BITS - 1) } else { 0 };
-            let c_sign_mask = if c_sign { 1 << (RV32_CELL_BITS - 1) } else { 0 };
-            bitwise_chip.request_range(
-                (b[RV32_REGISTER_NUM_LIMBS - 1] - b_sign_mask) << 1,
-                (c[RV32_REGISTER_NUM_LIMBS - 1] - c_sign_mask) << 1,
-            );
-        }
-        if case == DivRemCoreSpecialCase::None {
-            bitwise_chip.request_range(diff_val - 1, 0);
-        }
-    }
-
-    let trace_width = chip.trace_width();
-    let adapter_width = BaseAir::<F>::width(chip.adapter.air());
-
+    let adapter_width = BaseAir::<F>::width(&harness.air.adapter);
     let modify_trace = |trace: &mut DenseMatrix<BabyBear>| {
         let mut values = trace.row_slice(0).to_vec();
         let cols: &mut DivRemCoreCols<F, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS> =
@@ -338,21 +304,17 @@ fn run_rv32_divrem_negative_test(
             cols.r_zero = F::from_bool(r_zero);
         }
 
-        *trace = RowMajorMatrix::new(values, trace_width);
+        *trace = RowMajorMatrix::new(values, trace.width());
     };
 
     disable_debug_builder();
     let tester = tester
         .build()
-        .load_and_prank_trace(chip, modify_trace)
-        .load(bitwise_chip)
-        .load(range_tuple_chip)
+        .load_and_prank_trace(harness, modify_trace)
+        .load_periphery(bitwise)
+        .load_periphery(range_tuple)
         .finalize();
-    tester.simple_test_with_expected_error(if interaction_error {
-        VerificationError::ChallengePhaseError
-    } else {
-        VerificationError::OodEvaluationMismatch
-    });
+    tester.simple_test_with_expected_error(get_verification_error(interaction_error));
 }
 
 #[test]
@@ -363,7 +325,8 @@ fn rv32_divrem_unsigned_wrong_q_negative_test() {
         q: Some([245, 168, 7, 0]),
         ..Default::default()
     };
-    run_rv32_divrem_negative_test(false, b, c, &prank_vals, true);
+    run_negative_divrem_test(DIVU, b, c, prank_vals, true);
+    run_negative_divrem_test(REMU, b, c, prank_vals, true);
 }
 
 #[test]
@@ -376,7 +339,8 @@ fn rv32_divrem_unsigned_wrong_r_negative_test() {
         diff_val: Some(31),
         ..Default::default()
     };
-    run_rv32_divrem_negative_test(false, b, c, &prank_vals, true);
+    run_negative_divrem_test(DIVU, b, c, prank_vals, true);
+    run_negative_divrem_test(REMU, b, c, prank_vals, true);
 }
 
 #[test]
@@ -387,7 +351,8 @@ fn rv32_divrem_unsigned_high_mult_negative_test() {
         q: Some([128, 0, 0, 1]),
         ..Default::default()
     };
-    run_rv32_divrem_negative_test(false, b, c, &prank_vals, true);
+    run_negative_divrem_test(DIVU, b, c, prank_vals, true);
+    run_negative_divrem_test(REMU, b, c, prank_vals, true);
 }
 
 #[test]
@@ -400,7 +365,8 @@ fn rv32_divrem_unsigned_zero_divisor_wrong_r_negative_test() {
         diff_val: Some(255),
         ..Default::default()
     };
-    run_rv32_divrem_negative_test(false, b, c, &prank_vals, true);
+    run_negative_divrem_test(DIVU, b, c, prank_vals, true);
+    run_negative_divrem_test(REMU, b, c, prank_vals, true);
 }
 
 #[test]
@@ -411,7 +377,8 @@ fn rv32_divrem_signed_wrong_q_negative_test() {
         q: Some([74, 61, 255, 255]),
         ..Default::default()
     };
-    run_rv32_divrem_negative_test(true, b, c, &prank_vals, true);
+    run_negative_divrem_test(DIV, b, c, prank_vals, true);
+    run_negative_divrem_test(REM, b, c, prank_vals, true);
 }
 
 #[test]
@@ -424,7 +391,8 @@ fn rv32_divrem_signed_wrong_r_negative_test() {
         diff_val: Some(20),
         ..Default::default()
     };
-    run_rv32_divrem_negative_test(true, b, c, &prank_vals, true);
+    run_negative_divrem_test(DIV, b, c, prank_vals, true);
+    run_negative_divrem_test(REM, b, c, prank_vals, true);
 }
 
 #[test]
@@ -435,7 +403,8 @@ fn rv32_divrem_signed_high_mult_negative_test() {
         q: Some([1, 0, 0, 1]),
         ..Default::default()
     };
-    run_rv32_divrem_negative_test(true, b, c, &prank_vals, true);
+    run_negative_divrem_test(DIV, b, c, prank_vals, true);
+    run_negative_divrem_test(REM, b, c, prank_vals, true);
 }
 
 #[test]
@@ -449,7 +418,8 @@ fn rv32_divrem_signed_r_wrong_sign_negative_test() {
         diff_val: Some(192),
         ..Default::default()
     };
-    run_rv32_divrem_negative_test(true, b, c, &prank_vals, false);
+    run_negative_divrem_test(DIV, b, c, prank_vals, false);
+    run_negative_divrem_test(REM, b, c, prank_vals, false);
 }
 
 #[test]
@@ -463,7 +433,8 @@ fn rv32_divrem_signed_r_wrong_prime_negative_test() {
         diff_val: Some(36),
         ..Default::default()
     };
-    run_rv32_divrem_negative_test(true, b, c, &prank_vals, false);
+    run_negative_divrem_test(DIV, b, c, prank_vals, false);
+    run_negative_divrem_test(REM, b, c, prank_vals, false);
 }
 
 #[test]
@@ -476,7 +447,8 @@ fn rv32_divrem_signed_zero_divisor_wrong_r_negative_test() {
         diff_val: Some(1),
         ..Default::default()
     };
-    run_rv32_divrem_negative_test(true, b, c, &prank_vals, true);
+    run_negative_divrem_test(DIV, b, c, prank_vals, true);
+    run_negative_divrem_test(REM, b, c, prank_vals, true);
 }
 
 #[test]
@@ -491,8 +463,10 @@ fn rv32_divrem_false_zero_divisor_flag_negative_test() {
         zero_divisor: Some(true),
         ..Default::default()
     };
-    run_rv32_divrem_negative_test(true, b, c, &prank_vals, false);
-    run_rv32_divrem_negative_test(false, b, c, &prank_vals, false);
+    run_negative_divrem_test(DIVU, b, c, prank_vals, false);
+    run_negative_divrem_test(REMU, b, c, prank_vals, false);
+    run_negative_divrem_test(DIV, b, c, prank_vals, false);
+    run_negative_divrem_test(REM, b, c, prank_vals, false);
 }
 
 #[test]
@@ -507,8 +481,10 @@ fn rv32_divrem_false_r_zero_flag_negative_test() {
         r_zero: Some(true),
         ..Default::default()
     };
-    run_rv32_divrem_negative_test(true, b, c, &prank_vals, false);
-    run_rv32_divrem_negative_test(false, b, c, &prank_vals, false);
+    run_negative_divrem_test(DIVU, b, c, prank_vals, false);
+    run_negative_divrem_test(REMU, b, c, prank_vals, false);
+    run_negative_divrem_test(DIV, b, c, prank_vals, false);
+    run_negative_divrem_test(REM, b, c, prank_vals, false);
 }
 
 #[test]
@@ -519,8 +495,10 @@ fn rv32_divrem_unset_zero_divisor_flag_negative_test() {
         zero_divisor: Some(false),
         ..Default::default()
     };
-    run_rv32_divrem_negative_test(true, b, c, &prank_vals, false);
-    run_rv32_divrem_negative_test(false, b, c, &prank_vals, false);
+    run_negative_divrem_test(DIVU, b, c, prank_vals, false);
+    run_negative_divrem_test(REMU, b, c, prank_vals, false);
+    run_negative_divrem_test(DIV, b, c, prank_vals, false);
+    run_negative_divrem_test(REM, b, c, prank_vals, false);
 }
 
 #[test]
@@ -532,8 +510,10 @@ fn rv32_divrem_wrong_r_zero_flag_negative_test() {
         r_zero: Some(true),
         ..Default::default()
     };
-    run_rv32_divrem_negative_test(true, b, c, &prank_vals, false);
-    run_rv32_divrem_negative_test(false, b, c, &prank_vals, false);
+    run_negative_divrem_test(DIVU, b, c, prank_vals, false);
+    run_negative_divrem_test(REMU, b, c, prank_vals, false);
+    run_negative_divrem_test(DIV, b, c, prank_vals, false);
+    run_negative_divrem_test(REM, b, c, prank_vals, false);
 }
 
 #[test]
@@ -544,8 +524,10 @@ fn rv32_divrem_unset_r_zero_flag_negative_test() {
         r_zero: Some(false),
         ..Default::default()
     };
-    run_rv32_divrem_negative_test(true, b, c, &prank_vals, false);
-    run_rv32_divrem_negative_test(false, b, c, &prank_vals, false);
+    run_negative_divrem_test(DIVU, b, c, prank_vals, false);
+    run_negative_divrem_test(REMU, b, c, prank_vals, false);
+    run_negative_divrem_test(DIV, b, c, prank_vals, false);
+    run_negative_divrem_test(REM, b, c, prank_vals, false);
 }
 
 ///////////////////////////////////////////////////////////////////////////////////////
diff --git a/extensions/rv32im/circuit/src/extension.rs b/extensions/rv32im/circuit/src/extension.rs
index f8dd2fbf54..55581f3914 100644
--- a/extensions/rv32im/circuit/src/extension.rs
+++ b/extensions/rv32im/circuit/src/extension.rs
@@ -1,108 +1,43 @@
+use std::sync::Arc;
+
 use derive_more::derive::From;
 use openvm_circuit::{
     arch::{
-        InitFileGenerator, SystemConfig, SystemPort, VmExtension, VmInventory, VmInventoryBuilder,
-        VmInventoryError,
+        AirInventory, AirInventoryError, ChipInventory, ChipInventoryError, ExecutionBridge,
+        ExecutorInventoryBuilder, ExecutorInventoryError, RowMajorMatrixArena, VmCircuitExtension,
+        VmExecutionExtension, VmProverExtension,
     },
-    system::phantom::PhantomChip,
+    system::{memory::SharedMemoryHelper, SystemPort},
 };
-use openvm_circuit_derive::{AnyEnum, InstructionExecutor, VmConfig};
+use openvm_circuit_derive::{AnyEnum, MeteredExecutor};
 use openvm_circuit_primitives::{
-    bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
-    range_tuple::{RangeTupleCheckerBus, SharedRangeTupleCheckerChip},
+    bitwise_op_lookup::{
+        BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
+        SharedBitwiseOperationLookupChip,
+    },
+    range_tuple::{
+        RangeTupleCheckerAir, RangeTupleCheckerBus, RangeTupleCheckerChip,
+        SharedRangeTupleCheckerChip,
+    },
 };
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
 use openvm_instructions::{program::DEFAULT_PC_STEP, LocalOpcode, PhantomDiscriminant};
 use openvm_rv32im_transpiler::{
     BaseAluOpcode, BranchEqualOpcode, BranchLessThanOpcode, DivRemOpcode, LessThanOpcode,
     MulHOpcode, MulOpcode, Rv32AuipcOpcode, Rv32HintStoreOpcode, Rv32JalLuiOpcode, Rv32JalrOpcode,
     Rv32LoadStoreOpcode, Rv32Phantom, ShiftOpcode,
 };
-use openvm_stark_backend::p3_field::PrimeField32;
+use openvm_stark_backend::{
+    config::{StarkGenericConfig, Val},
+    engine::StarkEngine,
+    p3_field::PrimeField32,
+    prover::cpu::{CpuBackend, CpuDevice},
+};
 use serde::{Deserialize, Serialize};
 use strum::IntoEnumIterator;
 
 use crate::{adapters::*, *};
 
-/// Config for a VM with base extension and IO extension
-#[derive(Clone, Debug, VmConfig, derive_new::new, Serialize, Deserialize)]
-pub struct Rv32IConfig {
-    #[system]
-    pub system: SystemConfig,
-    #[extension]
-    pub base: Rv32I,
-    #[extension]
-    pub io: Rv32Io,
-}
-
-// Default implementation uses no init file
-impl InitFileGenerator for Rv32IConfig {}
-
-/// Config for a VM with base extension, IO extension, and multiplication extension
-#[derive(Clone, Debug, Default, VmConfig, derive_new::new, Serialize, Deserialize)]
-pub struct Rv32ImConfig {
-    #[config]
-    pub rv32i: Rv32IConfig,
-    #[extension]
-    pub mul: Rv32M,
-}
-
-// Default implementation uses no init file
-impl InitFileGenerator for Rv32ImConfig {}
-
-impl Default for Rv32IConfig {
-    fn default() -> Self {
-        let system = SystemConfig::default().with_continuations();
-        Self {
-            system,
-            base: Default::default(),
-            io: Default::default(),
-        }
-    }
-}
-
-impl Rv32IConfig {
-    pub fn with_public_values(public_values: usize) -> Self {
-        let system = SystemConfig::default()
-            .with_continuations()
-            .with_public_values(public_values);
-        Self {
-            system,
-            base: Default::default(),
-            io: Default::default(),
-        }
-    }
-
-    pub fn with_public_values_and_segment_len(public_values: usize, segment_len: usize) -> Self {
-        let system = SystemConfig::default()
-            .with_continuations()
-            .with_public_values(public_values)
-            .with_max_segment_len(segment_len);
-        Self {
-            system,
-            base: Default::default(),
-            io: Default::default(),
-        }
-    }
-}
-
-impl Rv32ImConfig {
-    pub fn with_public_values(public_values: usize) -> Self {
-        Self {
-            rv32i: Rv32IConfig::with_public_values(public_values),
-            mul: Default::default(),
-        }
-    }
-
-    pub fn with_public_values_and_segment_len(public_values: usize, segment_len: usize) -> Self {
-        Self {
-            rv32i: Rv32IConfig::with_public_values_and_segment_len(public_values, segment_len),
-            mul: Default::default(),
-        }
-    }
-}
-
-// ============ Extension Implementations ============
+// ============ Extension Struct Definitions ============
 
 /// RISC-V 32-bit Base (RV32I) Extension
 #[derive(Clone, Copy, Debug, Default, Serialize, Deserialize)]
@@ -134,361 +69,630 @@ fn default_range_tuple_checker_sizes() -> [u32; 2] {
 // ============ Executor and Periphery Enums for Extension ============
 
 /// RISC-V 32-bit Base (RV32I) Instruction Executors
-#[derive(ChipUsageGetter, Chip, InstructionExecutor, From, AnyEnum)]
-pub enum Rv32IExecutor<F: PrimeField32> {
+#[derive(Clone, From, AnyEnum, Executor, MeteredExecutor, PreflightExecutor)]
+pub enum Rv32IExecutor {
     // Rv32 (for standard 32-bit integers):
-    BaseAlu(Rv32BaseAluChip<F>),
-    LessThan(Rv32LessThanChip<F>),
-    Shift(Rv32ShiftChip<F>),
-    LoadStore(Rv32LoadStoreChip<F>),
-    LoadSignExtend(Rv32LoadSignExtendChip<F>),
-    BranchEqual(Rv32BranchEqualChip<F>),
-    BranchLessThan(Rv32BranchLessThanChip<F>),
-    JalLui(Rv32JalLuiChip<F>),
-    Jalr(Rv32JalrChip<F>),
-    Auipc(Rv32AuipcChip<F>),
+    BaseAlu(Rv32BaseAluExecutor),
+    LessThan(Rv32LessThanExecutor),
+    Shift(Rv32ShiftExecutor),
+    LoadStore(Rv32LoadStoreExecutor),
+    LoadSignExtend(Rv32LoadSignExtendExecutor),
+    BranchEqual(Rv32BranchEqualExecutor),
+    BranchLessThan(Rv32BranchLessThanExecutor),
+    JalLui(Rv32JalLuiExecutor),
+    Jalr(Rv32JalrExecutor),
+    Auipc(Rv32AuipcExecutor),
 }
 
 /// RISC-V 32-bit Multiplication Extension (RV32M) Instruction Executors
-#[derive(ChipUsageGetter, Chip, InstructionExecutor, From, AnyEnum)]
-pub enum Rv32MExecutor<F: PrimeField32> {
-    Multiplication(Rv32MultiplicationChip<F>),
-    MultiplicationHigh(Rv32MulHChip<F>),
-    DivRem(Rv32DivRemChip<F>),
+#[derive(Clone, From, AnyEnum, Executor, MeteredExecutor, PreflightExecutor)]
+pub enum Rv32MExecutor {
+    Multiplication(Rv32MultiplicationExecutor),
+    MultiplicationHigh(Rv32MulHExecutor),
+    DivRem(Rv32DivRemExecutor),
 }
 
 /// RISC-V 32-bit Io Instruction Executors
-#[derive(ChipUsageGetter, Chip, InstructionExecutor, From, AnyEnum)]
-pub enum Rv32IoExecutor<F: PrimeField32> {
-    HintStore(Rv32HintStoreChip<F>),
+#[derive(Clone, Copy, From, AnyEnum, Executor, MeteredExecutor, PreflightExecutor)]
+pub enum Rv32IoExecutor {
+    HintStore(Rv32HintStoreExecutor),
 }
 
-#[derive(From, ChipUsageGetter, Chip, AnyEnum)]
-pub enum Rv32IPeriphery<F: PrimeField32> {
-    BitwiseOperationLookup(SharedBitwiseOperationLookupChip<8>),
-    // We put this only to get the <F> generic to work
-    Phantom(PhantomChip<F>),
-}
+// ============ VmExtension Implementations ============
 
-#[derive(From, ChipUsageGetter, Chip, AnyEnum)]
-pub enum Rv32MPeriphery<F: PrimeField32> {
-    BitwiseOperationLookup(SharedBitwiseOperationLookupChip<8>),
-    /// Only needed for multiplication extension
-    RangeTupleChecker(SharedRangeTupleCheckerChip<2>),
-    // We put this only to get the <F> generic to work
-    Phantom(PhantomChip<F>),
-}
+impl<F: PrimeField32> VmExecutionExtension<F> for Rv32I {
+    type Executor = Rv32IExecutor;
 
-#[derive(From, ChipUsageGetter, Chip, AnyEnum)]
-pub enum Rv32IoPeriphery<F: PrimeField32> {
-    BitwiseOperationLookup(SharedBitwiseOperationLookupChip<8>),
-    // We put this only to get the <F> generic to work
-    Phantom(PhantomChip<F>),
-}
+    fn extend_execution(
+        &self,
+        inventory: &mut ExecutorInventoryBuilder<F, Rv32IExecutor>,
+    ) -> Result<(), ExecutorInventoryError> {
+        let pointer_max_bits = inventory.pointer_max_bits();
 
-// ============ VmExtension Implementations ============
+        let base_alu =
+            Rv32BaseAluExecutor::new(Rv32BaseAluAdapterExecutor, BaseAluOpcode::CLASS_OFFSET);
+        inventory.add_executor(base_alu, BaseAluOpcode::iter().map(|x| x.global_opcode()))?;
 
-impl<F: PrimeField32> VmExtension<F> for Rv32I {
-    type Executor = Rv32IExecutor<F>;
-    type Periphery = Rv32IPeriphery<F>;
+        let lt = LessThanExecutor::new(Rv32BaseAluAdapterExecutor, LessThanOpcode::CLASS_OFFSET);
+        inventory.add_executor(lt, LessThanOpcode::iter().map(|x| x.global_opcode()))?;
 
-    fn build(
-        &self,
-        builder: &mut VmInventoryBuilder<F>,
-    ) -> Result<VmInventory<Rv32IExecutor<F>, Rv32IPeriphery<F>>, VmInventoryError> {
-        let mut inventory = VmInventory::new();
+        let shift = ShiftExecutor::new(Rv32BaseAluAdapterExecutor, ShiftOpcode::CLASS_OFFSET);
+        inventory.add_executor(shift, ShiftOpcode::iter().map(|x| x.global_opcode()))?;
+
+        let load_store = LoadStoreExecutor::new(
+            Rv32LoadStoreAdapterExecutor::new(pointer_max_bits),
+            Rv32LoadStoreOpcode::CLASS_OFFSET,
+        );
+        inventory.add_executor(
+            load_store,
+            Rv32LoadStoreOpcode::iter()
+                .take(Rv32LoadStoreOpcode::STOREB as usize + 1)
+                .map(|x| x.global_opcode()),
+        )?;
+
+        let load_sign_extend =
+            LoadSignExtendExecutor::new(Rv32LoadStoreAdapterExecutor::new(pointer_max_bits));
+        inventory.add_executor(
+            load_sign_extend,
+            [Rv32LoadStoreOpcode::LOADB, Rv32LoadStoreOpcode::LOADH].map(|x| x.global_opcode()),
+        )?;
+
+        let beq = BranchEqualExecutor::new(
+            Rv32BranchAdapterExecutor,
+            BranchEqualOpcode::CLASS_OFFSET,
+            DEFAULT_PC_STEP,
+        );
+        inventory.add_executor(beq, BranchEqualOpcode::iter().map(|x| x.global_opcode()))?;
+
+        let blt = BranchLessThanExecutor::new(
+            Rv32BranchAdapterExecutor,
+            BranchLessThanOpcode::CLASS_OFFSET,
+        );
+        inventory.add_executor(blt, BranchLessThanOpcode::iter().map(|x| x.global_opcode()))?;
+
+        let jal_lui = Rv32JalLuiExecutor::new(Rv32CondRdWriteAdapterExecutor::new(
+            Rv32RdWriteAdapterExecutor,
+        ));
+        inventory.add_executor(jal_lui, Rv32JalLuiOpcode::iter().map(|x| x.global_opcode()))?;
+
+        let jalr = Rv32JalrExecutor::new(Rv32JalrAdapterExecutor);
+        inventory.add_executor(jalr, Rv32JalrOpcode::iter().map(|x| x.global_opcode()))?;
+
+        let auipc = Rv32AuipcExecutor::new(Rv32RdWriteAdapterExecutor);
+        inventory.add_executor(auipc, Rv32AuipcOpcode::iter().map(|x| x.global_opcode()))?;
+
+        // There is no downside to adding phantom sub-executors, so we do it in the base extension.
+        inventory.add_phantom_sub_executor(
+            phantom::Rv32HintInputSubEx,
+            PhantomDiscriminant(Rv32Phantom::HintInput as u16),
+        )?;
+        inventory.add_phantom_sub_executor(
+            phantom::Rv32HintRandomSubEx,
+            PhantomDiscriminant(Rv32Phantom::HintRandom as u16),
+        )?;
+        inventory.add_phantom_sub_executor(
+            phantom::Rv32PrintStrSubEx,
+            PhantomDiscriminant(Rv32Phantom::PrintStr as u16),
+        )?;
+        inventory.add_phantom_sub_executor(
+            phantom::Rv32HintLoadByKeySubEx,
+            PhantomDiscriminant(Rv32Phantom::HintLoadByKey as u16),
+        )?;
+
+        Ok(())
+    }
+}
+
+impl<SC: StarkGenericConfig> VmCircuitExtension<SC> for Rv32I {
+    fn extend_circuit(&self, inventory: &mut AirInventory<SC>) -> Result<(), AirInventoryError> {
         let SystemPort {
             execution_bus,
             program_bus,
             memory_bridge,
-        } = builder.system_port();
-
-        let range_checker = builder.system_base().range_checker_chip.clone();
-        let offline_memory = builder.system_base().offline_memory();
-        let pointer_max_bits = builder.system_config().memory_config.pointer_max_bits;
-
-        let bitwise_lu_chip = if let Some(&chip) = builder
-            .find_chip::<SharedBitwiseOperationLookupChip<8>>()
-            .first()
-        {
-            chip.clone()
-        } else {
-            let bitwise_lu_bus = BitwiseOperationLookupBus::new(builder.new_bus_idx());
-            let chip = SharedBitwiseOperationLookupChip::new(bitwise_lu_bus);
-            inventory.add_periphery_chip(chip.clone());
-            chip
+        } = inventory.system().port();
+
+        let exec_bridge = ExecutionBridge::new(execution_bus, program_bus);
+        let range_checker = inventory.range_checker().bus;
+        let pointer_max_bits = inventory.pointer_max_bits();
+
+        let bitwise_lu = {
+            // A trick to get around Rust's borrow rules
+            let existing_air = inventory.find_air::<BitwiseOperationLookupAir<8>>().next();
+            if let Some(air) = existing_air {
+                air.bus
+            } else {
+                let bus = BitwiseOperationLookupBus::new(inventory.new_bus_idx());
+                let air = BitwiseOperationLookupAir::<8>::new(bus);
+                inventory.add_air(air);
+                air.bus
+            }
         };
 
-        let base_alu_chip = Rv32BaseAluChip::new(
-            Rv32BaseAluAdapterChip::new(
-                execution_bus,
-                program_bus,
+        let base_alu = Rv32BaseAluAir::new(
+            Rv32BaseAluAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu),
+            BaseAluCoreAir::new(bitwise_lu, BaseAluOpcode::CLASS_OFFSET),
+        );
+        inventory.add_air(base_alu);
+
+        let lt = Rv32LessThanAir::new(
+            Rv32BaseAluAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu),
+            LessThanCoreAir::new(bitwise_lu, LessThanOpcode::CLASS_OFFSET),
+        );
+        inventory.add_air(lt);
+
+        let shift = Rv32ShiftAir::new(
+            Rv32BaseAluAdapterAir::new(exec_bridge, memory_bridge, bitwise_lu),
+            ShiftCoreAir::new(bitwise_lu, range_checker, ShiftOpcode::CLASS_OFFSET),
+        );
+        inventory.add_air(shift);
+
+        let load_store = Rv32LoadStoreAir::new(
+            Rv32LoadStoreAdapterAir::new(
                 memory_bridge,
-                bitwise_lu_chip.clone(),
+                exec_bridge,
+                range_checker,
+                pointer_max_bits,
             ),
-            BaseAluCoreChip::new(bitwise_lu_chip.clone(), BaseAluOpcode::CLASS_OFFSET),
-            offline_memory.clone(),
+            LoadStoreCoreAir::new(Rv32LoadStoreOpcode::CLASS_OFFSET),
         );
-        inventory.add_executor(
-            base_alu_chip,
-            BaseAluOpcode::iter().map(|x| x.global_opcode()),
-        )?;
+        inventory.add_air(load_store);
 
-        let lt_chip = Rv32LessThanChip::new(
-            Rv32BaseAluAdapterChip::new(
-                execution_bus,
-                program_bus,
+        let load_sign_extend = Rv32LoadSignExtendAir::new(
+            Rv32LoadStoreAdapterAir::new(
                 memory_bridge,
-                bitwise_lu_chip.clone(),
+                exec_bridge,
+                range_checker,
+                pointer_max_bits,
             ),
-            LessThanCoreChip::new(bitwise_lu_chip.clone(), LessThanOpcode::CLASS_OFFSET),
-            offline_memory.clone(),
+            LoadSignExtendCoreAir::new(range_checker),
         );
-        inventory.add_executor(lt_chip, LessThanOpcode::iter().map(|x| x.global_opcode()))?;
+        inventory.add_air(load_sign_extend);
 
-        let shift_chip = Rv32ShiftChip::new(
-            Rv32BaseAluAdapterChip::new(
-                execution_bus,
-                program_bus,
-                memory_bridge,
-                bitwise_lu_chip.clone(),
+        let beq = Rv32BranchEqualAir::new(
+            Rv32BranchAdapterAir::new(exec_bridge, memory_bridge),
+            BranchEqualCoreAir::new(BranchEqualOpcode::CLASS_OFFSET, DEFAULT_PC_STEP),
+        );
+        inventory.add_air(beq);
+
+        let blt = Rv32BranchLessThanAir::new(
+            Rv32BranchAdapterAir::new(exec_bridge, memory_bridge),
+            BranchLessThanCoreAir::new(bitwise_lu, BranchLessThanOpcode::CLASS_OFFSET),
+        );
+        inventory.add_air(blt);
+
+        let jal_lui = Rv32JalLuiAir::new(
+            Rv32CondRdWriteAdapterAir::new(Rv32RdWriteAdapterAir::new(memory_bridge, exec_bridge)),
+            Rv32JalLuiCoreAir::new(bitwise_lu),
+        );
+        inventory.add_air(jal_lui);
+
+        let jalr = Rv32JalrAir::new(
+            Rv32JalrAdapterAir::new(memory_bridge, exec_bridge),
+            Rv32JalrCoreAir::new(bitwise_lu, range_checker),
+        );
+        inventory.add_air(jalr);
+
+        let auipc = Rv32AuipcAir::new(
+            Rv32RdWriteAdapterAir::new(memory_bridge, exec_bridge),
+            Rv32AuipcCoreAir::new(bitwise_lu),
+        );
+        inventory.add_air(auipc);
+
+        Ok(())
+    }
+}
+
+pub struct Rv32ImCpuProverExt;
+// This implementation is specific to CpuBackend because the lookup chips (VariableRangeChecker,
+// BitwiseOperationLookupChip) are specific to CpuBackend.
+impl<E, SC, RA> VmProverExtension<E, RA, Rv32I> for Rv32ImCpuProverExt
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    RA: RowMajorMatrixArena<Val<SC>>,
+    Val<SC>: PrimeField32,
+{
+    fn extend_prover(
+        &self,
+        _: &Rv32I,
+        inventory: &mut ChipInventory<SC, RA, CpuBackend<SC>>,
+    ) -> Result<(), ChipInventoryError> {
+        let range_checker = inventory.range_checker()?.clone();
+        let timestamp_max_bits = inventory.timestamp_max_bits();
+        let pointer_max_bits = inventory.airs().pointer_max_bits();
+        let mem_helper = SharedMemoryHelper::new(range_checker.clone(), timestamp_max_bits);
+
+        let bitwise_lu = {
+            let existing_chip = inventory
+                .find_chip::<SharedBitwiseOperationLookupChip<8>>()
+                .next();
+            if let Some(chip) = existing_chip {
+                chip.clone()
+            } else {
+                let air: &BitwiseOperationLookupAir<8> = inventory.next_air()?;
+                let chip = Arc::new(BitwiseOperationLookupChip::new(air.bus));
+                inventory.add_periphery_chip(chip.clone());
+                chip
+            }
+        };
+
+        // These calls to next_air are not strictly necessary to construct the chips, but provide a
+        // safeguard to ensure that chip construction matches the circuit definition
+        inventory.next_air::<Rv32BaseAluAir>()?;
+        let base_alu = Rv32BaseAluChip::new(
+            BaseAluFiller::new(
+                Rv32BaseAluAdapterFiller::new(bitwise_lu.clone()),
+                bitwise_lu.clone(),
+                BaseAluOpcode::CLASS_OFFSET,
             ),
-            ShiftCoreChip::new(
-                bitwise_lu_chip.clone(),
+            mem_helper.clone(),
+        );
+        inventory.add_executor_chip(base_alu);
+
+        inventory.next_air::<Rv32LessThanAir>()?;
+        let lt = Rv32LessThanChip::new(
+            LessThanFiller::new(
+                Rv32BaseAluAdapterFiller::new(bitwise_lu.clone()),
+                bitwise_lu.clone(),
+                LessThanOpcode::CLASS_OFFSET,
+            ),
+            mem_helper.clone(),
+        );
+        inventory.add_executor_chip(lt);
+
+        inventory.next_air::<Rv32ShiftAir>()?;
+        let shift = Rv32ShiftChip::new(
+            ShiftFiller::new(
+                Rv32BaseAluAdapterFiller::new(bitwise_lu.clone()),
+                bitwise_lu.clone(),
                 range_checker.clone(),
                 ShiftOpcode::CLASS_OFFSET,
             ),
-            offline_memory.clone(),
+            mem_helper.clone(),
         );
-        inventory.add_executor(shift_chip, ShiftOpcode::iter().map(|x| x.global_opcode()))?;
+        inventory.add_executor_chip(shift);
 
+        inventory.next_air::<Rv32LoadStoreAir>()?;
         let load_store_chip = Rv32LoadStoreChip::new(
-            Rv32LoadStoreAdapterChip::new(
-                execution_bus,
-                program_bus,
-                memory_bridge,
-                pointer_max_bits,
-                range_checker.clone(),
+            LoadStoreFiller::new(
+                Rv32LoadStoreAdapterFiller::new(pointer_max_bits, range_checker.clone()),
+                Rv32LoadStoreOpcode::CLASS_OFFSET,
             ),
-            LoadStoreCoreChip::new(Rv32LoadStoreOpcode::CLASS_OFFSET),
-            offline_memory.clone(),
+            mem_helper.clone(),
         );
-        inventory.add_executor(
-            load_store_chip,
-            Rv32LoadStoreOpcode::iter()
-                .take(Rv32LoadStoreOpcode::STOREB as usize + 1)
-                .map(|x| x.global_opcode()),
-        )?;
+        inventory.add_executor_chip(load_store_chip);
 
-        let load_sign_extend_chip = Rv32LoadSignExtendChip::new(
-            Rv32LoadStoreAdapterChip::new(
-                execution_bus,
-                program_bus,
-                memory_bridge,
-                pointer_max_bits,
+        inventory.next_air::<Rv32LoadSignExtendAir>()?;
+        let load_sign_extend = Rv32LoadSignExtendChip::new(
+            LoadSignExtendFiller::new(
+                Rv32LoadStoreAdapterFiller::new(pointer_max_bits, range_checker.clone()),
                 range_checker.clone(),
             ),
-            LoadSignExtendCoreChip::new(range_checker.clone()),
-            offline_memory.clone(),
+            mem_helper.clone(),
         );
-        inventory.add_executor(
-            load_sign_extend_chip,
-            [Rv32LoadStoreOpcode::LOADB, Rv32LoadStoreOpcode::LOADH].map(|x| x.global_opcode()),
-        )?;
-
-        let beq_chip = Rv32BranchEqualChip::new(
-            Rv32BranchAdapterChip::new(execution_bus, program_bus, memory_bridge),
-            BranchEqualCoreChip::new(BranchEqualOpcode::CLASS_OFFSET, DEFAULT_PC_STEP),
-            offline_memory.clone(),
+        inventory.add_executor_chip(load_sign_extend);
+
+        inventory.next_air::<Rv32BranchEqualAir>()?;
+        let beq = Rv32BranchEqualChip::new(
+            BranchEqualFiller::new(
+                Rv32BranchAdapterFiller,
+                BranchEqualOpcode::CLASS_OFFSET,
+                DEFAULT_PC_STEP,
+            ),
+            mem_helper.clone(),
         );
-        inventory.add_executor(
-            beq_chip,
-            BranchEqualOpcode::iter().map(|x| x.global_opcode()),
-        )?;
+        inventory.add_executor_chip(beq);
 
-        let blt_chip = Rv32BranchLessThanChip::new(
-            Rv32BranchAdapterChip::new(execution_bus, program_bus, memory_bridge),
-            BranchLessThanCoreChip::new(
-                bitwise_lu_chip.clone(),
+        inventory.next_air::<Rv32BranchLessThanAir>()?;
+        let blt = Rv32BranchLessThanChip::new(
+            BranchLessThanFiller::new(
+                Rv32BranchAdapterFiller,
+                bitwise_lu.clone(),
                 BranchLessThanOpcode::CLASS_OFFSET,
             ),
-            offline_memory.clone(),
+            mem_helper.clone(),
         );
-        inventory.add_executor(
-            blt_chip,
-            BranchLessThanOpcode::iter().map(|x| x.global_opcode()),
-        )?;
+        inventory.add_executor_chip(blt);
 
-        let jal_lui_chip = Rv32JalLuiChip::new(
-            Rv32CondRdWriteAdapterChip::new(execution_bus, program_bus, memory_bridge),
-            Rv32JalLuiCoreChip::new(bitwise_lu_chip.clone()),
-            offline_memory.clone(),
+        inventory.next_air::<Rv32JalLuiAir>()?;
+        let jal_lui = Rv32JalLuiChip::new(
+            Rv32JalLuiFiller::new(
+                Rv32CondRdWriteAdapterFiller::new(Rv32RdWriteAdapterFiller),
+                bitwise_lu.clone(),
+            ),
+            mem_helper.clone(),
         );
-        inventory.add_executor(
-            jal_lui_chip,
-            Rv32JalLuiOpcode::iter().map(|x| x.global_opcode()),
-        )?;
+        inventory.add_executor_chip(jal_lui);
 
-        let jalr_chip = Rv32JalrChip::new(
-            Rv32JalrAdapterChip::new(execution_bus, program_bus, memory_bridge),
-            Rv32JalrCoreChip::new(bitwise_lu_chip.clone(), range_checker.clone()),
-            offline_memory.clone(),
+        inventory.next_air::<Rv32JalrAir>()?;
+        let jalr = Rv32JalrChip::new(
+            Rv32JalrFiller::new(
+                Rv32JalrAdapterFiller,
+                bitwise_lu.clone(),
+                range_checker.clone(),
+            ),
+            mem_helper.clone(),
         );
-        inventory.add_executor(jalr_chip, Rv32JalrOpcode::iter().map(|x| x.global_opcode()))?;
+        inventory.add_executor_chip(jalr);
 
-        let auipc_chip = Rv32AuipcChip::new(
-            Rv32RdWriteAdapterChip::new(execution_bus, program_bus, memory_bridge),
-            Rv32AuipcCoreChip::new(bitwise_lu_chip.clone()),
-            offline_memory.clone(),
+        inventory.next_air::<Rv32AuipcAir>()?;
+        let auipc = Rv32AuipcChip::new(
+            Rv32AuipcFiller::new(Rv32RdWriteAdapterFiller, bitwise_lu.clone()),
+            mem_helper.clone(),
         );
-        inventory.add_executor(
-            auipc_chip,
-            Rv32AuipcOpcode::iter().map(|x| x.global_opcode()),
-        )?;
+        inventory.add_executor_chip(auipc);
 
-        // There is no downside to adding phantom sub-executors, so we do it in the base extension.
-        builder.add_phantom_sub_executor(
-            phantom::Rv32HintInputSubEx,
-            PhantomDiscriminant(Rv32Phantom::HintInput as u16),
-        )?;
-        builder.add_phantom_sub_executor(
-            phantom::Rv32HintRandomSubEx::new(),
-            PhantomDiscriminant(Rv32Phantom::HintRandom as u16),
-        )?;
-        builder.add_phantom_sub_executor(
-            phantom::Rv32PrintStrSubEx,
-            PhantomDiscriminant(Rv32Phantom::PrintStr as u16),
-        )?;
-        builder.add_phantom_sub_executor(
-            phantom::Rv32HintLoadByKeySubEx,
-            PhantomDiscriminant(Rv32Phantom::HintLoadByKey as u16),
-        )?;
-
-        Ok(inventory)
+        Ok(())
     }
 }
 
-impl<F: PrimeField32> VmExtension<F> for Rv32M {
-    type Executor = Rv32MExecutor<F>;
-    type Periphery = Rv32MPeriphery<F>;
+impl<F> VmExecutionExtension<F> for Rv32M {
+    type Executor = Rv32MExecutor;
 
-    fn build(
+    fn extend_execution(
         &self,
-        builder: &mut VmInventoryBuilder<F>,
-    ) -> Result<VmInventory<Rv32MExecutor<F>, Rv32MPeriphery<F>>, VmInventoryError> {
-        let mut inventory = VmInventory::new();
+        inventory: &mut ExecutorInventoryBuilder<F, Rv32MExecutor>,
+    ) -> Result<(), ExecutorInventoryError> {
+        let mult =
+            Rv32MultiplicationExecutor::new(Rv32MultAdapterExecutor, MulOpcode::CLASS_OFFSET);
+        inventory.add_executor(mult, MulOpcode::iter().map(|x| x.global_opcode()))?;
+
+        let mul_h = Rv32MulHExecutor::new(Rv32MultAdapterExecutor, MulHOpcode::CLASS_OFFSET);
+        inventory.add_executor(mul_h, MulHOpcode::iter().map(|x| x.global_opcode()))?;
+
+        let div_rem = Rv32DivRemExecutor::new(Rv32MultAdapterExecutor, DivRemOpcode::CLASS_OFFSET);
+        inventory.add_executor(div_rem, DivRemOpcode::iter().map(|x| x.global_opcode()))?;
+
+        Ok(())
+    }
+}
+
+impl<SC: StarkGenericConfig> VmCircuitExtension<SC> for Rv32M {
+    fn extend_circuit(&self, inventory: &mut AirInventory<SC>) -> Result<(), AirInventoryError> {
         let SystemPort {
             execution_bus,
             program_bus,
             memory_bridge,
-        } = builder.system_port();
-        let offline_memory = builder.system_base().offline_memory();
-
-        let bitwise_lu_chip = if let Some(&chip) = builder
-            .find_chip::<SharedBitwiseOperationLookupChip<8>>()
-            .first()
-        {
-            chip.clone()
-        } else {
-            let bitwise_lu_bus = BitwiseOperationLookupBus::new(builder.new_bus_idx());
-            let chip = SharedBitwiseOperationLookupChip::new(bitwise_lu_bus);
-            inventory.add_periphery_chip(chip.clone());
-            chip
+        } = inventory.system().port();
+        let exec_bridge = ExecutionBridge::new(execution_bus, program_bus);
+
+        let bitwise_lu = {
+            let existing_air = inventory.find_air::<BitwiseOperationLookupAir<8>>().next();
+            if let Some(air) = existing_air {
+                air.bus
+            } else {
+                let bus = BitwiseOperationLookupBus::new(inventory.new_bus_idx());
+                let air = BitwiseOperationLookupAir::<8>::new(bus);
+                inventory.add_air(air);
+                air.bus
+            }
         };
 
-        let range_tuple_checker = if let Some(chip) = builder
-            .find_chip::<SharedRangeTupleCheckerChip<2>>()
-            .into_iter()
-            .find(|c| {
-                c.bus().sizes[0] >= self.range_tuple_checker_sizes[0]
-                    && c.bus().sizes[1] >= self.range_tuple_checker_sizes[1]
-            }) {
-            chip.clone()
-        } else {
-            let range_tuple_bus =
-                RangeTupleCheckerBus::new(builder.new_bus_idx(), self.range_tuple_checker_sizes);
-            let chip = SharedRangeTupleCheckerChip::new(range_tuple_bus);
-            inventory.add_periphery_chip(chip.clone());
-            chip
+        let range_tuple_checker = {
+            let existing_air = inventory.find_air::<RangeTupleCheckerAir<2>>().find(|c| {
+                c.bus.sizes[0] >= self.range_tuple_checker_sizes[0]
+                    && c.bus.sizes[1] >= self.range_tuple_checker_sizes[1]
+            });
+            if let Some(air) = existing_air {
+                air.bus
+            } else {
+                let bus = RangeTupleCheckerBus::new(
+                    inventory.new_bus_idx(),
+                    self.range_tuple_checker_sizes,
+                );
+                let air = RangeTupleCheckerAir { bus };
+                inventory.add_air(air);
+                air.bus
+            }
         };
 
-        let mul_chip = Rv32MultiplicationChip::new(
-            Rv32MultAdapterChip::new(execution_bus, program_bus, memory_bridge),
-            MultiplicationCoreChip::new(range_tuple_checker.clone(), MulOpcode::CLASS_OFFSET),
-            offline_memory.clone(),
+        let mult = Rv32MultiplicationAir::new(
+            Rv32MultAdapterAir::new(exec_bridge, memory_bridge),
+            MultiplicationCoreAir::new(range_tuple_checker, MulOpcode::CLASS_OFFSET),
         );
-        inventory.add_executor(mul_chip, MulOpcode::iter().map(|x| x.global_opcode()))?;
+        inventory.add_air(mult);
 
-        let mul_h_chip = Rv32MulHChip::new(
-            Rv32MultAdapterChip::new(execution_bus, program_bus, memory_bridge),
-            MulHCoreChip::new(bitwise_lu_chip.clone(), range_tuple_checker.clone()),
-            offline_memory.clone(),
+        let mul_h = Rv32MulHAir::new(
+            Rv32MultAdapterAir::new(exec_bridge, memory_bridge),
+            MulHCoreAir::new(bitwise_lu, range_tuple_checker),
         );
-        inventory.add_executor(mul_h_chip, MulHOpcode::iter().map(|x| x.global_opcode()))?;
+        inventory.add_air(mul_h);
 
-        let div_rem_chip = Rv32DivRemChip::new(
-            Rv32MultAdapterChip::new(execution_bus, program_bus, memory_bridge),
-            DivRemCoreChip::new(
-                bitwise_lu_chip.clone(),
+        let div_rem = Rv32DivRemAir::new(
+            Rv32MultAdapterAir::new(exec_bridge, memory_bridge),
+            DivRemCoreAir::new(bitwise_lu, range_tuple_checker, DivRemOpcode::CLASS_OFFSET),
+        );
+        inventory.add_air(div_rem);
+
+        Ok(())
+    }
+}
+
+// This implementation is specific to CpuBackend because the lookup chips (VariableRangeChecker,
+// BitwiseOperationLookupChip, RangeTupleCheckerChip) are specific to CpuBackend.
+impl<E, SC, RA> VmProverExtension<E, RA, Rv32M> for Rv32ImCpuProverExt
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    RA: RowMajorMatrixArena<Val<SC>>,
+    Val<SC>: PrimeField32,
+{
+    fn extend_prover(
+        &self,
+        extension: &Rv32M,
+        inventory: &mut ChipInventory<SC, RA, CpuBackend<SC>>,
+    ) -> Result<(), ChipInventoryError> {
+        let range_checker = inventory.range_checker()?.clone();
+        let timestamp_max_bits = inventory.timestamp_max_bits();
+        let mem_helper = SharedMemoryHelper::new(range_checker.clone(), timestamp_max_bits);
+
+        let bitwise_lu = {
+            let existing_chip = inventory
+                .find_chip::<SharedBitwiseOperationLookupChip<8>>()
+                .next();
+            if let Some(chip) = existing_chip {
+                chip.clone()
+            } else {
+                let air: &BitwiseOperationLookupAir<8> = inventory.next_air()?;
+                let chip = Arc::new(BitwiseOperationLookupChip::new(air.bus));
+                inventory.add_periphery_chip(chip.clone());
+                chip
+            }
+        };
+
+        let range_tuple_checker = {
+            let existing_chip = inventory
+                .find_chip::<SharedRangeTupleCheckerChip<2>>()
+                .find(|c| {
+                    c.bus().sizes[0] >= extension.range_tuple_checker_sizes[0]
+                        && c.bus().sizes[1] >= extension.range_tuple_checker_sizes[1]
+                });
+            if let Some(chip) = existing_chip {
+                chip.clone()
+            } else {
+                let air: &RangeTupleCheckerAir<2> = inventory.next_air()?;
+                let chip = SharedRangeTupleCheckerChip::new(RangeTupleCheckerChip::new(air.bus));
+                inventory.add_periphery_chip(chip.clone());
+                chip
+            }
+        };
+
+        // These calls to next_air are not strictly necessary to construct the chips, but provide a
+        // safeguard to ensure that chip construction matches the circuit definition
+        inventory.next_air::<Rv32MultiplicationAir>()?;
+        let mult = Rv32MultiplicationChip::new(
+            MultiplicationFiller::new(
+                Rv32MultAdapterFiller,
+                range_tuple_checker.clone(),
+                MulOpcode::CLASS_OFFSET,
+            ),
+            mem_helper.clone(),
+        );
+        inventory.add_executor_chip(mult);
+
+        inventory.next_air::<Rv32MulHAir>()?;
+        let mul_h = Rv32MulHChip::new(
+            MulHFiller::new(
+                Rv32MultAdapterFiller,
+                bitwise_lu.clone(),
+                range_tuple_checker.clone(),
+            ),
+            mem_helper.clone(),
+        );
+        inventory.add_executor_chip(mul_h);
+
+        inventory.next_air::<Rv32DivRemAir>()?;
+        let div_rem = Rv32DivRemChip::new(
+            DivRemFiller::new(
+                Rv32MultAdapterFiller,
+                bitwise_lu.clone(),
                 range_tuple_checker.clone(),
                 DivRemOpcode::CLASS_OFFSET,
             ),
-            offline_memory.clone(),
+            mem_helper.clone(),
         );
-        inventory.add_executor(
-            div_rem_chip,
-            DivRemOpcode::iter().map(|x| x.global_opcode()),
-        )?;
+        inventory.add_executor_chip(div_rem);
 
-        Ok(inventory)
+        Ok(())
     }
 }
 
-impl<F: PrimeField32> VmExtension<F> for Rv32Io {
-    type Executor = Rv32IoExecutor<F>;
-    type Periphery = Rv32IoPeriphery<F>;
+impl<F> VmExecutionExtension<F> for Rv32Io {
+    type Executor = Rv32IoExecutor;
 
-    fn build(
+    fn extend_execution(
         &self,
-        builder: &mut VmInventoryBuilder<F>,
-    ) -> Result<VmInventory<Self::Executor, Self::Periphery>, VmInventoryError> {
-        let mut inventory = VmInventory::new();
+        inventory: &mut ExecutorInventoryBuilder<F, Rv32IoExecutor>,
+    ) -> Result<(), ExecutorInventoryError> {
+        let pointer_max_bits = inventory.pointer_max_bits();
+        let hint_store =
+            Rv32HintStoreExecutor::new(pointer_max_bits, Rv32HintStoreOpcode::CLASS_OFFSET);
+        inventory.add_executor(
+            hint_store,
+            Rv32HintStoreOpcode::iter().map(|x| x.global_opcode()),
+        )?;
+
+        Ok(())
+    }
+}
+
+impl<SC: StarkGenericConfig> VmCircuitExtension<SC> for Rv32Io {
+    fn extend_circuit(&self, inventory: &mut AirInventory<SC>) -> Result<(), AirInventoryError> {
         let SystemPort {
             execution_bus,
             program_bus,
             memory_bridge,
-        } = builder.system_port();
-        let offline_memory = builder.system_base().offline_memory();
-
-        let bitwise_lu_chip = if let Some(&chip) = builder
-            .find_chip::<SharedBitwiseOperationLookupChip<8>>()
-            .first()
-        {
-            chip.clone()
-        } else {
-            let bitwise_lu_bus = BitwiseOperationLookupBus::new(builder.new_bus_idx());
-            let chip = SharedBitwiseOperationLookupChip::new(bitwise_lu_bus);
-            inventory.add_periphery_chip(chip.clone());
-            chip
+        } = inventory.system().port();
+
+        let exec_bridge = ExecutionBridge::new(execution_bus, program_bus);
+        let pointer_max_bits = inventory.pointer_max_bits();
+
+        let bitwise_lu = {
+            let existing_air = inventory.find_air::<BitwiseOperationLookupAir<8>>().next();
+            if let Some(air) = existing_air {
+                air.bus
+            } else {
+                let bus = BitwiseOperationLookupBus::new(inventory.new_bus_idx());
+                let air = BitwiseOperationLookupAir::<8>::new(bus);
+                inventory.add_air(air);
+                air.bus
+            }
         };
 
-        let mut hintstore_chip = Rv32HintStoreChip::new(
-            execution_bus,
-            program_bus,
-            bitwise_lu_chip.clone(),
+        let hint_store = Rv32HintStoreAir::new(
+            exec_bridge,
             memory_bridge,
-            offline_memory.clone(),
-            builder.system_config().memory_config.pointer_max_bits,
+            bitwise_lu,
             Rv32HintStoreOpcode::CLASS_OFFSET,
+            pointer_max_bits,
         );
-        hintstore_chip.set_streams(builder.streams().clone());
+        inventory.add_air(hint_store);
 
-        inventory.add_executor(
-            hintstore_chip,
-            Rv32HintStoreOpcode::iter().map(|x| x.global_opcode()),
-        )?;
+        Ok(())
+    }
+}
 
-        Ok(inventory)
+// This implementation is specific to CpuBackend because the lookup chips (VariableRangeChecker,
+// BitwiseOperationLookupChip) are specific to CpuBackend.
+impl<E, SC, RA> VmProverExtension<E, RA, Rv32Io> for Rv32ImCpuProverExt
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    RA: RowMajorMatrixArena<Val<SC>>,
+    Val<SC>: PrimeField32,
+{
+    fn extend_prover(
+        &self,
+        _: &Rv32Io,
+        inventory: &mut ChipInventory<SC, RA, CpuBackend<SC>>,
+    ) -> Result<(), ChipInventoryError> {
+        let range_checker = inventory.range_checker()?.clone();
+        let timestamp_max_bits = inventory.timestamp_max_bits();
+        let mem_helper = SharedMemoryHelper::new(range_checker.clone(), timestamp_max_bits);
+        let pointer_max_bits = inventory.airs().pointer_max_bits();
+
+        let bitwise_lu = {
+            let existing_chip = inventory
+                .find_chip::<SharedBitwiseOperationLookupChip<8>>()
+                .next();
+            if let Some(chip) = existing_chip {
+                chip.clone()
+            } else {
+                let air: &BitwiseOperationLookupAir<8> = inventory.next_air()?;
+                let chip = Arc::new(BitwiseOperationLookupChip::new(air.bus));
+                inventory.add_periphery_chip(chip.clone());
+                chip
+            }
+        };
+
+        inventory.next_air::<Rv32HintStoreAir>()?;
+        let hint_store = Rv32HintStoreChip::new(
+            Rv32HintStoreFiller::new(pointer_max_bits, bitwise_lu.clone()),
+            mem_helper.clone(),
+        );
+        inventory.add_executor_chip(hint_store);
+
+        Ok(())
     }
 }
 
@@ -497,34 +701,28 @@ mod phantom {
     use eyre::bail;
     use openvm_circuit::{
         arch::{PhantomSubExecutor, Streams},
-        system::memory::MemoryController,
+        system::memory::online::GuestMemory,
     };
     use openvm_instructions::PhantomDiscriminant;
     use openvm_stark_backend::p3_field::{Field, PrimeField32};
-    use rand::{rngs::OsRng, Rng};
+    use rand::{rngs::StdRng, Rng};
 
-    use crate::adapters::unsafe_read_rv32_register;
+    use crate::adapters::{memory_read, read_rv32_register};
 
     pub struct Rv32HintInputSubEx;
-    pub struct Rv32HintRandomSubEx {
-        rng: OsRng,
-    }
-    impl Rv32HintRandomSubEx {
-        pub fn new() -> Self {
-            Self { rng: OsRng }
-        }
-    }
+    pub struct Rv32HintRandomSubEx;
     pub struct Rv32PrintStrSubEx;
     pub struct Rv32HintLoadByKeySubEx;
 
     impl<F: Field> PhantomSubExecutor<F> for Rv32HintInputSubEx {
         fn phantom_execute(
-            &mut self,
-            _: &MemoryController<F>,
+            &self,
+            _: &GuestMemory,
             streams: &mut Streams<F>,
+            _: &mut StdRng,
             _: PhantomDiscriminant,
-            _: F,
-            _: F,
+            _: u32,
+            _: u32,
             _: u16,
         ) -> eyre::Result<()> {
             let mut hint = match streams.input_stream.pop_front() {
@@ -550,18 +748,24 @@ mod phantom {
 
     impl<F: PrimeField32> PhantomSubExecutor<F> for Rv32HintRandomSubEx {
         fn phantom_execute(
-            &mut self,
-            memory: &MemoryController<F>,
+            &self,
+            memory: &GuestMemory,
             streams: &mut Streams<F>,
+            rng: &mut StdRng,
             _: PhantomDiscriminant,
-            a: F,
-            _: F,
+            a: u32,
+            _: u32,
             _: u16,
         ) -> eyre::Result<()> {
-            let len = unsafe_read_rv32_register(memory, a) as usize;
+            static WARN_ONCE: std::sync::Once = std::sync::Once::new();
+            WARN_ONCE.call_once(|| {
+                eprintln!("WARNING: Using fixed-seed RNG for deterministic randomness. Consider security implications for your use case.");
+            });
+
+            let len = read_rv32_register(memory, a) as usize;
             streams.hint_stream.clear();
             streams.hint_stream.extend(
-                std::iter::repeat_with(|| F::from_canonical_u8(self.rng.gen::<u8>())).take(len * 4),
+                std::iter::repeat_with(|| F::from_canonical_u8(rng.gen::<u8>())).take(len * 4),
             );
             Ok(())
         }
@@ -569,23 +773,20 @@ mod phantom {
 
     impl<F: PrimeField32> PhantomSubExecutor<F> for Rv32PrintStrSubEx {
         fn phantom_execute(
-            &mut self,
-            memory: &MemoryController<F>,
+            &self,
+            memory: &GuestMemory,
             _: &mut Streams<F>,
+            _: &mut StdRng,
             _: PhantomDiscriminant,
-            a: F,
-            b: F,
+            a: u32,
+            b: u32,
             _: u16,
         ) -> eyre::Result<()> {
-            let rd = unsafe_read_rv32_register(memory, a);
-            let rs1 = unsafe_read_rv32_register(memory, b);
+            let rd = read_rv32_register(memory, a);
+            let rs1 = read_rv32_register(memory, b);
             let bytes = (0..rs1)
-                .map(|i| -> eyre::Result<u8> {
-                    let val = memory.unsafe_read_cell(F::TWO, F::from_canonical_u32(rd + i));
-                    let byte: u8 = val.as_canonical_u32().try_into()?;
-                    Ok(byte)
-                })
-                .collect::<eyre::Result<Vec<u8>>>()?;
+                .map(|i| memory_read::<1>(memory, 2, rd + i)[0])
+                .collect::<Vec<u8>>();
             let peeked_str = String::from_utf8(bytes)?;
             print!("{peeked_str}");
             Ok(())
@@ -594,22 +795,19 @@ mod phantom {
 
     impl<F: PrimeField32> PhantomSubExecutor<F> for Rv32HintLoadByKeySubEx {
         fn phantom_execute(
-            &mut self,
-            memory: &MemoryController<F>,
+            &self,
+            memory: &GuestMemory,
             streams: &mut Streams<F>,
+            _: &mut StdRng,
             _: PhantomDiscriminant,
-            a: F,
-            b: F,
+            a: u32,
+            b: u32,
             _: u16,
         ) -> eyre::Result<()> {
-            let ptr = unsafe_read_rv32_register(memory, a);
-            let len = unsafe_read_rv32_register(memory, b);
+            let ptr = read_rv32_register(memory, a);
+            let len = read_rv32_register(memory, b);
             let key: Vec<u8> = (0..len)
-                .map(|i| {
-                    memory
-                        .unsafe_read_cell(F::TWO, F::from_canonical_u32(ptr + i))
-                        .as_canonical_u32() as u8
-                })
+                .map(|i| memory_read::<1>(memory, 2, ptr + i)[0])
                 .collect();
             if let Some(val) = streams.kv_store.get(&key) {
                 let to_push = hint_load_by_key_decode::<F>(val);
diff --git a/extensions/rv32im/circuit/src/hintstore/execution.rs b/extensions/rv32im/circuit/src/hintstore/execution.rs
new file mode 100644
index 0000000000..2e87cc9cd9
--- /dev/null
+++ b/extensions/rv32im/circuit/src/hintstore/execution.rs
@@ -0,0 +1,178 @@
+use std::borrow::{Borrow, BorrowMut};
+
+use openvm_circuit::{arch::*, system::memory::online::GuestMemory};
+use openvm_circuit_primitives_derive::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS, RV32_REGISTER_NUM_LIMBS},
+    LocalOpcode,
+};
+use openvm_rv32im_transpiler::{
+    Rv32HintStoreOpcode,
+    Rv32HintStoreOpcode::{HINT_BUFFER, HINT_STOREW},
+};
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::Rv32HintStoreExecutor;
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct HintStorePreCompute {
+    c: u32,
+    a: u8,
+    b: u8,
+}
+
+impl Rv32HintStoreExecutor {
+    #[inline(always)]
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut HintStorePreCompute,
+    ) -> Result<Rv32HintStoreOpcode, StaticProgramError> {
+        let &Instruction {
+            opcode,
+            a,
+            b,
+            c,
+            d,
+            e,
+            ..
+        } = inst;
+        if d.as_canonical_u32() != RV32_REGISTER_AS || e.as_canonical_u32() != RV32_MEMORY_AS {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+        *data = {
+            HintStorePreCompute {
+                c: c.as_canonical_u32(),
+                a: a.as_canonical_u32() as u8,
+                b: b.as_canonical_u32() as u8,
+            }
+        };
+        Ok(Rv32HintStoreOpcode::from_usize(
+            opcode.local_opcode_idx(self.offset),
+        ))
+    }
+}
+
+impl<F> Executor<F> for Rv32HintStoreExecutor
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize {
+        size_of::<HintStorePreCompute>()
+    }
+
+    fn pre_compute<Ctx: ExecutionCtxTrait>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let pre_compute: &mut HintStorePreCompute = data.borrow_mut();
+        let local_opcode = self.pre_compute_impl(pc, inst, pre_compute)?;
+        let fn_ptr = match local_opcode {
+            HINT_STOREW => execute_e1_impl::<_, _, true>,
+            HINT_BUFFER => execute_e1_impl::<_, _, false>,
+        };
+        Ok(fn_ptr)
+    }
+}
+
+impl<F> MeteredExecutor<F> for Rv32HintStoreExecutor
+where
+    F: PrimeField32,
+{
+    fn metered_pre_compute_size(&self) -> usize {
+        size_of::<E2PreCompute<HintStorePreCompute>>()
+    }
+
+    fn metered_pre_compute<Ctx>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        let pre_compute: &mut E2PreCompute<HintStorePreCompute> = data.borrow_mut();
+        pre_compute.chip_idx = chip_idx as u32;
+        let local_opcode = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?;
+        let fn_ptr = match local_opcode {
+            HINT_STOREW => execute_e2_impl::<_, _, true>,
+            HINT_BUFFER => execute_e2_impl::<_, _, false>,
+        };
+        Ok(fn_ptr)
+    }
+}
+
+/// Returns the number of rows used (`num_words`), or 0 if the hint stream is too short.
+#[inline(always)]
+unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait, const IS_HINT_STOREW: bool>(
+    pre_compute: &HintStorePreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) -> u32 {
+    let mem_ptr_limbs = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.b as u32);
+    let mem_ptr = u32::from_le_bytes(mem_ptr_limbs);
+
+    let num_words = if IS_HINT_STOREW {
+        1
+    } else {
+        let num_words_limbs = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.a as u32);
+        u32::from_le_bytes(num_words_limbs)
+    };
+    debug_assert_ne!(num_words, 0);
+
+    if vm_state.streams.hint_stream.len() < RV32_REGISTER_NUM_LIMBS * num_words as usize {
+        vm_state.exit_code = Err(ExecutionError::HintOutOfBounds { pc: vm_state.pc });
+        return 0;
+    }
+
+    for word_index in 0..num_words {
+        let data: [u8; RV32_REGISTER_NUM_LIMBS] = std::array::from_fn(|_| {
+            vm_state
+                .streams
+                .hint_stream
+                .pop_front()
+                .unwrap()
+                .as_canonical_u32() as u8
+        });
+        vm_state.vm_write(
+            RV32_MEMORY_AS,
+            mem_ptr + (RV32_REGISTER_NUM_LIMBS as u32 * word_index),
+            &data,
+        );
+    }
+
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+    num_words
+}
+
+unsafe fn execute_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait, const IS_HINT_STOREW: bool>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &HintStorePreCompute = pre_compute.borrow();
+    execute_e12_impl::<F, CTX, IS_HINT_STOREW>(pre_compute, vm_state);
+}
+
+unsafe fn execute_e2_impl<
+    F: PrimeField32,
+    CTX: MeteredExecutionCtxTrait,
+    const IS_HINT_STOREW: bool,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &E2PreCompute<HintStorePreCompute> = pre_compute.borrow();
+    let height_delta = execute_e12_impl::<F, CTX, IS_HINT_STOREW>(&pre_compute.data, vm_state);
+    vm_state
+        .ctx
+        .on_height_change(pre_compute.chip_idx as usize, height_delta);
+}
diff --git a/extensions/rv32im/circuit/src/hintstore/mod.rs b/extensions/rv32im/circuit/src/hintstore/mod.rs
index d566292207..c82511d69d 100644
--- a/extensions/rv32im/circuit/src/hintstore/mod.rs
+++ b/extensions/rv32im/circuit/src/hintstore/mod.rs
@@ -1,25 +1,21 @@
-use std::{
-    borrow::{Borrow, BorrowMut},
-    sync::{Arc, Mutex, OnceLock},
-};
+use std::borrow::{Borrow, BorrowMut};
 
 use openvm_circuit::{
-    arch::{
-        ExecutionBridge, ExecutionBus, ExecutionError, ExecutionState, InstructionExecutor, Streams,
-    },
-    system::{
-        memory::{
-            offline_checker::{MemoryBridge, MemoryReadAuxCols, MemoryWriteAuxCols},
-            MemoryAddress, MemoryAuxColsFactory, MemoryController, OfflineMemory, RecordId,
+    arch::*,
+    system::memory::{
+        offline_checker::{
+            MemoryBridge, MemoryReadAuxCols, MemoryReadAuxRecord, MemoryWriteAuxCols,
+            MemoryWriteBytesAuxRecord,
         },
-        program::ProgramBus,
+        online::TracingMemory,
+        MemoryAddress, MemoryAuxColsFactory,
     },
 };
 use openvm_circuit_primitives::{
     bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
-    utils::{next_power_of_two_or_zero, not},
+    utils::not,
 };
-use openvm_circuit_primitives_derive::AlignedBorrow;
+use openvm_circuit_primitives_derive::{AlignedBorrow, AlignedBytesBorrow};
 use openvm_instructions::{
     instruction::Instruction,
     program::DEFAULT_PC_STEP,
@@ -31,19 +27,17 @@ use openvm_rv32im_transpiler::{
     Rv32HintStoreOpcode::{HINT_BUFFER, HINT_STOREW},
 };
 use openvm_stark_backend::{
-    config::{StarkGenericConfig, Val},
     interaction::InteractionBuilder,
     p3_air::{Air, AirBuilder, BaseAir},
     p3_field::{Field, FieldAlgebra, PrimeField32},
     p3_matrix::{dense::RowMajorMatrix, Matrix},
-    prover::types::AirProofInput,
-    rap::{AnyRap, BaseAirWithPublicValues, PartitionedBaseAir},
-    Chip, ChipUsageGetter,
+    p3_maybe_rayon::prelude::*,
+    rap::{BaseAirWithPublicValues, PartitionedBaseAir},
 };
-use serde::{Deserialize, Serialize};
 
-use crate::adapters::{compose, decompose};
+use crate::adapters::{read_rv32_register, tracing_read, tracing_write};
 
+mod execution;
 #[cfg(test)]
 mod tests;
 
@@ -70,7 +64,7 @@ pub struct Rv32HintStoreCols<T> {
     pub num_words_aux_cols: MemoryReadAuxCols<T>,
 }
 
-#[derive(Copy, Clone, Debug)]
+#[derive(Copy, Clone, Debug, derive_new::new)]
 pub struct Rv32HintStoreAir {
     pub execution_bridge: ExecutionBridge,
     pub memory_bridge: MemoryBridge,
@@ -182,7 +176,6 @@ impl<AB: InteractionBuilder> Air<AB> for Rv32HintStoreAir {
                 &local_cols.write_aux,
             )
             .eval(builder, is_valid.clone());
-
         let expected_opcode = (local_cols.is_single
             * AB::F::from_canonical_usize(HINT_STOREW as usize + self.offset))
             + (local_cols.is_buffer
@@ -264,139 +257,111 @@ impl<AB: InteractionBuilder> Air<AB> for Rv32HintStoreAir {
     }
 }
 
-#[derive(Serialize, Deserialize)]
-#[serde(bound = "F: Field")]
-pub struct Rv32HintStoreRecord<F: Field> {
-    pub from_state: ExecutionState<u32>,
-    pub instruction: Instruction<F>,
-    pub mem_ptr_read: RecordId,
-    pub mem_ptr: u32,
-    pub num_words: u32,
-
-    pub num_words_read: Option<RecordId>,
-    pub hints: Vec<([F; RV32_REGISTER_NUM_LIMBS], RecordId)>,
-}
-
-pub struct Rv32HintStoreChip<F: Field> {
-    air: Rv32HintStoreAir,
-    pub records: Vec<Rv32HintStoreRecord<F>>,
-    pub height: usize,
-    offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-    pub streams: OnceLock<Arc<Mutex<Streams<F>>>>,
-    bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+#[derive(Copy, Clone, Debug)]
+pub struct Rv32HintStoreMetadata {
+    num_words: usize,
 }
 
-impl<F: PrimeField32> Rv32HintStoreChip<F> {
-    pub fn new(
-        execution_bus: ExecutionBus,
-        program_bus: ProgramBus,
-        bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
-        memory_bridge: MemoryBridge,
-        offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-        pointer_max_bits: usize,
-        offset: usize,
-    ) -> Self {
-        let air = Rv32HintStoreAir {
-            execution_bridge: ExecutionBridge::new(execution_bus, program_bus),
-            memory_bridge,
-            bitwise_operation_lookup_bus: bitwise_lookup_chip.bus(),
-            offset,
-            pointer_max_bits,
-        };
-        Self {
-            records: vec![],
-            air,
-            height: 0,
-            offline_memory,
-            streams: OnceLock::new(),
-            bitwise_lookup_chip,
-        }
-    }
-    pub fn set_streams(&mut self, streams: Arc<Mutex<Streams<F>>>) {
-        self.streams
-            .set(streams)
-            .map_err(|_| "streams have already been set.")
-            .unwrap();
+impl MultiRowMetadata for Rv32HintStoreMetadata {
+    #[inline(always)]
+    fn get_num_rows(&self) -> usize {
+        self.num_words
     }
 }
 
-impl<F: PrimeField32> InstructionExecutor<F> for Rv32HintStoreChip<F> {
-    fn execute(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-    ) -> Result<ExecutionState<u32>, ExecutionError> {
-        let &Instruction {
-            opcode,
-            a: num_words_ptr,
-            b: mem_ptr_ptr,
-            d,
-            e,
-            ..
-        } = instruction;
-        debug_assert_eq!(d.as_canonical_u32(), RV32_REGISTER_AS);
-        debug_assert_eq!(e.as_canonical_u32(), RV32_MEMORY_AS);
-        let local_opcode =
-            Rv32HintStoreOpcode::from_usize(opcode.local_opcode_idx(self.air.offset));
+pub type Rv32HintStoreLayout = MultiRowLayout<Rv32HintStoreMetadata>;
 
-        let (mem_ptr_read, mem_ptr_limbs) = memory.read::<RV32_REGISTER_NUM_LIMBS>(d, mem_ptr_ptr);
-        let (num_words, num_words_read) = if local_opcode == HINT_STOREW {
-            memory.increment_timestamp();
-            (1, None)
-        } else {
-            let (num_words_read, num_words_limbs) =
-                memory.read::<RV32_REGISTER_NUM_LIMBS>(d, num_words_ptr);
-            (compose(num_words_limbs), Some(num_words_read))
-        };
-        debug_assert_ne!(num_words, 0);
-        debug_assert!(num_words <= (1 << self.air.pointer_max_bits));
+// This is the part of the record that we keep only once per instruction
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct Rv32HintStoreRecordHeader {
+    pub num_words: u32,
 
-        let mem_ptr = compose(mem_ptr_limbs);
+    pub from_pc: u32,
+    pub timestamp: u32,
 
-        debug_assert!(mem_ptr <= (1 << self.air.pointer_max_bits));
+    pub mem_ptr_ptr: u32,
+    pub mem_ptr: u32,
+    pub mem_ptr_aux_record: MemoryReadAuxRecord,
 
-        let mut streams = self.streams.get().unwrap().lock().unwrap();
-        if streams.hint_stream.len() < RV32_REGISTER_NUM_LIMBS * num_words as usize {
-            return Err(ExecutionError::HintOutOfBounds { pc: from_state.pc });
-        }
+    // `num_words_ptr` is set to `u32::MAX` for a single-word hint (`HINT_STOREW`)
+    pub num_words_ptr: u32,
+    pub num_words_read: MemoryReadAuxRecord,
+}
 
-        let mut record = Rv32HintStoreRecord {
-            from_state,
-            instruction: instruction.clone(),
-            mem_ptr_read,
-            mem_ptr,
-            num_words,
-            num_words_read,
-            hints: vec![],
-        };
+// This is the part of the record that we keep `num_words` times per instruction
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct Rv32HintStoreVar {
+    pub data_write_aux: MemoryWriteBytesAuxRecord<RV32_REGISTER_NUM_LIMBS>,
+    pub data: [u8; RV32_REGISTER_NUM_LIMBS],
+}
 
-        for word_index in 0..num_words {
-            if word_index != 0 {
-                memory.increment_timestamp();
-                memory.increment_timestamp();
-            }
+/// **SAFETY**: the order of the fields in `Rv32HintStoreRecordHeader` and `Rv32HintStoreVar`
+/// is important. The chip also assumes that the offset of the fields `write_aux` and `data`
+/// in `Rv32HintStoreCols` is bigger than `size_of::<Rv32HintStoreRecordHeader>()`.
+#[derive(Debug)]
+pub struct Rv32HintStoreRecordMut<'a> {
+    pub inner: &'a mut Rv32HintStoreRecordHeader,
+    pub var: &'a mut [Rv32HintStoreVar],
+}
 
-            let data: [F; RV32_REGISTER_NUM_LIMBS] =
-                std::array::from_fn(|_| streams.hint_stream.pop_front().unwrap());
-            let (write, _) = memory.write(
-                e,
-                F::from_canonical_u32(mem_ptr + (RV32_REGISTER_NUM_LIMBS as u32 * word_index)),
-                data,
-            );
-            record.hints.push((data, write));
+/// Custom borrowing that splits the buffer into a fixed-size `Rv32HintStoreRecordHeader`
+/// followed by a slice of `Rv32HintStoreVar`'s of length `num_words` provided at runtime.
+/// Uses `align_to_mut()` to make sure the slice is properly aligned to `Rv32HintStoreVar`.
+/// Has debug assertions to make sure the above works as expected.
+impl<'a> CustomBorrow<'a, Rv32HintStoreRecordMut<'a>, Rv32HintStoreLayout> for [u8] {
+    fn custom_borrow(&'a mut self, layout: Rv32HintStoreLayout) -> Rv32HintStoreRecordMut<'a> {
+        let (header_buf, rest) =
+            unsafe { self.split_at_mut_unchecked(size_of::<Rv32HintStoreRecordHeader>()) };
+
+        let (_, vars, _) = unsafe { rest.align_to_mut::<Rv32HintStoreVar>() };
+        Rv32HintStoreRecordMut {
+            inner: header_buf.borrow_mut(),
+            var: &mut vars[..layout.metadata.num_words],
         }
+    }
 
-        self.height += record.hints.len();
-        self.records.push(record);
+    unsafe fn extract_layout(&self) -> Rv32HintStoreLayout {
+        let header: &Rv32HintStoreRecordHeader = self.borrow();
+        MultiRowLayout::new(Rv32HintStoreMetadata {
+            num_words: header.num_words as usize,
+        })
+    }
+}
 
-        let next_state = ExecutionState {
-            pc: from_state.pc + DEFAULT_PC_STEP,
-            timestamp: memory.timestamp(),
-        };
-        Ok(next_state)
+impl SizedRecord<Rv32HintStoreLayout> for Rv32HintStoreRecordMut<'_> {
+    fn size(layout: &Rv32HintStoreLayout) -> usize {
+        let mut total_len = size_of::<Rv32HintStoreRecordHeader>();
+        // Align the pointer to the alignment of `Rv32HintStoreVar`
+        total_len = total_len.next_multiple_of(align_of::<Rv32HintStoreVar>());
+        total_len += size_of::<Rv32HintStoreVar>() * layout.metadata.num_words;
+        total_len
     }
 
+    fn alignment(_layout: &Rv32HintStoreLayout) -> usize {
+        align_of::<Rv32HintStoreRecordHeader>()
+    }
+}
+
+#[derive(Clone, Copy, derive_new::new)]
+pub struct Rv32HintStoreExecutor {
+    pub pointer_max_bits: usize,
+    pub offset: usize,
+}
+
+#[derive(Clone, derive_new::new)]
+pub struct Rv32HintStoreFiller {
+    pointer_max_bits: usize,
+    bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+}
+
+impl<F, RA> PreflightExecutor<F, RA> for Rv32HintStoreExecutor
+where
+    F: PrimeField32,
+    for<'buf> RA:
+        RecordArena<'buf, MultiRowLayout<Rv32HintStoreMetadata>, Rv32HintStoreRecordMut<'buf>>,
+{
     fn get_opcode_name(&self, opcode: usize) -> String {
         if opcode == HINT_STOREW.global_opcode().as_usize() {
             String::from("HINT_STOREW")
@@ -406,123 +371,214 @@ impl<F: PrimeField32> InstructionExecutor<F> for Rv32HintStoreChip<F> {
             unreachable!("unsupported opcode: {}", opcode)
         }
     }
-}
 
-impl<F: Field> ChipUsageGetter for Rv32HintStoreChip<F> {
-    fn air_name(&self) -> String {
-        "Rv32HintStoreAir".to_string()
-    }
+    fn execute(
+        &self,
+        state: VmStateMut<F, TracingMemory, RA>,
+        instruction: &Instruction<F>,
+    ) -> Result<(), ExecutionError> {
+        let &Instruction {
+            opcode, a, b, d, e, ..
+        } = instruction;
 
-    fn current_trace_height(&self) -> usize {
-        self.height
-    }
+        let a = a.as_canonical_u32();
+        let b = b.as_canonical_u32();
+        debug_assert_eq!(d.as_canonical_u32(), RV32_REGISTER_AS);
+        debug_assert_eq!(e.as_canonical_u32(), RV32_MEMORY_AS);
 
-    fn trace_width(&self) -> usize {
-        Rv32HintStoreCols::<F>::width()
-    }
-}
+        let local_opcode = Rv32HintStoreOpcode::from_usize(opcode.local_opcode_idx(self.offset));
 
-impl<F: PrimeField32> Rv32HintStoreChip<F> {
-    // returns number of used u32s
-    fn record_to_rows(
-        record: Rv32HintStoreRecord<F>,
-        aux_cols_factory: &MemoryAuxColsFactory<F>,
-        slice: &mut [F],
-        memory: &OfflineMemory<F>,
-        bitwise_lookup_chip: &SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
-        pointer_max_bits: usize,
-    ) -> usize {
-        let width = Rv32HintStoreCols::<F>::width();
-        let cols: &mut Rv32HintStoreCols<F> = slice[..width].borrow_mut();
-
-        cols.is_single = F::from_bool(record.num_words_read.is_none());
-        cols.is_buffer = F::from_bool(record.num_words_read.is_some());
-        cols.is_buffer_start = cols.is_buffer;
-
-        cols.from_state = record.from_state.map(F::from_canonical_u32);
-        cols.mem_ptr_ptr = record.instruction.b;
-        aux_cols_factory.generate_read_aux(
-            memory.record_by_id(record.mem_ptr_read),
-            &mut cols.mem_ptr_aux_cols,
-        );
+        // We do untraced read of `num_words` in order to allocate the record first
+        let num_words = if local_opcode == HINT_STOREW {
+            1
+        } else {
+            read_rv32_register(state.memory.data(), a)
+        };
 
-        cols.num_words_ptr = record.instruction.a;
-        if let Some(num_words_read) = record.num_words_read {
-            aux_cols_factory.generate_read_aux(
-                memory.record_by_id(num_words_read),
-                &mut cols.num_words_aux_cols,
-            );
-        }
+        let record = state.ctx.alloc(MultiRowLayout::new(Rv32HintStoreMetadata {
+            num_words: num_words as usize,
+        }));
 
-        let mut mem_ptr = record.mem_ptr;
-        let mut rem_words = record.num_words;
-        let mut used_u32s = 0;
+        record.inner.from_pc = *state.pc;
+        record.inner.timestamp = state.memory.timestamp;
+        record.inner.mem_ptr_ptr = b;
 
-        let mem_ptr_msl = mem_ptr >> ((RV32_REGISTER_NUM_LIMBS - 1) * RV32_CELL_BITS);
-        let rem_words_msl = rem_words >> ((RV32_REGISTER_NUM_LIMBS - 1) * RV32_CELL_BITS);
-        bitwise_lookup_chip.request_range(
-            mem_ptr_msl << (RV32_REGISTER_NUM_LIMBS * RV32_CELL_BITS - pointer_max_bits),
-            rem_words_msl << (RV32_REGISTER_NUM_LIMBS * RV32_CELL_BITS - pointer_max_bits),
-        );
-        for (i, &(data, write)) in record.hints.iter().enumerate() {
-            for half in 0..(RV32_REGISTER_NUM_LIMBS / 2) {
-                bitwise_lookup_chip.request_range(
-                    data[2 * half].as_canonical_u32(),
-                    data[2 * half + 1].as_canonical_u32(),
-                );
-            }
+        record.inner.mem_ptr = u32::from_le_bytes(tracing_read(
+            state.memory,
+            RV32_REGISTER_AS,
+            b,
+            &mut record.inner.mem_ptr_aux_record.prev_timestamp,
+        ));
 
-            let cols: &mut Rv32HintStoreCols<F> = slice[used_u32s..used_u32s + width].borrow_mut();
-            cols.from_state.timestamp =
-                F::from_canonical_u32(record.from_state.timestamp + (3 * i as u32));
-            cols.data = data;
-            aux_cols_factory.generate_write_aux(memory.record_by_id(write), &mut cols.write_aux);
-            cols.rem_words_limbs = decompose(rem_words);
-            cols.mem_ptr_limbs = decompose(mem_ptr);
-            if i != 0 {
-                cols.is_buffer = F::ONE;
-            }
-            used_u32s += width;
-            mem_ptr += RV32_REGISTER_NUM_LIMBS as u32;
-            rem_words -= 1;
-        }
+        debug_assert!(record.inner.mem_ptr <= (1 << self.pointer_max_bits));
+        debug_assert_ne!(num_words, 0);
+        debug_assert!(num_words <= (1 << self.pointer_max_bits));
 
-        used_u32s
-    }
+        record.inner.num_words = num_words;
+        if local_opcode == HINT_STOREW {
+            state.memory.increment_timestamp();
+            record.inner.num_words_ptr = u32::MAX;
+        } else {
+            record.inner.num_words_ptr = a;
+            tracing_read::<RV32_REGISTER_NUM_LIMBS>(
+                state.memory,
+                RV32_REGISTER_AS,
+                record.inner.num_words_ptr,
+                &mut record.inner.num_words_read.prev_timestamp,
+            );
+        };
+
+        if state.streams.hint_stream.len() < RV32_REGISTER_NUM_LIMBS * num_words as usize {
+            return Err(ExecutionError::HintOutOfBounds { pc: *state.pc });
+        }
 
-    fn generate_trace(self) -> RowMajorMatrix<F> {
-        let width = self.trace_width();
-        let height = next_power_of_two_or_zero(self.height);
-        let mut flat_trace = F::zero_vec(width * height);
+        for idx in 0..(num_words as usize) {
+            if idx != 0 {
+                state.memory.increment_timestamp();
+                state.memory.increment_timestamp();
+            }
 
-        let memory = self.offline_memory.lock().unwrap();
+            let data_f: [F; RV32_REGISTER_NUM_LIMBS] =
+                std::array::from_fn(|_| state.streams.hint_stream.pop_front().unwrap());
+            let data: [u8; RV32_REGISTER_NUM_LIMBS] =
+                data_f.map(|byte| byte.as_canonical_u32() as u8);
 
-        let aux_cols_factory = memory.aux_cols_factory();
+            record.var[idx].data = data;
 
-        let mut used_u32s = 0;
-        for record in self.records {
-            used_u32s += Self::record_to_rows(
-                record,
-                &aux_cols_factory,
-                &mut flat_trace[used_u32s..],
-                &memory,
-                &self.bitwise_lookup_chip,
-                self.air.pointer_max_bits,
+            tracing_write(
+                state.memory,
+                RV32_MEMORY_AS,
+                record.inner.mem_ptr + (RV32_REGISTER_NUM_LIMBS * idx) as u32,
+                data,
+                &mut record.var[idx].data_write_aux.prev_timestamp,
+                &mut record.var[idx].data_write_aux.prev_data,
             );
         }
-        // padding rows can just be all zeros
-        RowMajorMatrix::new(flat_trace, width)
+        *state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP);
+
+        Ok(())
     }
 }
 
-impl<SC: StarkGenericConfig> Chip<SC> for Rv32HintStoreChip<Val<SC>>
-where
-    Val<SC>: PrimeField32,
-{
-    fn air(&self) -> Arc<dyn AnyRap<SC>> {
-        Arc::new(self.air)
-    }
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
-        AirProofInput::simple_no_pis(self.generate_trace())
+impl<F: PrimeField32> TraceFiller<F> for Rv32HintStoreFiller {
+    fn fill_trace(
+        &self,
+        mem_helper: &MemoryAuxColsFactory<F>,
+        trace: &mut RowMajorMatrix<F>,
+        rows_used: usize,
+    ) {
+        if rows_used == 0 {
+            return;
+        }
+
+        let width = trace.width;
+        debug_assert_eq!(width, size_of::<Rv32HintStoreCols<u8>>());
+        let mut trace = &mut trace.values[..width * rows_used];
+        let mut sizes = Vec::with_capacity(rows_used);
+        let mut chunks = Vec::with_capacity(rows_used);
+
+        while !trace.is_empty() {
+            let record: &Rv32HintStoreRecordHeader =
+                unsafe { get_record_from_slice(&mut trace, ()) };
+            let (chunk, rest) = trace.split_at_mut(width * record.num_words as usize);
+            sizes.push(record.num_words);
+            chunks.push(chunk);
+            trace = rest;
+        }
+
+        let msl_rshift: u32 = ((RV32_REGISTER_NUM_LIMBS - 1) * RV32_CELL_BITS) as u32;
+        let msl_lshift: u32 =
+            (RV32_REGISTER_NUM_LIMBS * RV32_CELL_BITS - self.pointer_max_bits) as u32;
+
+        chunks
+            .par_iter_mut()
+            .zip(sizes.par_iter())
+            .for_each(|(chunk, &num_words)| {
+                let record: Rv32HintStoreRecordMut = unsafe {
+                    get_record_from_slice(
+                        chunk,
+                        MultiRowLayout::new(Rv32HintStoreMetadata {
+                            num_words: num_words as usize,
+                        }),
+                    )
+                };
+                self.bitwise_lookup_chip.request_range(
+                    (record.inner.mem_ptr >> msl_rshift) << msl_lshift,
+                    (num_words >> msl_rshift) << msl_lshift,
+                );
+
+                let mut timestamp = record.inner.timestamp + num_words * 3;
+                let mut mem_ptr = record.inner.mem_ptr + num_words * RV32_REGISTER_NUM_LIMBS as u32;
+
+                // Assuming that `num_words` is usually small (e.g. 1 for `HINT_STOREW`),
+                // it is better to do a serial pass of the rows per instruction (going from the last
+                // row to the first row) instead of a parallel pass, since a parallel pass would
+                // need to copy the record to a new buffer.
+                chunk
+                    .rchunks_exact_mut(width)
+                    .zip(record.var.iter().enumerate().rev())
+                    .for_each(|(row, (idx, var))| {
+                        for pair in var.data.chunks_exact(2) {
+                            self.bitwise_lookup_chip
+                                .request_range(pair[0] as u32, pair[1] as u32);
+                        }
+
+                        let cols: &mut Rv32HintStoreCols<F> = row.borrow_mut();
+                        let is_single = record.inner.num_words_ptr == u32::MAX;
+                        timestamp -= 3;
+                        if idx == 0 && !is_single {
+                            mem_helper.fill(
+                                record.inner.num_words_read.prev_timestamp,
+                                timestamp + 1,
+                                cols.num_words_aux_cols.as_mut(),
+                            );
+                            cols.num_words_ptr = F::from_canonical_u32(record.inner.num_words_ptr);
+                        } else {
+                            mem_helper.fill_zero(cols.num_words_aux_cols.as_mut());
+                            cols.num_words_ptr = F::ZERO;
+                        }
+
+                        cols.is_buffer_start = F::from_bool(idx == 0 && !is_single);
+
+                        // Note: writing in reverse
+                        cols.data = var.data.map(|x| F::from_canonical_u8(x));
+
+                        cols.write_aux.set_prev_data(
+                            var.data_write_aux
+                                .prev_data
+                                .map(|x| F::from_canonical_u8(x)),
+                        );
+                        mem_helper.fill(
+                            var.data_write_aux.prev_timestamp,
+                            timestamp + 2,
+                            cols.write_aux.as_mut(),
+                        );
+
+                        if idx == 0 {
+                            mem_helper.fill(
+                                record.inner.mem_ptr_aux_record.prev_timestamp,
+                                timestamp,
+                                cols.mem_ptr_aux_cols.as_mut(),
+                            );
+                        } else {
+                            mem_helper.fill_zero(cols.mem_ptr_aux_cols.as_mut());
+                        }
+
+                        mem_ptr -= RV32_REGISTER_NUM_LIMBS as u32;
+                        cols.mem_ptr_limbs = mem_ptr.to_le_bytes().map(|x| F::from_canonical_u8(x));
+                        cols.mem_ptr_ptr = F::from_canonical_u32(record.inner.mem_ptr_ptr);
+
+                        cols.from_state.timestamp = F::from_canonical_u32(timestamp);
+                        cols.from_state.pc = F::from_canonical_u32(record.inner.from_pc);
+
+                        cols.rem_words_limbs = (num_words - idx as u32)
+                            .to_le_bytes()
+                            .map(|x| F::from_canonical_u8(x));
+                        cols.is_buffer = F::from_bool(!is_single);
+                        cols.is_single = F::from_bool(is_single);
+                    });
+            })
     }
 }
+
+pub type Rv32HintStoreChip<F> = VmChipWrapper<F, Rv32HintStoreFiller>;
diff --git a/extensions/rv32im/circuit/src/hintstore/tests.rs b/extensions/rv32im/circuit/src/hintstore/tests.rs
index 204070762c..363fabab96 100644
--- a/extensions/rv32im/circuit/src/hintstore/tests.rs
+++ b/extensions/rv32im/circuit/src/hintstore/tests.rs
@@ -1,20 +1,17 @@
-use std::{
-    array,
-    borrow::BorrowMut,
-    sync::{Arc, Mutex},
-};
+use std::{borrow::BorrowMut, sync::Arc};
 
 use openvm_circuit::arch::{
-    testing::{memory::gen_pointer, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS},
-    Streams,
+    testing::{memory::gen_pointer, TestChipHarness, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS},
+    Arena, DenseRecordArena, MatrixRecordArena, PreflightExecutor,
 };
 use openvm_circuit_primitives::bitwise_op_lookup::{
-    BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
+    BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
+    SharedBitwiseOperationLookupChip,
 };
 use openvm_instructions::{
     instruction::Instruction,
-    riscv::{RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS},
-    VmOpcode,
+    riscv::{RV32_CELL_BITS, RV32_MEMORY_AS, RV32_REGISTER_AS, RV32_REGISTER_NUM_LIMBS},
+    LocalOpcode,
 };
 use openvm_rv32im_transpiler::Rv32HintStoreOpcode::{self, *};
 use openvm_stark_backend::{
@@ -24,104 +21,100 @@ use openvm_stark_backend::{
         Matrix,
     },
     utils::disable_debug_builder,
-    verifier::VerificationError,
 };
-use openvm_stark_sdk::{config::setup_tracing, p3_baby_bear::BabyBear, utils::create_seeded_rng};
-use rand::{rngs::StdRng, Rng};
+use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
+use rand::{rngs::StdRng, Rng, RngCore};
 
-use super::{Rv32HintStoreChip, Rv32HintStoreCols};
-use crate::adapters::decompose;
+use super::{Rv32HintStoreAir, Rv32HintStoreChip, Rv32HintStoreCols, Rv32HintStoreExecutor};
+use crate::{test_utils::get_verification_error, Rv32HintStoreFiller, Rv32HintStoreLayout};
 
 type F = BabyBear;
+const MAX_INS_CAPACITY: usize = 4096;
+type Harness<RA> =
+    TestChipHarness<F, Rv32HintStoreExecutor, Rv32HintStoreAir, Rv32HintStoreChip<F>, RA>;
 
-fn set_and_execute(
+fn create_test_chip<RA: Arena>(
     tester: &mut VmChipTestBuilder<F>,
-    chip: &mut Rv32HintStoreChip<F>,
-    rng: &mut StdRng,
-    opcode: Rv32HintStoreOpcode,
+) -> (
+    Harness<RA>,
+    (
+        BitwiseOperationLookupAir<RV32_CELL_BITS>,
+        SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+    ),
 ) {
-    let mem_ptr = rng.gen_range(
-        0..(1
-            << (tester
-                .memory_controller()
-                .borrow()
-                .mem_config()
-                .pointer_max_bits
-                - 2)),
-    ) << 2;
-    let b = gen_pointer(rng, 4);
-
-    tester.write(1, b, decompose(mem_ptr));
-
-    let read_data: [F; RV32_REGISTER_NUM_LIMBS] =
-        array::from_fn(|_| F::from_canonical_u32(rng.gen_range(0..(1 << RV32_CELL_BITS))));
-    for data in read_data {
-        chip.streams
-            .get()
-            .unwrap()
-            .lock()
-            .unwrap()
-            .hint_stream
-            .push_back(data);
-    }
+    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
+    let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+        bitwise_bus,
+    ));
 
-    tester.execute(
-        chip,
-        &Instruction::from_usize(VmOpcode::from_usize(opcode as usize), [0, b, 0, 1, 2]),
+    let air = Rv32HintStoreAir::new(
+        tester.execution_bridge(),
+        tester.memory_bridge(),
+        bitwise_chip.bus(),
+        Rv32HintStoreOpcode::CLASS_OFFSET,
+        tester.address_bits(),
     );
+    let executor =
+        Rv32HintStoreExecutor::new(tester.address_bits(), Rv32HintStoreOpcode::CLASS_OFFSET);
+    let chip = Rv32HintStoreChip::<F>::new(
+        Rv32HintStoreFiller::new(tester.address_bits(), bitwise_chip.clone()),
+        tester.memory_helper(),
+    );
+
+    let harness = Harness::<RA>::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
 
-    let write_data = read_data;
-    assert_eq!(write_data, tester.read::<4>(2, mem_ptr as usize));
+    (harness, (bitwise_chip.air, bitwise_chip))
 }
 
-fn set_and_execute_buffer(
+fn set_and_execute<RA: Arena>(
     tester: &mut VmChipTestBuilder<F>,
-    chip: &mut Rv32HintStoreChip<F>,
+    harness: &mut Harness<RA>,
     rng: &mut StdRng,
     opcode: Rv32HintStoreOpcode,
-) {
-    let mem_ptr = rng.gen_range(
-        0..(1
-            << (tester
-                .memory_controller()
-                .borrow()
-                .mem_config()
-                .pointer_max_bits
-                - 2)),
-    ) << 2;
-    let b = gen_pointer(rng, 4);
-
-    tester.write(1, b, decompose(mem_ptr));
-
-    let num_words = rng.gen_range(1..20);
-    let a = gen_pointer(rng, 4);
-    tester.write(1, a, decompose(num_words));
-
-    let data: Vec<[F; RV32_REGISTER_NUM_LIMBS]> = (0..num_words)
-        .map(|_| array::from_fn(|_| F::from_canonical_u32(rng.gen_range(0..(1 << RV32_CELL_BITS)))))
-        .collect();
-    for i in 0..num_words {
-        for datum in data[i as usize] {
-            chip.streams
-                .get()
-                .unwrap()
-                .lock()
-                .unwrap()
-                .hint_stream
-                .push_back(datum);
-        }
+) where
+    Rv32HintStoreExecutor: PreflightExecutor<F, RA>,
+{
+    let num_words = match opcode {
+        HINT_STOREW => 1,
+        HINT_BUFFER => rng.gen_range(1..28),
+    } as u32;
+
+    let a = if opcode == HINT_BUFFER {
+        let a = gen_pointer(rng, RV32_REGISTER_NUM_LIMBS);
+        tester.write(
+            RV32_REGISTER_AS as usize,
+            a,
+            num_words.to_le_bytes().map(F::from_canonical_u8),
+        );
+        a
+    } else {
+        0
+    };
+    // Register `b` receives the destination memory pointer, encoded as little-endian bytes.
+    let mem_ptr = gen_pointer(rng, 4) as u32;
+    let b = gen_pointer(rng, RV32_REGISTER_NUM_LIMBS);
+    tester.write(RV32_REGISTER_AS as usize, b, mem_ptr.to_le_bytes().map(F::from_canonical_u8));
+    // Push 4 random bytes per word onto the hint stream, keeping a copy for the final check.
+    let mut input = Vec::with_capacity(num_words as usize * 4);
+    for _ in 0..num_words {
+        let data = rng.next_u32().to_le_bytes().map(F::from_canonical_u8);
+        input.extend(data);
+        tester.streams.hint_stream.extend(data);
     }
 
     tester.execute(
-        chip,
-        &Instruction::from_usize(VmOpcode::from_usize(opcode as usize), [a, b, 0, 1, 2]),
+        harness,
+        &Instruction::from_usize(
+            opcode.global_opcode(),
+            [a, b, 0, RV32_REGISTER_AS as usize, RV32_MEMORY_AS as usize],
+        ),
     );
 
-    for i in 0..num_words {
-        assert_eq!(
-            data[i as usize],
-            tester.read::<4>(2, mem_ptr as usize + (i as usize * RV32_REGISTER_NUM_LIMBS))
-        );
+    for idx in 0..num_words as usize {
+        let data = tester.read::<4>(RV32_MEMORY_AS as usize, mem_ptr as usize + idx * 4);
+
+        let expected: [F; 4] = input[idx * 4..(idx + 1) * 4].try_into().unwrap();
+        assert_eq!(data, expected);
     }
 }
 
@@ -131,39 +124,28 @@ fn set_and_execute_buffer(
 /// Randomly generate computations and execute, ensuring that the generated trace
 /// passes all constraints.
 ///////////////////////////////////////////////////////////////////////////////////////
+
 #[test]
 fn rand_hintstore_test() {
-    setup_tracing();
     let mut rng = create_seeded_rng();
     let mut tester = VmChipTestBuilder::default();
 
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-
-    let range_checker_chip = tester.memory_controller().borrow().range_checker.clone();
-
-    let mut chip = Rv32HintStoreChip::<F>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        bitwise_chip.clone(),
-        tester.memory_bridge(),
-        tester.offline_memory_mutex_arc(),
-        tester.address_bits(),
-        0,
-    );
-    chip.set_streams(Arc::new(Mutex::new(Streams::default())));
-
-    let num_tests: usize = 8;
-    for _ in 0..num_tests {
-        if rng.gen_bool(0.5) {
-            set_and_execute(&mut tester, &mut chip, &mut rng, HINT_STOREW);
+    let (mut harness, bitwise) = create_test_chip(&mut tester);
+    let num_ops: usize = 100;
+    for _ in 0..num_ops {
+        let opcode = if rng.gen_bool(0.5) {
+            HINT_STOREW
         } else {
-            set_and_execute_buffer(&mut tester, &mut chip, &mut rng, HINT_BUFFER);
-        }
+            HINT_BUFFER
+        };
+        set_and_execute(&mut tester, &mut harness, &mut rng, opcode);
     }
 
-    drop(range_checker_chip);
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
+    let tester = tester
+        .build()
+        .load(harness)
+        .load_periphery(bitwise)
+        .finalize();
     tester.simple_test().expect("Verification failed");
 }
 
@@ -171,64 +153,44 @@ fn rand_hintstore_test() {
 // NEGATIVE TESTS
 //
 // Given a fake trace of a single operation, setup a chip and run the test. We replace
-// the write part of the trace and check that the core chip throws the expected error.
-// A dummy adaptor is used so memory interactions don't indirectly cause false passes.
+// part of the trace and check that the chip throws the expected error.
 //////////////////////////////////////////////////////////////////////////////////////
 
 #[allow(clippy::too_many_arguments)]
 fn run_negative_hintstore_test(
     opcode: Rv32HintStoreOpcode,
-    data: Option<[u32; RV32_REGISTER_NUM_LIMBS]>,
-    expected_error: VerificationError,
+    prank_data: Option<[u32; RV32_REGISTER_NUM_LIMBS]>,
+    interaction_error: bool,
 ) {
     let mut rng = create_seeded_rng();
     let mut tester = VmChipTestBuilder::default();
+    let (mut harness, bitwise) = create_test_chip(&mut tester);
 
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-
-    let range_checker_chip = tester.memory_controller().borrow().range_checker.clone();
-
-    let mut chip = Rv32HintStoreChip::<F>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        bitwise_chip.clone(),
-        tester.memory_bridge(),
-        tester.offline_memory_mutex_arc(),
-        tester.address_bits(),
-        0,
-    );
-    chip.set_streams(Arc::new(Mutex::new(Streams::default())));
-
-    set_and_execute(&mut tester, &mut chip, &mut rng, opcode);
+    set_and_execute(&mut tester, &mut harness, &mut rng, opcode);
 
     let modify_trace = |trace: &mut DenseMatrix<BabyBear>| {
         let mut trace_row = trace.row_slice(0).to_vec();
         let cols: &mut Rv32HintStoreCols<F> = trace_row.as_mut_slice().borrow_mut();
-        if let Some(data) = data {
+        if let Some(data) = prank_data {
             cols.data = data.map(F::from_canonical_u32);
         }
         *trace = RowMajorMatrix::new(trace_row, trace.width());
     };
 
-    drop(range_checker_chip);
     disable_debug_builder();
     let tester = tester
         .build()
-        .load_and_prank_trace(chip, modify_trace)
-        .load(bitwise_chip)
+        .load_and_prank_trace(harness, modify_trace)
+        .load_periphery(bitwise)
         .finalize();
-    tester.simple_test_with_expected_error(expected_error);
+    tester.simple_test_with_expected_error(get_verification_error(interaction_error));
 }
 
 #[test]
 fn negative_hintstore_tests() {
-    run_negative_hintstore_test(
-        HINT_STOREW,
-        Some([92, 187, 45, 280]),
-        VerificationError::ChallengePhaseError,
-    );
+    run_negative_hintstore_test(HINT_STOREW, Some([92, 187, 45, 280]), true);
 }
+
 ///////////////////////////////////////////////////////////////////////////////////////
 /// SANITY TESTS
 ///
@@ -239,22 +201,47 @@ fn execute_roundtrip_sanity_test() {
     let mut rng = create_seeded_rng();
     let mut tester = VmChipTestBuilder::default();
 
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
+    let (mut harness, _) = create_test_chip::<MatrixRecordArena<F>>(&mut tester);
 
-    let mut chip = Rv32HintStoreChip::<F>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        bitwise_chip.clone(),
-        tester.memory_bridge(),
-        tester.offline_memory_mutex_arc(),
-        tester.address_bits(),
-        0,
-    );
-    chip.set_streams(Arc::new(Mutex::new(Streams::default())));
+    let num_ops: usize = 10;
+    for _ in 0..num_ops {
+        set_and_execute(&mut tester, &mut harness, &mut rng, HINT_STOREW);
+    }
+}
+
+///////////////////////////////////////////////////////////////////////////////////////
+/// DENSE TESTS
+///
+/// Ensure that the chip works as expected with dense records.
+/// We first execute some instructions with a [DenseRecordArena] and transfer the records
+/// to a [MatrixRecordArena]. After transferring we generate the trace and make sure that
+/// all the constraints pass.
+///////////////////////////////////////////////////////////////////////////////////////
+
+#[test]
+fn dense_record_arena_test() {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default();
+    let (mut sparse_harness, bitwise) = create_test_chip::<MatrixRecordArena<F>>(&mut tester);
+
+    {
+        let mut dense_harness = create_test_chip::<DenseRecordArena>(&mut tester).0;
 
-    let num_tests: usize = 100;
-    for _ in 0..num_tests {
-        set_and_execute(&mut tester, &mut chip, &mut rng, HINT_STOREW);
+        let num_ops: usize = 100;
+        for _ in 0..num_ops {
+            set_and_execute(&mut tester, &mut dense_harness, &mut rng, HINT_STOREW);
+        }
+
+        let mut record_interpreter = dense_harness
+            .arena
+            .get_record_seeker::<_, Rv32HintStoreLayout>();
+        record_interpreter.transfer_to_matrix_arena(&mut sparse_harness.arena);
     }
+
+    let tester = tester
+        .build()
+        .load(sparse_harness)
+        .load_periphery(bitwise)
+        .finalize();
+    tester.simple_test().expect("Verification failed");
 }
diff --git a/extensions/rv32im/circuit/src/jal_lui/core.rs b/extensions/rv32im/circuit/src/jal_lui/core.rs
index 2ba10e615e..40cf6f5c15 100644
--- a/extensions/rv32im/circuit/src/jal_lui/core.rs
+++ b/extensions/rv32im/circuit/src/jal_lui/core.rs
@@ -1,14 +1,12 @@
-use std::{
-    array,
-    borrow::{Borrow, BorrowMut},
-};
+use std::borrow::{Borrow, BorrowMut};
 
-use openvm_circuit::arch::{
-    AdapterAirContext, AdapterRuntimeContext, ImmInstruction, Result, VmAdapterInterface,
-    VmCoreAir, VmCoreChip,
+use openvm_circuit::{
+    arch::*,
+    system::memory::{online::TracingMemory, MemoryAuxColsFactory},
 };
-use openvm_circuit_primitives::bitwise_op_lookup::{
-    BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
+use openvm_circuit_primitives::{
+    bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
+    AlignedBytesBorrow,
 };
 use openvm_circuit_primitives_derive::AlignedBorrow;
 use openvm_instructions::{
@@ -23,9 +21,13 @@ use openvm_stark_backend::{
     p3_field::{Field, FieldAlgebra, PrimeField32},
     rap::BaseAirWithPublicValues,
 };
-use serde::{Deserialize, Serialize};
 
-use crate::adapters::{RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS, RV_J_TYPE_IMM_BITS};
+use crate::adapters::{
+    Rv32CondRdWriteAdapterExecutor, Rv32CondRdWriteAdapterFiller, RV32_CELL_BITS,
+    RV32_REGISTER_NUM_LIMBS, RV_J_TYPE_IMM_BITS,
+};
+
+pub(super) const ADDITIONAL_BITS: u32 = 0b11000000;
 
 #[repr(C)]
 #[derive(Debug, Clone, AlignedBorrow)]
@@ -36,7 +38,7 @@ pub struct Rv32JalLuiCoreCols<T> {
     pub is_lui: T,
 }
 
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, Copy, derive_new::new)]
 pub struct Rv32JalLuiCoreAir {
     pub bus: BitwiseOperationLookupBus,
 }
@@ -141,134 +143,127 @@ where
 }
 
 #[repr(C)]
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[serde(bound = "F: Field")]
-pub struct Rv32JalLuiCoreRecord<F: Field> {
-    pub rd_data: [F; RV32_REGISTER_NUM_LIMBS],
-    pub imm: F,
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct Rv32JalLuiCoreRecord {
+    pub imm: u32,
+    pub rd_data: [u8; RV32_REGISTER_NUM_LIMBS],
     pub is_jal: bool,
-    pub is_lui: bool,
 }
 
-pub struct Rv32JalLuiCoreChip {
-    pub air: Rv32JalLuiCoreAir,
-    pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+#[derive(Clone, Copy, derive_new::new)]
+pub struct Rv32JalLuiExecutor<A = Rv32CondRdWriteAdapterExecutor> {
+    pub adapter: A,
 }
 
-impl Rv32JalLuiCoreChip {
-    pub fn new(bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>) -> Self {
-        Self {
-            air: Rv32JalLuiCoreAir {
-                bus: bitwise_lookup_chip.bus(),
-            },
-            bitwise_lookup_chip,
-        }
-    }
+#[derive(Clone, derive_new::new)]
+pub struct Rv32JalLuiFiller<A = Rv32CondRdWriteAdapterFiller> {
+    adapter: A,
+    pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
 }
 
-impl<F: PrimeField32, I: VmAdapterInterface<F>> VmCoreChip<F, I> for Rv32JalLuiCoreChip
+impl<F, A, RA> PreflightExecutor<F, RA> for Rv32JalLuiExecutor<A>
 where
-    I::Writes: From<[[F; RV32_REGISTER_NUM_LIMBS]; 1]>,
+    F: PrimeField32,
+    A: 'static
+        + for<'a> AdapterTraceExecutor<F, ReadData = (), WriteData = [u8; RV32_REGISTER_NUM_LIMBS]>,
+    for<'buf> RA: RecordArena<
+        'buf,
+        EmptyAdapterCoreLayout<F, A>,
+        (A::RecordMut<'buf>, &'buf mut Rv32JalLuiCoreRecord),
+    >,
 {
-    type Record = Rv32JalLuiCoreRecord<F>;
-    type Air = Rv32JalLuiCoreAir;
+    fn get_opcode_name(&self, opcode: usize) -> String {
+        format!(
+            "{:?}",
+            Rv32JalLuiOpcode::from_usize(opcode - Rv32JalLuiOpcode::CLASS_OFFSET)
+        )
+    }
 
-    #[allow(clippy::type_complexity)]
-    fn execute_instruction(
+    fn execute(
         &self,
+        state: VmStateMut<F, TracingMemory, RA>,
         instruction: &Instruction<F>,
-        from_pc: u32,
-        _reads: I::Reads,
-    ) -> Result<(AdapterRuntimeContext<F, I>, Self::Record)> {
-        let local_opcode = Rv32JalLuiOpcode::from_usize(
-            instruction
-                .opcode
-                .local_opcode_idx(Rv32JalLuiOpcode::CLASS_OFFSET),
-        );
-        let imm = instruction.c;
-
-        let signed_imm = match local_opcode {
-            JAL => {
-                // Note: signed_imm is a signed integer and imm is a field element
-                (imm + F::from_canonical_u32(1 << (RV_J_TYPE_IMM_BITS - 1))).as_canonical_u32()
-                    as i32
-                    - (1 << (RV_J_TYPE_IMM_BITS - 1))
-            }
-            LUI => imm.as_canonical_u32() as i32,
-        };
-        let (to_pc, rd_data) = run_jal_lui(local_opcode, from_pc, signed_imm);
+    ) -> Result<(), ExecutionError> {
+        let &Instruction { opcode, c: imm, .. } = instruction;
 
-        for i in 0..(RV32_REGISTER_NUM_LIMBS / 2) {
-            self.bitwise_lookup_chip
-                .request_range(rd_data[i * 2], rd_data[i * 2 + 1]);
-        }
+        let (mut adapter_record, core_record) = state.ctx.alloc(EmptyAdapterCoreLayout::new());
 
-        if local_opcode == JAL {
-            let last_limb_bits = PC_BITS - RV32_CELL_BITS * (RV32_REGISTER_NUM_LIMBS - 1);
-            let additional_bits = (last_limb_bits..RV32_CELL_BITS).fold(0, |acc, x| acc + (1 << x));
-            self.bitwise_lookup_chip
-                .request_xor(rd_data[3], additional_bits);
-        }
+        A::start(*state.pc, state.memory, &mut adapter_record);
 
-        let rd_data = rd_data.map(F::from_canonical_u32);
+        let is_jal = opcode.local_opcode_idx(Rv32JalLuiOpcode::CLASS_OFFSET) == JAL as usize;
+        let signed_imm = get_signed_imm(is_jal, imm);
 
-        let output = AdapterRuntimeContext {
-            to_pc: Some(to_pc),
-            writes: [rd_data].into(),
-        };
-
-        Ok((
-            output,
-            Rv32JalLuiCoreRecord {
-                rd_data,
-                imm,
-                is_jal: local_opcode == JAL,
-                is_lui: local_opcode == LUI,
-            },
-        ))
-    }
+        let (to_pc, rd_data) = run_jal_lui(is_jal, *state.pc, signed_imm);
 
-    fn get_opcode_name(&self, opcode: usize) -> String {
-        format!(
-            "{:?}",
-            Rv32JalLuiOpcode::from_usize(opcode - Rv32JalLuiOpcode::CLASS_OFFSET)
-        )
+        core_record.imm = imm.as_canonical_u32();
+        core_record.rd_data = rd_data;
+        core_record.is_jal = is_jal;
+
+        self.adapter
+            .write(state.memory, instruction, rd_data, &mut adapter_record);
+
+        *state.pc = to_pc;
+
+        Ok(())
     }
+}
+
+impl<F, A> TraceFiller<F> for Rv32JalLuiFiller<A>
+where
+    F: PrimeField32,
+    A: 'static + AdapterTraceFiller<F>,
+{
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, row_slice: &mut [F]) {
+        let (adapter_row, mut core_row) = unsafe { row_slice.split_at_mut_unchecked(A::WIDTH) };
+        self.adapter.fill_trace_row(mem_helper, adapter_row);
+        let record: &Rv32JalLuiCoreRecord = unsafe { get_record_from_slice(&mut core_row, ()) };
+        let core_row: &mut Rv32JalLuiCoreCols<F> = core_row.borrow_mut();
 
-    fn generate_trace_row(&self, row_slice: &mut [F], record: Self::Record) {
-        let core_cols: &mut Rv32JalLuiCoreCols<F> = row_slice.borrow_mut();
-        core_cols.rd_data = record.rd_data;
-        core_cols.imm = record.imm;
-        core_cols.is_jal = F::from_bool(record.is_jal);
-        core_cols.is_lui = F::from_bool(record.is_lui);
+        for pair in record.rd_data.chunks_exact(2) {
+            self.bitwise_lookup_chip
+                .request_range(pair[0] as u32, pair[1] as u32);
+        }
+        if record.is_jal {
+            self.bitwise_lookup_chip
+                .request_xor(record.rd_data[3] as u32, ADDITIONAL_BITS);
+        }
+
+        // Writing in reverse order. NOTE(review): `record` appears to alias this row — confirm.
+        core_row.is_lui = F::from_bool(!record.is_jal);
+        core_row.is_jal = F::from_bool(record.is_jal);
+        core_row.rd_data = record.rd_data.map(F::from_canonical_u8);
+        core_row.imm = F::from_canonical_u32(record.imm);
     }
+}
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+// Returns `imm` as a signed integer. For JAL, canonical values >= 2^(RV_J_TYPE_IMM_BITS - 1)
+// decode as `-(F::ORDER_U32 - imm)` (a "negative" field element); non-JAL values cast directly.
+pub(super) fn get_signed_imm<F: PrimeField32>(is_jal: bool, imm: F) -> i32 {
+    let imm_f = imm.as_canonical_u32();
+    if is_jal {
+        if imm_f < (1 << (RV_J_TYPE_IMM_BITS - 1)) {
+            imm_f as i32
+        } else {
+            let neg_imm_f = F::ORDER_U32 - imm_f;
+            debug_assert!(neg_imm_f < (1 << (RV_J_TYPE_IMM_BITS - 1)));
+            -(neg_imm_f as i32)
+        }
+    } else {
+        imm_f as i32
     }
 }
 
 // returns (to_pc, rd_data)
-pub(super) fn run_jal_lui(
-    opcode: Rv32JalLuiOpcode,
-    pc: u32,
-    imm: i32,
-) -> (u32, [u32; RV32_REGISTER_NUM_LIMBS]) {
-    match opcode {
-        JAL => {
-            let rd_data = array::from_fn(|i| {
-                ((pc + DEFAULT_PC_STEP) >> (8 * i)) & ((1 << RV32_CELL_BITS) - 1)
-            });
-            let next_pc = pc as i32 + imm;
-            assert!(next_pc >= 0);
-            (next_pc as u32, rd_data)
-        }
-        LUI => {
-            let imm = imm as u32;
-            let rd = imm << 12;
-            let rd_data =
-                array::from_fn(|i| (rd >> (RV32_CELL_BITS * i)) & ((1 << RV32_CELL_BITS) - 1));
-            (pc + DEFAULT_PC_STEP, rd_data)
-        }
+#[inline(always)]
+pub(super) fn run_jal_lui(is_jal: bool, pc: u32, imm: i32) -> (u32, [u8; RV32_REGISTER_NUM_LIMBS]) {
+    if is_jal {
+        let rd_data = (pc + DEFAULT_PC_STEP).to_le_bytes();
+        let next_pc = pc as i32 + imm;
+        debug_assert!(next_pc >= 0);
+        (next_pc as u32, rd_data)
+    } else {
+        let imm = imm as u32;
+        let rd = imm << 12;
+        (pc + DEFAULT_PC_STEP, rd.to_le_bytes())
     }
 }
diff --git a/extensions/rv32im/circuit/src/jal_lui/execution.rs b/extensions/rv32im/circuit/src/jal_lui/execution.rs
new file mode 100644
index 0000000000..129fe32202
--- /dev/null
+++ b/extensions/rv32im/circuit/src/jal_lui/execution.rs
@@ -0,0 +1,168 @@
+use std::{
+    borrow::{Borrow, BorrowMut},
+    mem::size_of,
+};
+
+use openvm_circuit::{
+    arch::{
+        E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait,
+        MeteredExecutor, StaticProgramError, VmExecState,
+    },
+    system::memory::online::GuestMemory,
+};
+use openvm_circuit_primitives_derive::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_REGISTER_AS, LocalOpcode,
+};
+use openvm_rv32im_transpiler::Rv32JalLuiOpcode::{self, JAL};
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::core::{get_signed_imm, Rv32JalLuiExecutor};
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct JalLuiPreCompute {
+    signed_imm: i32,
+    a: u8,
+}
+
+impl<A> Rv32JalLuiExecutor<A> {
+    /// Fills `data` from `inst` and returns `(is_jal, enabled)`, where `enabled` is `inst.f != 0`.
+    #[inline(always)]
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        inst: &Instruction<F>,
+        data: &mut JalLuiPreCompute,
+    ) -> Result<(bool, bool), StaticProgramError> {
+        let local_opcode = Rv32JalLuiOpcode::from_usize(
+            inst.opcode.local_opcode_idx(Rv32JalLuiOpcode::CLASS_OFFSET),
+        );
+        let is_jal = local_opcode == JAL;
+        let signed_imm = get_signed_imm(is_jal, inst.c);
+
+        *data = JalLuiPreCompute {
+            signed_imm,
+            a: inst.a.as_canonical_u32() as u8,
+        };
+        let enabled = !inst.f.is_zero();
+        Ok((is_jal, enabled))
+    }
+}
+
+impl<F, A> Executor<F> for Rv32JalLuiExecutor<A>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize {
+        size_of::<JalLuiPreCompute>()
+    }
+
+    fn pre_compute<Ctx: ExecutionCtxTrait>(
+        &self,
+        _pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let data: &mut JalLuiPreCompute = data.borrow_mut();
+        let (is_jal, enabled) = self.pre_compute_impl(inst, data)?;
+        let fn_ptr = match (is_jal, enabled) {
+            (true, true) => execute_e1_impl::<_, _, true, true>,
+            (true, false) => execute_e1_impl::<_, _, true, false>,
+            (false, true) => execute_e1_impl::<_, _, false, true>,
+            (false, false) => execute_e1_impl::<_, _, false, false>,
+        };
+        Ok(fn_ptr)
+    }
+}
+
+impl<F, A> MeteredExecutor<F> for Rv32JalLuiExecutor<A>
+where
+    F: PrimeField32,
+{
+    fn metered_pre_compute_size(&self) -> usize {
+        size_of::<E2PreCompute<JalLuiPreCompute>>()
+    }
+
+    fn metered_pre_compute<Ctx>(
+        &self,
+        chip_idx: usize,
+        _pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        let data: &mut E2PreCompute<JalLuiPreCompute> = data.borrow_mut();
+        data.chip_idx = chip_idx as u32;
+        let (is_jal, enabled) = self.pre_compute_impl(inst, &mut data.data)?;
+        let fn_ptr = match (is_jal, enabled) {
+            (true, true) => execute_e2_impl::<_, _, true, true>,
+            (true, false) => execute_e2_impl::<_, _, true, false>,
+            (false, true) => execute_e2_impl::<_, _, false, true>,
+            (false, false) => execute_e2_impl::<_, _, false, false>,
+        };
+        Ok(fn_ptr)
+    }
+}
+
+unsafe fn execute_e12_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const IS_JAL: bool,
+    const ENABLED: bool,
+>(
+    pre_compute: &JalLuiPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let JalLuiPreCompute { a, signed_imm } = *pre_compute;
+
+    let rd = if IS_JAL {
+        let rd_data = (vm_state.pc + DEFAULT_PC_STEP).to_le_bytes();
+        let next_pc = vm_state.pc as i32 + signed_imm;
+        debug_assert!(next_pc >= 0);
+        vm_state.pc = next_pc as u32;
+        rd_data
+    } else {
+        let imm = signed_imm as u32;
+        let rd = imm << 12;
+        vm_state.pc += DEFAULT_PC_STEP;
+        rd.to_le_bytes()
+    };
+
+    if ENABLED {
+        vm_state.vm_write(RV32_REGISTER_AS, a as u32, &rd);
+    }
+
+    vm_state.instret += 1;
+}
+
+unsafe fn execute_e1_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const IS_JAL: bool,
+    const ENABLED: bool,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &JalLuiPreCompute = pre_compute.borrow();
+    execute_e12_impl::<F, CTX, IS_JAL, ENABLED>(pre_compute, vm_state);
+}
+
+unsafe fn execute_e2_impl<
+    F: PrimeField32,
+    CTX: MeteredExecutionCtxTrait,
+    const IS_JAL: bool,
+    const ENABLED: bool,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &E2PreCompute<JalLuiPreCompute> = pre_compute.borrow();
+    vm_state
+        .ctx
+        .on_height_change(pre_compute.chip_idx as usize, 1);
+    execute_e12_impl::<F, CTX, IS_JAL, ENABLED>(&pre_compute.data, vm_state);
+}
diff --git a/extensions/rv32im/circuit/src/jal_lui/mod.rs b/extensions/rv32im/circuit/src/jal_lui/mod.rs
index 779b710bea..7116fdf164 100644
--- a/extensions/rv32im/circuit/src/jal_lui/mod.rs
+++ b/extensions/rv32im/circuit/src/jal_lui/mod.rs
@@ -1,11 +1,13 @@
-use openvm_circuit::arch::VmChipWrapper;
+use openvm_circuit::arch::{VmAirWrapper, VmChipWrapper};
 
-use crate::adapters::Rv32CondRdWriteAdapterChip;
+use crate::adapters::Rv32CondRdWriteAdapterAir;
 
 mod core;
+mod execution;
 pub use core::*;
 
 #[cfg(test)]
 mod tests;
 
-pub type Rv32JalLuiChip<F> = VmChipWrapper<F, Rv32CondRdWriteAdapterChip<F>, Rv32JalLuiCoreChip>;
+pub type Rv32JalLuiAir = VmAirWrapper<Rv32CondRdWriteAdapterAir, Rv32JalLuiCoreAir>;
+pub type Rv32JalLuiChip<F> = VmChipWrapper<F, Rv32JalLuiFiller>;
diff --git a/extensions/rv32im/circuit/src/jal_lui/tests.rs b/extensions/rv32im/circuit/src/jal_lui/tests.rs
index 35e258cbfb..2751b9eedd 100644
--- a/extensions/rv32im/circuit/src/jal_lui/tests.rs
+++ b/extensions/rv32im/circuit/src/jal_lui/tests.rs
@@ -1,41 +1,85 @@
-use std::borrow::BorrowMut;
+use std::{borrow::BorrowMut, sync::Arc};
 
-use openvm_circuit::arch::{
-    testing::{VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS},
-    VmAdapterChip,
-};
+use openvm_circuit::arch::testing::{TestChipHarness, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS};
 use openvm_circuit_primitives::bitwise_op_lookup::{
-    BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
+    BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
+    SharedBitwiseOperationLookupChip,
 };
 use openvm_instructions::{instruction::Instruction, program::PC_BITS, LocalOpcode};
 use openvm_rv32im_transpiler::Rv32JalLuiOpcode::{self, *};
 use openvm_stark_backend::{
     p3_air::BaseAir,
     p3_field::{FieldAlgebra, PrimeField32},
-    p3_matrix::{dense::RowMajorMatrix, Matrix},
+    p3_matrix::{
+        dense::{DenseMatrix, RowMajorMatrix},
+        Matrix,
+    },
     utils::disable_debug_builder,
-    verifier::VerificationError,
-    Chip, ChipUsageGetter,
 };
 use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
 use rand::{rngs::StdRng, Rng};
+use test_case::test_case;
 
-use super::{run_jal_lui, Rv32JalLuiChip, Rv32JalLuiCoreChip};
+use super::{run_jal_lui, Rv32JalLuiChip, Rv32JalLuiCoreAir, Rv32JalLuiExecutor};
 use crate::{
     adapters::{
-        Rv32CondRdWriteAdapterChip, Rv32CondRdWriteAdapterCols, RV32_CELL_BITS,
-        RV32_REGISTER_NUM_LIMBS, RV_IS_TYPE_IMM_BITS,
+        Rv32CondRdWriteAdapterAir, Rv32CondRdWriteAdapterCols, Rv32CondRdWriteAdapterExecutor,
+        Rv32CondRdWriteAdapterFiller, Rv32RdWriteAdapterAir, Rv32RdWriteAdapterExecutor,
+        Rv32RdWriteAdapterFiller, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS, RV_IS_TYPE_IMM_BITS,
     },
-    jal_lui::Rv32JalLuiCoreCols,
+    jal_lui::{Rv32JalLuiCoreCols, ADDITIONAL_BITS},
+    test_utils::get_verification_error,
+    Rv32JalLuiAir, Rv32JalLuiFiller,
 };
 
 const IMM_BITS: usize = 20;
 const LIMB_MAX: u32 = (1 << RV32_CELL_BITS) - 1;
+const MAX_INS_CAPACITY: usize = 128;
+type Harness = TestChipHarness<F, Rv32JalLuiExecutor, Rv32JalLuiAir, Rv32JalLuiChip<F>>;
+
 type F = BabyBear;
 
+fn create_test_chip(
+    tester: &VmChipTestBuilder<F>,
+) -> (
+    Harness,
+    (
+        BitwiseOperationLookupAir<RV32_CELL_BITS>,
+        SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+    ),
+) {
+    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
+
+    let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+        bitwise_bus,
+    ));
+
+    let air = Rv32JalLuiAir::new(
+        Rv32CondRdWriteAdapterAir::new(Rv32RdWriteAdapterAir::new(
+            tester.memory_bridge(),
+            tester.execution_bridge(),
+        )),
+        Rv32JalLuiCoreAir::new(bitwise_bus),
+    );
+    let executor = Rv32JalLuiExecutor::new(Rv32CondRdWriteAdapterExecutor::new(
+        Rv32RdWriteAdapterExecutor,
+    ));
+    let chip = Rv32JalLuiChip::<F>::new(
+        Rv32JalLuiFiller::new(
+            Rv32CondRdWriteAdapterFiller::new(Rv32RdWriteAdapterFiller),
+            bitwise_chip.clone(),
+        ),
+        tester.memory_helper(),
+    );
+
+    let harness = Harness::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
+
+    (harness, (bitwise_chip.air, bitwise_chip))
+}
+
 fn set_and_execute(
     tester: &mut VmChipTestBuilder<F>,
-    chip: &mut Rv32JalLuiChip<F>,
+    harness: &mut Harness,
     rng: &mut StdRng,
     opcode: Rv32JalLuiOpcode,
     imm: Option<i32>,
@@ -51,7 +95,7 @@ fn set_and_execute(
     let needs_write = a != 0 || opcode == LUI;
 
     tester.execute_with_pc(
-        chip,
+        harness,
         &Instruction::large_from_isize(
             opcode.global_opcode(),
             a as isize,
@@ -67,11 +111,11 @@ fn set_and_execute(
     let initial_pc = tester.execution.last_from_pc().as_canonical_u32();
     let final_pc = tester.execution.last_to_pc().as_canonical_u32();
 
-    let (next_pc, rd_data) = run_jal_lui(opcode, initial_pc, imm);
+    let (next_pc, rd_data) = run_jal_lui(opcode == JAL, initial_pc, imm);
     let rd_data = if needs_write { rd_data } else { [0; 4] };
 
     assert_eq!(next_pc, final_pc);
-    assert_eq!(rd_data.map(F::from_canonical_u32), tester.read::<4>(1, a));
+    assert_eq!(rd_data.map(F::from_canonical_u8), tester.read::<4>(1, a));
 }
 
 ///////////////////////////////////////////////////////////////////////////////////////
@@ -81,118 +125,98 @@ fn set_and_execute(
 /// passes all constraints.
 ///////////////////////////////////////////////////////////////////////////////////////
 
-#[test]
-fn rand_jal_lui_test() {
+#[test_case(JAL, 100)]
+#[test_case(LUI, 100)]
+fn rand_jal_lui_test(opcode: Rv32JalLuiOpcode, num_ops: usize) {
     let mut rng = create_seeded_rng();
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-
     let mut tester = VmChipTestBuilder::default();
-    let adapter = Rv32CondRdWriteAdapterChip::<F>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-    );
-    let core = Rv32JalLuiCoreChip::new(bitwise_chip.clone());
-    let mut chip = Rv32JalLuiChip::<F>::new(adapter, core, tester.offline_memory_mutex_arc());
+    let (mut harness, bitwise) = create_test_chip(&tester);
 
-    let num_tests: usize = 100;
-    for _ in 0..num_tests {
-        set_and_execute(&mut tester, &mut chip, &mut rng, JAL, None, None);
-        set_and_execute(&mut tester, &mut chip, &mut rng, LUI, None, None);
+    for _ in 0..num_ops {
+        set_and_execute(&mut tester, &mut harness, &mut rng, opcode, None, None);
     }
 
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
+    let tester = tester
+        .build()
+        .load(harness)
+        .load_periphery(bitwise)
+        .finalize();
     tester.simple_test().expect("Verification failed");
 }
 //////////////////////////////////////////////////////////////////////////////////////
 // NEGATIVE TESTS
 //
 // Given a fake trace of a single operation, setup a chip and run the test. We replace
-// the write part of the trace and check that the core chip throws the expected error.
-// A dummy adaptor is used so memory interactions don't indirectly cause false passes.
+// part of the trace and check that the chip throws the expected error.
 //////////////////////////////////////////////////////////////////////////////////////
 
+#[derive(Clone, Copy, Default, PartialEq)]
+struct JalLuiPrankValues {
+    pub rd_data: Option<[u32; RV32_REGISTER_NUM_LIMBS]>, // `Some` replaces core_cols.rd_data
+    pub imm: Option<i32>, // `Some` replaces core_cols.imm (negative values become -|imm| in F)
+    pub is_jal: Option<bool>, // `Some` replaces core_cols.is_jal
+    pub is_lui: Option<bool>, // `Some` replaces core_cols.is_lui
+    pub needs_write: Option<bool>, // `Some` replaces adapter_cols.needs_write
+}
+
 #[allow(clippy::too_many_arguments)]
 fn run_negative_jal_lui_test(
     opcode: Rv32JalLuiOpcode,
     initial_imm: Option<i32>,
     initial_pc: Option<u32>,
-    rd_data: Option<[u32; RV32_REGISTER_NUM_LIMBS]>,
-    imm: Option<i32>,
-    is_jal: Option<bool>,
-    is_lui: Option<bool>,
-    needs_write: Option<bool>,
-    expected_error: VerificationError,
+    prank_vals: JalLuiPrankValues,
+    interaction_error: bool,
 ) {
     let mut rng = create_seeded_rng();
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-
     let mut tester = VmChipTestBuilder::default();
-    let adapter = Rv32CondRdWriteAdapterChip::<F>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-    );
-    let adapter_width = BaseAir::<F>::width(adapter.air());
-    let core = Rv32JalLuiCoreChip::new(bitwise_chip.clone());
-    let mut chip = Rv32JalLuiChip::<F>::new(adapter, core, tester.offline_memory_mutex_arc());
+    let (mut harness, bitwise) = create_test_chip(&tester);
 
     set_and_execute(
         &mut tester,
-        &mut chip,
+        &mut harness,
         &mut rng,
         opcode,
         initial_imm,
         initial_pc,
     );
 
-    let tester = tester.build();
-
-    let jal_lui_trace_width = chip.trace_width();
-    let air = chip.air();
-    let mut chip_input = chip.generate_air_proof_input();
-    let jal_lui_trace = chip_input.raw.common_main.as_mut().unwrap();
-    {
-        let mut trace_row = jal_lui_trace.row_slice(0).to_vec();
-
+    let adapter_width = BaseAir::<F>::width(&harness.air.adapter);
+    let modify_trace = |trace: &mut DenseMatrix<BabyBear>| {
+        let mut trace_row = trace.row_slice(0).to_vec();
         let (adapter_row, core_row) = trace_row.split_at_mut(adapter_width);
-
         let adapter_cols: &mut Rv32CondRdWriteAdapterCols<F> = adapter_row.borrow_mut();
         let core_cols: &mut Rv32JalLuiCoreCols<F> = core_row.borrow_mut();
 
-        if let Some(data) = rd_data {
+        if let Some(data) = prank_vals.rd_data {
             core_cols.rd_data = data.map(F::from_canonical_u32);
         }
-
-        if let Some(imm) = imm {
+        if let Some(imm) = prank_vals.imm {
             core_cols.imm = if imm < 0 {
                 F::NEG_ONE * F::from_canonical_u32((-imm) as u32)
             } else {
                 F::from_canonical_u32(imm as u32)
             };
         }
-        if let Some(is_jal) = is_jal {
+        if let Some(is_jal) = prank_vals.is_jal {
             core_cols.is_jal = F::from_bool(is_jal);
         }
-        if let Some(is_lui) = is_lui {
+        if let Some(is_lui) = prank_vals.is_lui {
             core_cols.is_lui = F::from_bool(is_lui);
         }
-
-        if let Some(needs_write) = needs_write {
+        if let Some(needs_write) = prank_vals.needs_write {
             adapter_cols.needs_write = F::from_bool(needs_write);
         }
 
-        *jal_lui_trace = RowMajorMatrix::new(trace_row, jal_lui_trace_width);
-    }
+        *trace = RowMajorMatrix::new(trace_row, trace.width());
+    };
 
     disable_debug_builder();
     let tester = tester
-        .load_air_proof_input((air, chip_input))
-        .load(bitwise_chip)
+        .build()
+        .load_and_prank_trace(harness, modify_trace)
+        .load_periphery(bitwise)
         .finalize();
-    tester.simple_test_with_expected_error(expected_error);
+    tester.simple_test_with_expected_error(get_verification_error(interaction_error));
 }
 
 #[test]
@@ -201,34 +225,35 @@ fn opcode_flag_negative_test() {
         JAL,
         None,
         None,
-        None,
-        None,
-        Some(false),
-        Some(true),
-        None,
-        VerificationError::OodEvaluationMismatch,
+        JalLuiPrankValues {
+            is_jal: Some(false),
+            is_lui: Some(true),
+            ..Default::default()
+        },
+        false,
     );
     run_negative_jal_lui_test(
         JAL,
         None,
         None,
-        None,
-        None,
-        Some(false),
-        Some(false),
-        Some(false),
-        VerificationError::ChallengePhaseError,
+        JalLuiPrankValues {
+            is_jal: Some(false),
+            is_lui: Some(false),
+            needs_write: Some(false),
+            ..Default::default()
+        },
+        true,
     );
     run_negative_jal_lui_test(
         LUI,
         None,
         None,
-        None,
-        None,
-        Some(true),
-        Some(false),
-        None,
-        VerificationError::OodEvaluationMismatch,
+        JalLuiPrankValues {
+            is_jal: Some(true),
+            is_lui: Some(false),
+            ..Default::default()
+        },
+        false,
     );
 }
 
@@ -238,67 +263,61 @@ fn overflow_negative_tests() {
         JAL,
         None,
         None,
-        Some([LIMB_MAX, LIMB_MAX, LIMB_MAX, LIMB_MAX]),
-        None,
-        None,
-        None,
-        None,
-        VerificationError::OodEvaluationMismatch,
+        JalLuiPrankValues {
+            rd_data: Some([LIMB_MAX, LIMB_MAX, LIMB_MAX, LIMB_MAX]),
+            ..Default::default()
+        },
+        false,
     );
     run_negative_jal_lui_test(
         LUI,
         None,
         None,
-        Some([LIMB_MAX, LIMB_MAX, LIMB_MAX, LIMB_MAX]),
-        None,
-        None,
-        None,
-        None,
-        VerificationError::OodEvaluationMismatch,
+        JalLuiPrankValues {
+            rd_data: Some([LIMB_MAX, LIMB_MAX, LIMB_MAX, LIMB_MAX]),
+            ..Default::default()
+        },
+        false,
     );
     run_negative_jal_lui_test(
         LUI,
         None,
         None,
-        Some([0, LIMB_MAX, LIMB_MAX, LIMB_MAX + 1]),
-        None,
-        None,
-        None,
-        None,
-        VerificationError::OodEvaluationMismatch,
+        JalLuiPrankValues {
+            rd_data: Some([0, LIMB_MAX, LIMB_MAX, LIMB_MAX + 1]),
+            ..Default::default()
+        },
+        false,
     );
     run_negative_jal_lui_test(
         LUI,
         None,
         None,
-        None,
-        Some(-1),
-        None,
-        None,
-        None,
-        VerificationError::OodEvaluationMismatch,
+        JalLuiPrankValues {
+            imm: Some(-1),
+            ..Default::default()
+        },
+        false,
     );
     run_negative_jal_lui_test(
         LUI,
         None,
         None,
-        None,
-        Some(-28),
-        None,
-        None,
-        None,
-        VerificationError::OodEvaluationMismatch,
+        JalLuiPrankValues {
+            imm: Some(-28),
+            ..Default::default()
+        },
+        false,
     );
     run_negative_jal_lui_test(
         JAL,
         None,
         Some(251),
-        Some([F::NEG_ONE.as_canonical_u32(), 1, 0, 0]),
-        None,
-        None,
-        None,
-        None,
-        VerificationError::ChallengePhaseError,
+        JalLuiPrankValues {
+            rd_data: Some([F::NEG_ONE.as_canonical_u32(), 1, 0, 0]),
+            ..Default::default()
+        },
+        true,
     );
 }
 
@@ -307,29 +326,16 @@ fn overflow_negative_tests() {
 ///
 /// Ensure that solve functions produce the correct results.
 ///////////////////////////////////////////////////////////////////////////////////////
+
 #[test]
 fn execute_roundtrip_sanity_test() {
     let mut rng = create_seeded_rng();
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-
     let mut tester = VmChipTestBuilder::default();
-    let adapter = Rv32CondRdWriteAdapterChip::<F>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-    );
-    let core = Rv32JalLuiCoreChip::new(bitwise_chip);
-    let mut chip = Rv32JalLuiChip::<F>::new(adapter, core, tester.offline_memory_mutex_arc());
-    let num_tests: usize = 10;
-    for _ in 0..num_tests {
-        set_and_execute(&mut tester, &mut chip, &mut rng, JAL, None, None);
-        set_and_execute(&mut tester, &mut chip, &mut rng, LUI, None, None);
-    }
+    let (mut harness, _) = create_test_chip(&tester);
 
     set_and_execute(
         &mut tester,
-        &mut chip,
+        &mut harness,
         &mut rng,
         LUI,
         Some((1 << IMM_BITS) - 1),
@@ -337,7 +343,7 @@ fn execute_roundtrip_sanity_test() {
     );
     set_and_execute(
         &mut tester,
-        &mut chip,
+        &mut harness,
         &mut rng,
         JAL,
         Some((1 << RV_IS_TYPE_IMM_BITS) - 1),
@@ -347,20 +353,25 @@ fn execute_roundtrip_sanity_test() {
 
 #[test]
 fn run_jal_sanity_test() {
-    let opcode = JAL;
     let initial_pc = 28120;
     let imm = -2048;
-    let (next_pc, rd_data) = run_jal_lui(opcode, initial_pc, imm);
+    let (next_pc, rd_data) = run_jal_lui(true, initial_pc, imm);
     assert_eq!(next_pc, 26072);
     assert_eq!(rd_data, [220, 109, 0, 0]);
 }
 
 #[test]
 fn run_lui_sanity_test() {
-    let opcode = LUI;
     let initial_pc = 456789120;
     let imm = 853679;
-    let (next_pc, rd_data) = run_jal_lui(opcode, initial_pc, imm);
+    let (next_pc, rd_data) = run_jal_lui(false, initial_pc, imm);
     assert_eq!(next_pc, 456789124);
     assert_eq!(rd_data, [0, 240, 106, 208]);
 }
+
+#[test]
+fn test_additional_bits() {
+    let top_limb_bits = PC_BITS - (RV32_REGISTER_NUM_LIMBS - 1) * RV32_CELL_BITS;
+    let expected: u32 = (top_limb_bits..RV32_CELL_BITS).map(|bit| 1u32 << bit).sum(); // bit mask
+    assert_eq!(expected, ADDITIONAL_BITS);
+}
diff --git a/extensions/rv32im/circuit/src/jalr/core.rs b/extensions/rv32im/circuit/src/jalr/core.rs
index fd89c1e317..15a102410a 100644
--- a/extensions/rv32im/circuit/src/jalr/core.rs
+++ b/extensions/rv32im/circuit/src/jalr/core.rs
@@ -3,13 +3,14 @@ use std::{
     borrow::{Borrow, BorrowMut},
 };
 
-use openvm_circuit::arch::{
-    AdapterAirContext, AdapterRuntimeContext, Result, SignedImmInstruction, VmAdapterInterface,
-    VmCoreAir, VmCoreChip,
+use openvm_circuit::{
+    arch::*,
+    system::memory::{online::TracingMemory, MemoryAuxColsFactory},
 };
 use openvm_circuit_primitives::{
     bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
     var_range::{SharedVariableRangeCheckerChip, VariableRangeCheckerBus},
+    AlignedBytesBorrow,
 };
 use openvm_circuit_primitives_derive::AlignedBorrow;
 use openvm_instructions::{
@@ -24,11 +25,10 @@ use openvm_stark_backend::{
     p3_field::{Field, FieldAlgebra, PrimeField32},
     rap::BaseAirWithPublicValues,
 };
-use serde::{Deserialize, Serialize};
 
-use crate::adapters::{compose, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS};
-
-const RV32_LIMB_MAX: u32 = (1 << RV32_CELL_BITS) - 1;
+use crate::adapters::{
+    Rv32JalrAdapterExecutor, Rv32JalrAdapterFiller, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS,
+};
 
 #[repr(C)]
 #[derive(Debug, Clone, AlignedBorrow)]
@@ -46,18 +46,7 @@ pub struct Rv32JalrCoreCols<T> {
     pub imm_sign: T,
 }
 
-#[repr(C)]
-#[derive(Serialize, Deserialize)]
-pub struct Rv32JalrCoreRecord<F> {
-    pub imm: F,
-    pub rs1_data: [F; RV32_REGISTER_NUM_LIMBS],
-    pub rd_data: [F; RV32_REGISTER_NUM_LIMBS - 1],
-    pub to_pc_least_sig_bit: F,
-    pub to_pc_limbs: [u32; 2],
-    pub imm_sign: F,
-}
-
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, derive_new::new)]
 pub struct Rv32JalrCoreAir {
     pub bitwise_lookup_bus: BitwiseOperationLookupBus,
     pub range_bus: VariableRangeCheckerBus,
@@ -181,127 +170,156 @@ where
     }
 }
 
-pub struct Rv32JalrCoreChip {
-    pub air: Rv32JalrCoreAir,
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct Rv32JalrCoreRecord {
+    pub imm: u16, // operand `c` truncated to its low 16 bits
+    pub from_pc: u32, // `state.pc` captured before `execute` updates it
+    pub rs1_val: u32, // rs1 bytes read through the adapter, assembled little-endian
+    pub imm_sign: bool, // true iff operand `g` is one; adds 0xffff0000 in `run_jalr`
+}
+
+#[derive(Clone, Copy, derive_new::new)]
+pub struct Rv32JalrExecutor<A = Rv32JalrAdapterExecutor> {
+    adapter: A, // performs the rs1 read and the rd write during preflight `execute`
+}
+
+#[derive(Clone)]
+pub struct Rv32JalrFiller<A = Rv32JalrAdapterFiller> {
+    adapter: A,
     pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
     pub range_checker_chip: SharedVariableRangeCheckerChip,
 }
 
-impl Rv32JalrCoreChip {
+impl<A> Rv32JalrFiller<A> {
     pub fn new(
+        adapter: A,
         bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
         range_checker_chip: SharedVariableRangeCheckerChip,
     ) -> Self {
         assert!(range_checker_chip.range_max_bits() >= 16);
         Self {
-            air: Rv32JalrCoreAir {
-                bitwise_lookup_bus: bitwise_lookup_chip.bus(),
-                range_bus: range_checker_chip.bus(),
-            },
+            adapter,
             bitwise_lookup_chip,
             range_checker_chip,
         }
     }
 }
 
-impl<F: PrimeField32, I: VmAdapterInterface<F>> VmCoreChip<F, I> for Rv32JalrCoreChip
+impl<F, A, RA> PreflightExecutor<F, RA> for Rv32JalrExecutor<A>
 where
-    I::Reads: Into<[[F; RV32_REGISTER_NUM_LIMBS]; 1]>,
-    I::Writes: From<[[F; RV32_REGISTER_NUM_LIMBS]; 1]>,
+    F: PrimeField32,
+    A: 'static
+        + AdapterTraceExecutor<
+            F,
+            ReadData = [u8; RV32_REGISTER_NUM_LIMBS],
+            WriteData = [u8; RV32_REGISTER_NUM_LIMBS],
+        >,
+    for<'buf> RA: RecordArena<
+        'buf,
+        EmptyAdapterCoreLayout<F, A>,
+        (A::RecordMut<'buf>, &'buf mut Rv32JalrCoreRecord),
+    >,
 {
-    type Record = Rv32JalrCoreRecord<F>;
-    type Air = Rv32JalrCoreAir;
+    fn get_opcode_name(&self, opcode: usize) -> String {
+        format!(
+            "{:?}",
+            Rv32JalrOpcode::from_usize(opcode - Rv32JalrOpcode::CLASS_OFFSET)
+        )
+    }
 
-    #[allow(clippy::type_complexity)]
-    fn execute_instruction(
+    fn execute(
         &self,
+        state: VmStateMut<F, TracingMemory, RA>,
         instruction: &Instruction<F>,
-        from_pc: u32,
-        reads: I::Reads,
-    ) -> Result<(AdapterRuntimeContext<F, I>, Self::Record)> {
+    ) -> Result<(), ExecutionError> {
         let Instruction { opcode, c, g, .. } = *instruction;
-        let local_opcode =
-            Rv32JalrOpcode::from_usize(opcode.local_opcode_idx(Rv32JalrOpcode::CLASS_OFFSET));
 
-        let imm = c.as_canonical_u32();
-        let imm_sign = g.as_canonical_u32();
-        let imm_extended = imm + imm_sign * 0xffff0000;
+        debug_assert_eq!(
+            opcode.local_opcode_idx(Rv32JalrOpcode::CLASS_OFFSET),
+            JALR as usize
+        );
 
-        let rs1 = reads.into()[0];
-        let rs1_val = compose(rs1);
+        let (mut adapter_record, core_record) = state.ctx.alloc(EmptyAdapterCoreLayout::new());
 
-        let (to_pc, rd_data) = run_jalr(local_opcode, from_pc, imm_extended, rs1_val);
+        A::start(*state.pc, state.memory, &mut adapter_record);
 
-        self.bitwise_lookup_chip
-            .request_range(rd_data[0], rd_data[1]);
-        self.range_checker_chip
-            .add_count(rd_data[2], RV32_CELL_BITS);
-        self.range_checker_chip
-            .add_count(rd_data[3], PC_BITS - RV32_CELL_BITS * 3);
+        core_record.rs1_val = u32::from_le_bytes(self.adapter.read(
+            state.memory,
+            instruction,
+            &mut adapter_record,
+        ));
 
-        let mask = (1 << 15) - 1;
-        let to_pc_least_sig_bit = rs1_val.wrapping_add(imm_extended) & 1;
+        core_record.imm = c.as_canonical_u32() as u16;
+        core_record.imm_sign = g.is_one();
+        core_record.from_pc = *state.pc;
 
-        let to_pc_limbs = array::from_fn(|i| ((to_pc >> (1 + i * 15)) & mask));
+        let (to_pc, rd_data) = run_jalr(
+            core_record.from_pc,
+            core_record.rs1_val,
+            core_record.imm,
+            core_record.imm_sign,
+        );
 
-        let rd_data = rd_data.map(F::from_canonical_u32);
+        self.adapter
+            .write(state.memory, instruction, rd_data, &mut adapter_record);
 
-        let output = AdapterRuntimeContext {
-            to_pc: Some(to_pc),
-            writes: [rd_data].into(),
-        };
-
-        Ok((
-            output,
-            Rv32JalrCoreRecord {
-                imm: c,
-                rd_data: array::from_fn(|i| rd_data[i + 1]),
-                rs1_data: rs1,
-                to_pc_least_sig_bit: F::from_canonical_u32(to_pc_least_sig_bit),
-                to_pc_limbs,
-                imm_sign: g,
-            },
-        ))
-    }
+        // RISC-V spec explicitly sets the least significant bit of `to_pc` to 0
+        *state.pc = to_pc & !1;
 
-    fn get_opcode_name(&self, opcode: usize) -> String {
-        format!(
-            "{:?}",
-            Rv32JalrOpcode::from_usize(opcode - Rv32JalrOpcode::CLASS_OFFSET)
-        )
+        Ok(())
     }
+}
+impl<F, A> TraceFiller<F> for Rv32JalrFiller<A>
+where
+    F: PrimeField32,
+    A: 'static + AdapterTraceFiller<F>,
+{
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, row_slice: &mut [F]) {
+        let (adapter_row, mut core_row) = unsafe { row_slice.split_at_mut_unchecked(A::WIDTH) };
 
-    fn generate_trace_row(&self, row_slice: &mut [F], record: Self::Record) {
-        self.range_checker_chip.add_count(record.to_pc_limbs[0], 15);
-        self.range_checker_chip.add_count(record.to_pc_limbs[1], 14);
-
-        let core_cols: &mut Rv32JalrCoreCols<F> = row_slice.borrow_mut();
-        core_cols.imm = record.imm;
-        core_cols.rd_data = record.rd_data;
-        core_cols.rs1_data = record.rs1_data;
-        core_cols.to_pc_least_sig_bit = record.to_pc_least_sig_bit;
-        core_cols.to_pc_limbs = record.to_pc_limbs.map(F::from_canonical_u32);
-        core_cols.imm_sign = record.imm_sign;
-        core_cols.is_valid = F::ONE;
-    }
+        self.adapter.fill_trace_row(mem_helper, adapter_row);
+        let record: &Rv32JalrCoreRecord = unsafe { get_record_from_slice(&mut core_row, ()) };
+
+        let core_row: &mut Rv32JalrCoreCols<F> = core_row.borrow_mut();
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        let (to_pc, rd_data) =
+            run_jalr(record.from_pc, record.rs1_val, record.imm, record.imm_sign);
+        let to_pc_limbs = [(to_pc & ((1 << 16) - 1)) >> 1, to_pc >> 16];
+        self.range_checker_chip.add_count(to_pc_limbs[0], 15);
+        self.range_checker_chip
+            .add_count(to_pc_limbs[1], PC_BITS - 16);
+        self.bitwise_lookup_chip
+            .request_range(rd_data[0] as u32, rd_data[1] as u32);
+
+        self.range_checker_chip
+            .add_count(rd_data[2] as u32, RV32_CELL_BITS);
+        self.range_checker_chip
+            .add_count(rd_data[3] as u32, PC_BITS - RV32_CELL_BITS * 3);
+
+        // Write in reverse column order: `record` is borrowed from this same row buffer
+        core_row.imm_sign = F::from_bool(record.imm_sign);
+        core_row.to_pc_limbs = to_pc_limbs.map(F::from_canonical_u32);
+        core_row.to_pc_least_sig_bit = F::from_bool(to_pc & 1 == 1);
+        // fill_trace_row is called only on valid rows
+        core_row.is_valid = F::ONE;
+        core_row.rs1_data = record.rs1_val.to_le_bytes().map(F::from_canonical_u8);
+        core_row
+            .rd_data
+            .iter_mut()
+            .rev()
+            .zip(rd_data.iter().skip(1).rev())
+            .for_each(|(dst, src)| {
+                *dst = F::from_canonical_u8(*src);
+            });
+        core_row.imm = F::from_canonical_u16(record.imm);
     }
 }
 
 // returns (to_pc, rd_data)
-pub(super) fn run_jalr(
-    _opcode: Rv32JalrOpcode,
-    pc: u32,
-    imm: u32,
-    rs1: u32,
-) -> (u32, [u32; RV32_REGISTER_NUM_LIMBS]) {
-    let to_pc = rs1.wrapping_add(imm);
-    let to_pc = to_pc - (to_pc & 1);
+#[inline(always)]
+pub(super) fn run_jalr(pc: u32, rs1: u32, imm: u16, imm_sign: bool) -> (u32, [u8; 4]) {
+    let to_pc = rs1.wrapping_add(imm as u32 + (imm_sign as u32 * 0xffff0000));
     assert!(to_pc < (1 << PC_BITS));
-    (
-        to_pc,
-        array::from_fn(|i: usize| ((pc + DEFAULT_PC_STEP) >> (RV32_CELL_BITS * i)) & RV32_LIMB_MAX),
-    )
+    (to_pc, pc.wrapping_add(DEFAULT_PC_STEP).to_le_bytes()) // to_pc LSB is left for callers
 }
diff --git a/extensions/rv32im/circuit/src/jalr/execution.rs b/extensions/rv32im/circuit/src/jalr/execution.rs
new file mode 100644
index 0000000000..8eb09de03c
--- /dev/null
+++ b/extensions/rv32im/circuit/src/jalr/execution.rs
@@ -0,0 +1,146 @@
+use std::{
+    borrow::{Borrow, BorrowMut},
+    mem::size_of,
+};
+
+use openvm_circuit::{
+    arch::{
+        E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait,
+        MeteredExecutor, StaticProgramError, VmExecState,
+    },
+    system::memory::online::GuestMemory,
+};
+use openvm_circuit_primitives_derive::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction,
+    program::{DEFAULT_PC_STEP, PC_BITS},
+    riscv::RV32_REGISTER_AS,
+};
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::core::Rv32JalrExecutor;
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct JalrPreCompute {
+    imm_extended: u32, // operand c + operand g * 0xffff0000, computed once at pre-compute time
+    a: u8, // register pointer that receives pc + DEFAULT_PC_STEP when the write is enabled
+    b: u8, // register pointer rs1 is read from
+}
+
+impl<A> Rv32JalrExecutor<A> {
+    /// Fills `data` from `inst`. Returns `Ok(true)` iff `inst.f` is nonzero (rd write enabled).
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut JalrPreCompute,
+    ) -> Result<bool, StaticProgramError> {
+        let imm_extended = inst.c.as_canonical_u32() + inst.g.as_canonical_u32() * 0xffff0000;
+        if inst.d.as_canonical_u32() != RV32_REGISTER_AS {
+            return Err(StaticProgramError::InvalidInstruction(pc)); // d must be the register AS
+        }
+        *data = JalrPreCompute {
+            imm_extended, // NOTE(review): raw c/g here; preflight uses `c as u16`, `g.is_one()`
+            a: inst.a.as_canonical_u32() as u8,
+            b: inst.b.as_canonical_u32() as u8,
+        };
+        let enabled = !inst.f.is_zero(); // callers use this to pick the ENABLED variant
+        Ok(enabled)
+    }
+}
+
+impl<F, A> Executor<F> for Rv32JalrExecutor<A>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize {
+        size_of::<JalrPreCompute>() // `pre_compute` reinterprets its byte buffer as this struct
+    }
+    #[inline(always)]
+    fn pre_compute<Ctx: ExecutionCtxTrait>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let data: &mut JalrPreCompute = data.borrow_mut();
+        let enabled = self.pre_compute_impl(pc, inst, data)?;
+        let fn_ptr = if enabled {
+            execute_e1_impl::<_, _, true> // ENABLED = true: rd is written
+        } else {
+            execute_e1_impl::<_, _, false> // ENABLED = false: the rd write is skipped
+        };
+        Ok(fn_ptr)
+    }
+}
+
+impl<F, A> MeteredExecutor<F> for Rv32JalrExecutor<A>
+where
+    F: PrimeField32,
+{
+    fn metered_pre_compute_size(&self) -> usize {
+        size_of::<E2PreCompute<JalrPreCompute>>() // wrapper that also carries `chip_idx`
+    }
+
+    fn metered_pre_compute<Ctx>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        let data: &mut E2PreCompute<JalrPreCompute> = data.borrow_mut();
+        data.chip_idx = chip_idx as u32; // read back by `execute_e2_impl`
+        let enabled = self.pre_compute_impl(pc, inst, &mut data.data)?;
+        let fn_ptr = if enabled {
+            execute_e2_impl::<_, _, true> // ENABLED = true: rd is written
+        } else {
+            execute_e2_impl::<_, _, false> // ENABLED = false: the rd write is skipped
+        };
+        Ok(fn_ptr)
+    }
+}
+
+#[inline(always)]
+unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait, const ENABLED: bool>(
+    pre_compute: &JalrPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let rs1 = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.b as u32);
+    let rs1 = u32::from_le_bytes(rs1);
+    let to_pc = rs1.wrapping_add(pre_compute.imm_extended);
+    let to_pc = to_pc - (to_pc & 1); // clear the least significant bit of the jump target
+    debug_assert!(to_pc < (1 << PC_BITS));
+    let rd = vm_state.pc.wrapping_add(DEFAULT_PC_STEP).to_le_bytes();
+    // Link value uses wrapping_add like `run_jalr`; it is stored only when ENABLED.
+    if ENABLED {
+        vm_state.vm_write(RV32_REGISTER_AS, pre_compute.a as u32, &rd);
+    }
+    // Jump to the aligned target and bump `instret` by one.
+    vm_state.pc = to_pc;
+    vm_state.instret += 1;
+}
+
+unsafe fn execute_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait, const ENABLED: bool>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &JalrPreCompute = pre_compute.borrow(); // bytes filled by `pre_compute`
+    execute_e12_impl::<F, CTX, ENABLED>(pre_compute, vm_state);
+}
+
+unsafe fn execute_e2_impl<F: PrimeField32, CTX: MeteredExecutionCtxTrait, const ENABLED: bool>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &E2PreCompute<JalrPreCompute> = pre_compute.borrow();
+    vm_state
+        .ctx
+        .on_height_change(pre_compute.chip_idx as usize, 1); // presumably +1 row for this chip
+    execute_e12_impl::<F, CTX, ENABLED>(&pre_compute.data, vm_state); // then the shared JALR step
+}
diff --git a/extensions/rv32im/circuit/src/jalr/mod.rs b/extensions/rv32im/circuit/src/jalr/mod.rs
index 1d85dcbe4a..b3053f0dc4 100644
--- a/extensions/rv32im/circuit/src/jalr/mod.rs
+++ b/extensions/rv32im/circuit/src/jalr/mod.rs
@@ -1,11 +1,13 @@
-use openvm_circuit::arch::VmChipWrapper;
+use openvm_circuit::arch::{VmAirWrapper, VmChipWrapper};
 
-use crate::adapters::Rv32JalrAdapterChip;
+use crate::adapters::Rv32JalrAdapterAir;
 
 mod core;
+mod execution;
 pub use core::*;
 
 #[cfg(test)]
 mod tests;
 
-pub type Rv32JalrChip<F> = VmChipWrapper<F, Rv32JalrAdapterChip<F>, Rv32JalrCoreChip>;
+pub type Rv32JalrAir = VmAirWrapper<Rv32JalrAdapterAir, Rv32JalrCoreAir>;
+pub type Rv32JalrChip<F> = VmChipWrapper<F, Rv32JalrFiller>;
diff --git a/extensions/rv32im/circuit/src/jalr/tests.rs b/extensions/rv32im/circuit/src/jalr/tests.rs
index e22d97967f..f0ae372e7f 100644
--- a/extensions/rv32im/circuit/src/jalr/tests.rs
+++ b/extensions/rv32im/circuit/src/jalr/tests.rs
@@ -1,41 +1,83 @@
-use std::{array, borrow::BorrowMut};
+use std::{array, borrow::BorrowMut, sync::Arc};
 
-use openvm_circuit::arch::{
-    testing::{VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS},
-    VmAdapterChip,
-};
+use openvm_circuit::arch::testing::{TestChipHarness, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS};
 use openvm_circuit_primitives::bitwise_op_lookup::{
-    BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
+    BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
+    SharedBitwiseOperationLookupChip,
 };
 use openvm_instructions::{instruction::Instruction, program::PC_BITS, LocalOpcode};
 use openvm_rv32im_transpiler::Rv32JalrOpcode::{self, *};
 use openvm_stark_backend::{
     p3_air::BaseAir,
     p3_field::{FieldAlgebra, PrimeField32},
-    p3_matrix::{dense::RowMajorMatrix, Matrix},
+    p3_matrix::{
+        dense::{DenseMatrix, RowMajorMatrix},
+        Matrix,
+    },
     utils::disable_debug_builder,
-    verifier::VerificationError,
-    Chip, ChipUsageGetter,
 };
 use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
 use rand::{rngs::StdRng, Rng};
 
+use super::Rv32JalrCoreAir;
 use crate::{
-    adapters::{compose, Rv32JalrAdapterChip, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS},
-    jalr::{run_jalr, Rv32JalrChip, Rv32JalrCoreChip, Rv32JalrCoreCols},
+    adapters::{
+        compose, Rv32JalrAdapterAir, Rv32JalrAdapterExecutor, Rv32JalrAdapterFiller,
+        RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS,
+    },
+    jalr::{run_jalr, Rv32JalrChip, Rv32JalrCoreCols, Rv32JalrExecutor},
+    test_utils::get_verification_error,
+    Rv32JalrAir, Rv32JalrFiller,
 };
 
 const IMM_BITS: usize = 16;
+const MAX_INS_CAPACITY: usize = 128;
 type F = BabyBear;
+type Harness = TestChipHarness<F, Rv32JalrExecutor, Rv32JalrAir, Rv32JalrChip<F>>;
 
 fn into_limbs(num: u32) -> [u32; 4] {
     array::from_fn(|i| (num >> (8 * i)) & 255)
 }
 
+fn create_test_chip(
+    tester: &mut VmChipTestBuilder<F>,
+) -> (
+    Harness,
+    (
+        BitwiseOperationLookupAir<RV32_CELL_BITS>,
+        SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+    ),
+) {
+    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
+    let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+        bitwise_bus,
+    ));
+    // Reuse the tester's range checker so the AIR bus and the filler's chip agree.
+    let range_checker_chip = tester.range_checker().clone();
+
+    let air = Rv32JalrAir::new(
+        Rv32JalrAdapterAir::new(tester.memory_bridge(), tester.execution_bridge()),
+        Rv32JalrCoreAir::new(bitwise_bus, range_checker_chip.bus()),
+    );
+    let executor = Rv32JalrExecutor::new(Rv32JalrAdapterExecutor);
+    let chip = Rv32JalrChip::<F>::new(
+        Rv32JalrFiller::new(
+            Rv32JalrAdapterFiller::new(),
+            bitwise_chip.clone(), // filler keeps a clone; the original is returned below
+            range_checker_chip.clone(),
+        ),
+        tester.memory_helper(),
+    );
+
+    let harness = Harness::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
+
+    (harness, (bitwise_chip.air, bitwise_chip))
+}
+
 #[allow(clippy::too_many_arguments)]
 fn set_and_execute(
     tester: &mut VmChipTestBuilder<F>,
-    chip: &mut Rv32JalrChip<F>,
+    harness: &mut Harness,
     rng: &mut StdRng,
     opcode: Rv32JalrOpcode,
     initial_imm: Option<u32>,
@@ -45,7 +87,7 @@ fn set_and_execute(
 ) {
     let imm = initial_imm.unwrap_or(rng.gen_range(0..(1 << IMM_BITS)));
     let imm_sign = initial_imm_sign.unwrap_or(rng.gen_range(0..2));
-    let imm_ext = imm + imm_sign * (0xffffffff ^ ((1 << IMM_BITS) - 1));
+    let imm_ext = imm + (imm_sign * 0xffff0000);
     let a = rng.gen_range(0..32) << 2;
     let b = rng.gen_range(1..32) << 2;
     let to_pc = rng.gen_range(0..(1 << PC_BITS));
@@ -55,8 +97,9 @@ fn set_and_execute(
 
     tester.write(1, b, rs1);
 
+    let initial_pc = initial_pc.unwrap_or(rng.gen_range(0..(1 << PC_BITS)));
     tester.execute_with_pc(
-        chip,
+        harness,
         &Instruction::from_usize(
             opcode.global_opcode(),
             [
@@ -69,18 +112,17 @@ fn set_and_execute(
                 imm_sign as usize,
             ],
         ),
-        initial_pc.unwrap_or(rng.gen_range(0..(1 << PC_BITS))),
+        initial_pc,
     );
-    let initial_pc = tester.execution.last_from_pc().as_canonical_u32();
     let final_pc = tester.execution.last_to_pc().as_canonical_u32();
 
     let rs1 = compose(rs1);
 
-    let (next_pc, rd_data) = run_jalr(opcode, initial_pc, imm_ext, rs1);
+    let (next_pc, rd_data) = run_jalr(initial_pc, rs1, imm as u16, imm_sign == 1);
     let rd_data = if a == 0 { [0; 4] } else { rd_data };
 
-    assert_eq!(next_pc, final_pc);
-    assert_eq!(rd_data.map(F::from_canonical_u32), tester.read::<4>(1, a));
+    assert_eq!(next_pc & !1, final_pc);
+    assert_eq!(rd_data.map(F::from_canonical_u8), tester.read::<4>(1, a));
 }
 
 ///////////////////////////////////////////////////////////////////////////////////////
@@ -92,24 +134,14 @@ fn set_and_execute(
 #[test]
 fn rand_jalr_test() {
     let mut rng = create_seeded_rng();
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
     let mut tester = VmChipTestBuilder::default();
-    let range_checker_chip = tester.memory_controller().borrow().range_checker.clone();
+    let (mut harness, bitwise) = create_test_chip(&mut tester);
 
-    let adapter = Rv32JalrAdapterChip::<F>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-    );
-    let inner = Rv32JalrCoreChip::new(bitwise_chip.clone(), range_checker_chip.clone());
-    let mut chip = Rv32JalrChip::<F>::new(adapter, inner, tester.offline_memory_mutex_arc());
-
-    let num_tests: usize = 100;
-    for _ in 0..num_tests {
+    let num_ops = 100;
+    for _ in 0..num_ops {
         set_and_execute(
             &mut tester,
-            &mut chip,
+            &mut harness,
             &mut rng,
             JALR,
             None,
@@ -119,8 +151,11 @@ fn rand_jalr_test() {
         );
     }
 
-    drop(range_checker_chip);
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
+    let tester = tester
+        .build()
+        .load(harness)
+        .load_periphery(bitwise)
+        .finalize();
     tester.simple_test().expect("Verification failed");
 }
 
@@ -128,10 +163,18 @@ fn rand_jalr_test() {
 // NEGATIVE TESTS
 //
 // Given a fake trace of a single operation, setup a chip and run the test. We replace
-// the write part of the trace and check that the core chip throws the expected error.
-// A dummy adaptor is used so memory interactions don't indirectly cause false passes.
+// part of the trace and check that the chip throws the expected error.
 //////////////////////////////////////////////////////////////////////////////////////
 
+#[derive(Clone, Copy, Default, PartialEq)]
+struct JalrPrankValues {
+    pub rd_data: Option<[u32; RV32_REGISTER_NUM_LIMBS - 1]>,
+    pub rs1_data: Option<[u32; RV32_REGISTER_NUM_LIMBS]>,
+    pub to_pc_least_sig_bit: Option<u32>,
+    pub to_pc_limbs: Option<[u32; 2]>,
+    pub imm_sign: Option<u32>,
+}
+
 #[allow(clippy::too_many_arguments)]
 fn run_negative_jalr_test(
     opcode: Rv32JalrOpcode,
@@ -139,31 +182,17 @@ fn run_negative_jalr_test(
     initial_rs1: Option<[u32; RV32_REGISTER_NUM_LIMBS]>,
     initial_imm: Option<u32>,
     initial_imm_sign: Option<u32>,
-    rd_data: Option<[u32; RV32_REGISTER_NUM_LIMBS - 1]>,
-    rs1_data: Option<[u32; RV32_REGISTER_NUM_LIMBS]>,
-    to_pc_least_sig_bit: Option<u32>,
-    to_pc_limbs: Option<[u32; 2]>,
-    imm_sign: Option<u32>,
-    expected_error: VerificationError,
+    prank_vals: JalrPrankValues,
+    interaction_error: bool,
 ) {
     let mut rng = create_seeded_rng();
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
     let mut tester = VmChipTestBuilder::default();
-    let range_checker_chip = tester.memory_controller().borrow().range_checker.clone();
 
-    let adapter = Rv32JalrAdapterChip::<F>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-    );
-    let adapter_width = BaseAir::<F>::width(adapter.air());
-    let inner = Rv32JalrCoreChip::new(bitwise_chip.clone(), range_checker_chip.clone());
-    let mut chip = Rv32JalrChip::<F>::new(adapter, inner, tester.offline_memory_mutex_arc());
+    let (mut harness, bitwise) = create_test_chip(&mut tester);
 
     set_and_execute(
         &mut tester,
-        &mut chip,
+        &mut harness,
         &mut rng,
         opcode,
         initial_imm,
@@ -172,49 +201,38 @@ fn run_negative_jalr_test(
         initial_rs1,
     );
 
-    let tester = tester.build();
-
-    let jalr_trace_width = chip.trace_width();
-    let air = chip.air();
-    let mut chip_input = chip.generate_air_proof_input();
-    let jalr_trace = chip_input.raw.common_main.as_mut().unwrap();
-    {
-        let mut trace_row = jalr_trace.row_slice(0).to_vec();
-
+    let adapter_width = BaseAir::<F>::width(&harness.air.adapter);
+    let modify_trace = |trace: &mut DenseMatrix<BabyBear>| {
+        let mut trace_row = trace.row_slice(0).to_vec();
         let (_, core_row) = trace_row.split_at_mut(adapter_width);
-
         let core_cols: &mut Rv32JalrCoreCols<F> = core_row.borrow_mut();
 
-        if let Some(data) = rd_data {
+        if let Some(data) = prank_vals.rd_data {
             core_cols.rd_data = data.map(F::from_canonical_u32);
         }
-
-        if let Some(data) = rs1_data {
+        if let Some(data) = prank_vals.rs1_data {
             core_cols.rs1_data = data.map(F::from_canonical_u32);
         }
-
-        if let Some(data) = to_pc_least_sig_bit {
+        if let Some(data) = prank_vals.to_pc_least_sig_bit {
             core_cols.to_pc_least_sig_bit = F::from_canonical_u32(data);
         }
-
-        if let Some(data) = to_pc_limbs {
+        if let Some(data) = prank_vals.to_pc_limbs {
             core_cols.to_pc_limbs = data.map(F::from_canonical_u32);
         }
-
-        if let Some(data) = imm_sign {
+        if let Some(data) = prank_vals.imm_sign {
             core_cols.imm_sign = F::from_canonical_u32(data);
         }
 
-        *jalr_trace = RowMajorMatrix::new(trace_row, jalr_trace_width);
-    }
+        *trace = RowMajorMatrix::new(trace_row, trace.width());
+    };
 
-    drop(range_checker_chip);
     disable_debug_builder();
     let tester = tester
-        .load_air_proof_input((air, chip_input))
-        .load(bitwise_chip)
+        .build()
+        .load_and_prank_trace(harness, modify_trace)
+        .load_periphery(bitwise)
         .finalize();
-    tester.simple_test_with_expected_error(expected_error);
+    tester.simple_test_with_expected_error(get_verification_error(interaction_error));
 }
 
 #[test]
@@ -225,12 +243,11 @@ fn invalid_cols_negative_tests() {
         None,
         Some(15362),
         Some(0),
-        None,
-        None,
-        None,
-        None,
-        Some(1),
-        VerificationError::OodEvaluationMismatch,
+        JalrPrankValues {
+            imm_sign: Some(1),
+            ..Default::default()
+        },
+        false,
     );
 
     run_negative_jalr_test(
@@ -239,12 +256,11 @@ fn invalid_cols_negative_tests() {
         None,
         Some(15362),
         Some(1),
-        None,
-        None,
-        None,
-        None,
-        Some(0),
-        VerificationError::OodEvaluationMismatch,
+        JalrPrankValues {
+            imm_sign: Some(0),
+            ..Default::default()
+        },
+        false,
     );
 
     run_negative_jalr_test(
@@ -253,12 +269,11 @@ fn invalid_cols_negative_tests() {
         Some([23, 154, 67, 28]),
         Some(42512),
         Some(1),
-        None,
-        None,
-        Some(0),
-        None,
-        None,
-        VerificationError::OodEvaluationMismatch,
+        JalrPrankValues {
+            to_pc_least_sig_bit: Some(0),
+            ..Default::default()
+        },
+        false,
     );
 }
 
@@ -270,12 +285,11 @@ fn overflow_negative_tests() {
         None,
         None,
         None,
-        Some([1, 0, 0]),
-        None,
-        None,
-        None,
-        None,
-        VerificationError::ChallengePhaseError,
+        JalrPrankValues {
+            rd_data: Some([1, 0, 0]),
+            ..Default::default()
+        },
+        true,
     );
 
     run_negative_jalr_test(
@@ -284,15 +298,14 @@ fn overflow_negative_tests() {
         Some([0, 0, 0, 0]),
         Some((1 << 15) - 2),
         Some(0),
-        None,
-        None,
-        None,
-        Some([
-            (F::NEG_ONE * F::from_canonical_u32((1 << 14) + 1)).as_canonical_u32(),
-            1,
-        ]),
-        None,
-        VerificationError::ChallengePhaseError,
+        JalrPrankValues {
+            to_pc_limbs: Some([
+                (F::NEG_ONE * F::from_canonical_u32((1 << 14) + 1)).as_canonical_u32(),
+                1,
+            ]),
+            ..Default::default()
+        },
+        true,
     );
 }
 
@@ -301,44 +314,13 @@ fn overflow_negative_tests() {
 ///
 /// Ensure that solve functions produce the correct results.
 ///////////////////////////////////////////////////////////////////////////////////////
-#[test]
-fn execute_roundtrip_sanity_test() {
-    let mut rng = create_seeded_rng();
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-    let mut tester = VmChipTestBuilder::default();
-    let range_checker_chip = tester.memory_controller().borrow().range_checker.clone();
-
-    let adapter = Rv32JalrAdapterChip::<F>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-    );
-    let inner = Rv32JalrCoreChip::new(bitwise_chip, range_checker_chip);
-    let mut chip = Rv32JalrChip::<F>::new(adapter, inner, tester.offline_memory_mutex_arc());
-
-    let num_tests: usize = 10;
-    for _ in 0..num_tests {
-        set_and_execute(
-            &mut tester,
-            &mut chip,
-            &mut rng,
-            JALR,
-            None,
-            None,
-            None,
-            None,
-        );
-    }
-}
 
 #[test]
 fn run_jalr_sanity_test() {
-    let opcode = JALR;
     let initial_pc = 789456120;
     let imm = -1235_i32 as u32;
     let rs1 = 736482910;
-    let (next_pc, rd_data) = run_jalr(opcode, initial_pc, imm, rs1);
-    assert_eq!(next_pc, 736481674);
+    let (next_pc, rd_data) = run_jalr(initial_pc, rs1, imm as u16, true);
+    assert_eq!(next_pc & !1, 736481674);
     assert_eq!(rd_data, [252, 36, 14, 47]);
 }
diff --git a/extensions/rv32im/circuit/src/less_than/core.rs b/extensions/rv32im/circuit/src/less_than/core.rs
index a605dc43de..8e45491a17 100644
--- a/extensions/rv32im/circuit/src/less_than/core.rs
+++ b/extensions/rv32im/circuit/src/less_than/core.rs
@@ -3,16 +3,17 @@ use std::{
     borrow::{Borrow, BorrowMut},
 };
 
-use openvm_circuit::arch::{
-    AdapterAirContext, AdapterRuntimeContext, MinimalInstruction, Result, VmAdapterInterface,
-    VmCoreAir, VmCoreChip,
+use openvm_circuit::{
+    arch::*,
+    system::memory::{online::TracingMemory, MemoryAuxColsFactory},
 };
 use openvm_circuit_primitives::{
     bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
     utils::not,
+    AlignedBytesBorrow,
 };
 use openvm_circuit_primitives_derive::AlignedBorrow;
-use openvm_instructions::{instruction::Instruction, LocalOpcode};
+use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP, LocalOpcode};
 use openvm_rv32im_transpiler::LessThanOpcode;
 use openvm_stark_backend::{
     interaction::InteractionBuilder,
@@ -20,12 +21,10 @@ use openvm_stark_backend::{
     p3_field::{Field, FieldAlgebra, PrimeField32},
     rap::BaseAirWithPublicValues,
 };
-use serde::{de::DeserializeOwned, Deserialize, Serialize};
-use serde_big_array::BigArray;
 use strum::IntoEnumIterator;
 
 #[repr(C)]
-#[derive(AlignedBorrow)]
+#[derive(AlignedBorrow, Debug)]
 pub struct LessThanCoreCols<T, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
     pub b: [T; NUM_LIMBS],
     pub c: [T; NUM_LIMBS],
@@ -45,7 +44,7 @@ pub struct LessThanCoreCols<T, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
     pub diff_val: T,
 }
 
-#[derive(Copy, Clone, Debug)]
+#[derive(Copy, Clone, Debug, derive_new::new)]
 pub struct LessThanCoreAir<const NUM_LIMBS: usize, const LIMB_BITS: usize> {
     pub bus: BitwiseOperationLookupBus,
     offset: usize,
@@ -164,162 +163,177 @@ where
 }
 
 #[repr(C)]
-#[derive(Clone, Debug, Serialize, Deserialize)]
-#[serde(bound = "T: Serialize + DeserializeOwned")]
-pub struct LessThanCoreRecord<T, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
-    #[serde(with = "BigArray")]
-    pub b: [T; NUM_LIMBS],
-    #[serde(with = "BigArray")]
-    pub c: [T; NUM_LIMBS],
-    pub cmp_result: T,
-    pub b_msb_f: T,
-    pub c_msb_f: T,
-    pub diff_val: T,
-    pub diff_idx: usize,
-    pub opcode: LessThanOpcode,
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct LessThanCoreRecord<const NUM_LIMBS: usize, const LIMB_BITS: usize> {
+    pub b: [u8; NUM_LIMBS],
+    pub c: [u8; NUM_LIMBS],
+    pub local_opcode: u8,
 }
 
-pub struct LessThanCoreChip<const NUM_LIMBS: usize, const LIMB_BITS: usize> {
-    pub air: LessThanCoreAir<NUM_LIMBS, LIMB_BITS>,
-    pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<LIMB_BITS>,
+#[derive(Clone, Copy, derive_new::new)]
+pub struct LessThanExecutor<A, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
+    adapter: A,
+    pub offset: usize,
 }
 
-impl<const NUM_LIMBS: usize, const LIMB_BITS: usize> LessThanCoreChip<NUM_LIMBS, LIMB_BITS> {
-    pub fn new(
-        bitwise_lookup_chip: SharedBitwiseOperationLookupChip<LIMB_BITS>,
-        offset: usize,
-    ) -> Self {
-        Self {
-            air: LessThanCoreAir {
-                bus: bitwise_lookup_chip.bus(),
-                offset,
-            },
-            bitwise_lookup_chip,
-        }
-    }
+#[derive(Clone, derive_new::new)]
+pub struct LessThanFiller<A, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
+    adapter: A,
+    pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<LIMB_BITS>,
+    pub offset: usize,
 }
 
-impl<F: PrimeField32, I: VmAdapterInterface<F>, const NUM_LIMBS: usize, const LIMB_BITS: usize>
-    VmCoreChip<F, I> for LessThanCoreChip<NUM_LIMBS, LIMB_BITS>
+impl<F, A, RA, const NUM_LIMBS: usize, const LIMB_BITS: usize> PreflightExecutor<F, RA>
+    for LessThanExecutor<A, NUM_LIMBS, LIMB_BITS>
 where
-    I::Reads: Into<[[F; NUM_LIMBS]; 2]>,
-    I::Writes: From<[[F; NUM_LIMBS]; 1]>,
+    F: PrimeField32,
+    A: 'static
+        + AdapterTraceExecutor<
+            F,
+            ReadData: Into<[[u8; NUM_LIMBS]; 2]>,
+            WriteData: From<[[u8; NUM_LIMBS]; 1]>,
+        >,
+    for<'buf> RA: RecordArena<
+        'buf,
+        EmptyAdapterCoreLayout<F, A>,
+        (
+            A::RecordMut<'buf>,
+            &'buf mut LessThanCoreRecord<NUM_LIMBS, LIMB_BITS>,
+        ),
+    >,
 {
-    type Record = LessThanCoreRecord<F, NUM_LIMBS, LIMB_BITS>;
-    type Air = LessThanCoreAir<NUM_LIMBS, LIMB_BITS>;
+    fn get_opcode_name(&self, opcode: usize) -> String {
+        format!("{:?}", LessThanOpcode::from_usize(opcode - self.offset))
+    }
 
-    #[allow(clippy::type_complexity)]
-    fn execute_instruction(
+    fn execute(
         &self,
+        state: VmStateMut<F, TracingMemory, RA>,
         instruction: &Instruction<F>,
-        _from_pc: u32,
-        reads: I::Reads,
-    ) -> Result<(AdapterRuntimeContext<F, I>, Self::Record)> {
+    ) -> Result<(), ExecutionError> {
+        debug_assert!(LIMB_BITS <= 8);
         let Instruction { opcode, .. } = instruction;
-        let less_than_opcode = LessThanOpcode::from_usize(opcode.local_opcode_idx(self.air.offset));
 
-        let data: [[F; NUM_LIMBS]; 2] = reads.into();
-        let b = data[0].map(|x| x.as_canonical_u32());
-        let c = data[1].map(|y| y.as_canonical_u32());
+        let (mut adapter_record, core_record) = state.ctx.alloc(EmptyAdapterCoreLayout::new());
+        A::start(*state.pc, state.memory, &mut adapter_record);
+
+        let [rs1, rs2] = self
+            .adapter
+            .read(state.memory, instruction, &mut adapter_record)
+            .into();
+
+        core_record.b = rs1;
+        core_record.c = rs2;
+        core_record.local_opcode = opcode.local_opcode_idx(self.offset) as u8;
+
+        let (cmp_result, _, _, _) = run_less_than::<NUM_LIMBS, LIMB_BITS>(
+            core_record.local_opcode == LessThanOpcode::SLT as u8,
+            &rs1,
+            &rs2,
+        );
+
+        let mut output = [0u8; NUM_LIMBS];
+        output[0] = cmp_result as u8;
+
+        self.adapter.write(
+            state.memory,
+            instruction,
+            [output].into(),
+            &mut adapter_record,
+        );
+
+        *state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP);
+
+        Ok(())
+    }
+}
+
+impl<F, A, const NUM_LIMBS: usize, const LIMB_BITS: usize> TraceFiller<F>
+    for LessThanFiller<A, NUM_LIMBS, LIMB_BITS>
+where
+    F: PrimeField32,
+    A: 'static + AdapterTraceFiller<F>,
+{
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, row_slice: &mut [F]) {
+        let (adapter_row, mut core_row) = unsafe { row_slice.split_at_mut_unchecked(A::WIDTH) };
+        self.adapter.fill_trace_row(mem_helper, adapter_row);
+        let record: &LessThanCoreRecord<NUM_LIMBS, LIMB_BITS> =
+            unsafe { get_record_from_slice(&mut core_row, ()) };
+
+        let core_row: &mut LessThanCoreCols<F, NUM_LIMBS, LIMB_BITS> = core_row.borrow_mut();
+
+        let is_slt = record.local_opcode == LessThanOpcode::SLT as u8;
         let (cmp_result, diff_idx, b_sign, c_sign) =
-            run_less_than::<NUM_LIMBS, LIMB_BITS>(less_than_opcode, &b, &c);
+            run_less_than::<NUM_LIMBS, LIMB_BITS>(is_slt, &record.b, &record.c);
 
         // We range check (b_msb_f + 128) and (c_msb_f + 128) if signed,
         // b_msb_f and c_msb_f if not
         let (b_msb_f, b_msb_range) = if b_sign {
             (
-                -F::from_canonical_u32((1 << LIMB_BITS) - b[NUM_LIMBS - 1]),
-                b[NUM_LIMBS - 1] - (1 << (LIMB_BITS - 1)),
+                -F::from_canonical_u16((1u16 << LIMB_BITS) - record.b[NUM_LIMBS - 1] as u16),
+                record.b[NUM_LIMBS - 1] - (1u8 << (LIMB_BITS - 1)),
             )
         } else {
             (
-                F::from_canonical_u32(b[NUM_LIMBS - 1]),
-                b[NUM_LIMBS - 1]
-                    + (((less_than_opcode == LessThanOpcode::SLT) as u32) << (LIMB_BITS - 1)),
+                F::from_canonical_u8(record.b[NUM_LIMBS - 1]),
+                record.b[NUM_LIMBS - 1] + ((is_slt as u8) << (LIMB_BITS - 1)),
             )
         };
         let (c_msb_f, c_msb_range) = if c_sign {
             (
-                -F::from_canonical_u32((1 << LIMB_BITS) - c[NUM_LIMBS - 1]),
-                c[NUM_LIMBS - 1] - (1 << (LIMB_BITS - 1)),
+                -F::from_canonical_u16((1u16 << LIMB_BITS) - record.c[NUM_LIMBS - 1] as u16),
+                record.c[NUM_LIMBS - 1] - (1u8 << (LIMB_BITS - 1)),
             )
         } else {
             (
-                F::from_canonical_u32(c[NUM_LIMBS - 1]),
-                c[NUM_LIMBS - 1]
-                    + (((less_than_opcode == LessThanOpcode::SLT) as u32) << (LIMB_BITS - 1)),
+                F::from_canonical_u8(record.c[NUM_LIMBS - 1]),
+                record.c[NUM_LIMBS - 1] + ((is_slt as u8) << (LIMB_BITS - 1)),
             )
         };
-        self.bitwise_lookup_chip
-            .request_range(b_msb_range, c_msb_range);
 
-        let diff_val = if diff_idx == NUM_LIMBS {
-            0
+        core_row.diff_val = if diff_idx == NUM_LIMBS {
+            F::ZERO
         } else if diff_idx == (NUM_LIMBS - 1) {
             if cmp_result {
                 c_msb_f - b_msb_f
             } else {
                 b_msb_f - c_msb_f
             }
-            .as_canonical_u32()
         } else if cmp_result {
-            c[diff_idx] - b[diff_idx]
+            F::from_canonical_u8(record.c[diff_idx] - record.b[diff_idx])
         } else {
-            b[diff_idx] - c[diff_idx]
+            F::from_canonical_u8(record.b[diff_idx] - record.c[diff_idx])
         };
 
+        self.bitwise_lookup_chip
+            .request_range(b_msb_range as u32, c_msb_range as u32);
+
+        core_row.diff_marker = [F::ZERO; NUM_LIMBS];
         if diff_idx != NUM_LIMBS {
-            self.bitwise_lookup_chip.request_range(diff_val - 1, 0);
+            self.bitwise_lookup_chip
+                .request_range(core_row.diff_val.as_canonical_u32() - 1, 0);
+            core_row.diff_marker[diff_idx] = F::ONE;
         }
 
-        let mut writes = [0u32; NUM_LIMBS];
-        writes[0] = cmp_result as u32;
-
-        let output = AdapterRuntimeContext::without_pc([writes.map(F::from_canonical_u32)]);
-        let record = LessThanCoreRecord {
-            opcode: less_than_opcode,
-            b: data[0],
-            c: data[1],
-            cmp_result: F::from_bool(cmp_result),
-            b_msb_f,
-            c_msb_f,
-            diff_val: F::from_canonical_u32(diff_val),
-            diff_idx,
-        };
-
-        Ok((output, record))
-    }
-
-    fn get_opcode_name(&self, opcode: usize) -> String {
-        format!("{:?}", LessThanOpcode::from_usize(opcode - self.air.offset))
-    }
-
-    fn generate_trace_row(&self, row_slice: &mut [F], record: Self::Record) {
-        let row_slice: &mut LessThanCoreCols<_, NUM_LIMBS, LIMB_BITS> = row_slice.borrow_mut();
-        row_slice.b = record.b;
-        row_slice.c = record.c;
-        row_slice.cmp_result = record.cmp_result;
-        row_slice.b_msb_f = record.b_msb_f;
-        row_slice.c_msb_f = record.c_msb_f;
-        row_slice.diff_val = record.diff_val;
-        row_slice.opcode_slt_flag = F::from_bool(record.opcode == LessThanOpcode::SLT);
-        row_slice.opcode_sltu_flag = F::from_bool(record.opcode == LessThanOpcode::SLTU);
-        row_slice.diff_marker = array::from_fn(|i| F::from_bool(i == record.diff_idx));
-    }
-
-    fn air(&self) -> &Self::Air {
-        &self.air
+        core_row.c_msb_f = c_msb_f;
+        core_row.b_msb_f = b_msb_f;
+        core_row.opcode_sltu_flag = F::from_bool(!is_slt);
+        core_row.opcode_slt_flag = F::from_bool(is_slt);
+        core_row.cmp_result = F::from_bool(cmp_result);
+        core_row.c = record.c.map(F::from_canonical_u8);
+        core_row.b = record.b.map(F::from_canonical_u8);
     }
 }
 
 // Returns (cmp_result, diff_idx, x_sign, y_sign)
+#[inline(always)]
 pub(super) fn run_less_than<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
-    opcode: LessThanOpcode,
-    x: &[u32; NUM_LIMBS],
-    y: &[u32; NUM_LIMBS],
+    is_slt: bool,
+    x: &[u8; NUM_LIMBS],
+    y: &[u8; NUM_LIMBS],
 ) -> (bool, usize, bool, bool) {
-    let x_sign = (x[NUM_LIMBS - 1] >> (LIMB_BITS - 1) == 1) && opcode == LessThanOpcode::SLT;
-    let y_sign = (y[NUM_LIMBS - 1] >> (LIMB_BITS - 1) == 1) && opcode == LessThanOpcode::SLT;
+    let x_sign = (x[NUM_LIMBS - 1] >> (LIMB_BITS - 1) == 1) && is_slt;
+    let y_sign = (y[NUM_LIMBS - 1] >> (LIMB_BITS - 1) == 1) && is_slt;
     for i in (0..NUM_LIMBS).rev() {
         if x[i] != y[i] {
             return ((x[i] < y[i]) ^ x_sign ^ y_sign, i, x_sign, y_sign);
diff --git a/extensions/rv32im/circuit/src/less_than/execution.rs b/extensions/rv32im/circuit/src/less_than/execution.rs
new file mode 100644
index 0000000000..16c11377e5
--- /dev/null
+++ b/extensions/rv32im/circuit/src/less_than/execution.rs
@@ -0,0 +1,214 @@
+use std::{
+    borrow::{Borrow, BorrowMut},
+    mem::size_of,
+};
+
+use openvm_circuit::{
+    arch::{
+        E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait,
+        MeteredExecutor, StaticProgramError, VmExecState,
+    },
+    system::memory::online::GuestMemory,
+};
+use openvm_circuit_primitives_derive::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_IMM_AS, RV32_REGISTER_AS, RV32_REGISTER_NUM_LIMBS},
+    LocalOpcode,
+};
+use openvm_rv32im_transpiler::LessThanOpcode;
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::core::LessThanExecutor;
+use crate::adapters::imm_to_bytes;
+
+/// Operands of one SLT/SLTU instruction, decoded once by `pre_compute_impl` and read
+/// back by the `execute_*` handlers.
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct LessThanPreCompute {
+    /// Second operand: the immediate expanded via `imm_to_bytes` when `e` is the
+    /// immediate address space, otherwise the raw `c` field of the instruction.
+    c: u32,
+    /// `a` field of the instruction (where the result is written), truncated to `u8`.
+    a: u8,
+    /// `b` field of the instruction (where the first operand is read), truncated to `u8`.
+    b: u8,
+}
+
+impl<A, const LIMB_BITS: usize> LessThanExecutor<A, { RV32_REGISTER_NUM_LIMBS }, LIMB_BITS> {
+    /// Validates `inst` and stores its decoded operands in `data`.
+    ///
+    /// Returns `(is_imm, is_sltu)`: whether `e` is the immediate address space, and
+    /// whether the opcode is `SLTU`. Returns `InvalidInstruction(pc)` unless `d` is the
+    /// register address space and `e` is the immediate or register address space.
+    #[inline(always)]
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut LessThanPreCompute,
+    ) -> Result<(bool, bool), StaticProgramError> {
+        let Instruction {
+            opcode,
+            a,
+            b,
+            c,
+            d,
+            e,
+            ..
+        } = inst;
+        let e_u32 = e.as_canonical_u32();
+        if d.as_canonical_u32() != RV32_REGISTER_AS
+            || !(e_u32 == RV32_IMM_AS || e_u32 == RV32_REGISTER_AS)
+        {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+        let local_opcode = LessThanOpcode::from_usize(opcode.local_opcode_idx(self.offset));
+        let is_imm = e_u32 == RV32_IMM_AS;
+        let c_u32 = c.as_canonical_u32();
+
+        *data = LessThanPreCompute {
+            c: if is_imm {
+                u32::from_le_bytes(imm_to_bytes(c_u32))
+            } else {
+                c_u32
+            },
+            // NOTE(review): `as u8` truncates; assumes `a` and `b` fit in a byte - confirm.
+            a: a.as_canonical_u32() as u8,
+            b: b.as_canonical_u32() as u8,
+        };
+        Ok((is_imm, local_opcode == LessThanOpcode::SLTU))
+    }
+}
+
+impl<F, A, const LIMB_BITS: usize> Executor<F>
+    for LessThanExecutor<A, { RV32_REGISTER_NUM_LIMBS }, LIMB_BITS>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize {
+        size_of::<LessThanPreCompute>()
+    }
+
+    #[inline(always)]
+    fn pre_compute<Ctx: ExecutionCtxTrait>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let pre_compute: &mut LessThanPreCompute = data.borrow_mut();
+        let (is_imm, is_sltu) = self.pre_compute_impl(pc, inst, pre_compute)?;
+        // Select the handler monomorphized for this operand kind and signedness.
+        let fn_ptr = match (is_imm, is_sltu) {
+            (true, true) => execute_e1_impl::<_, _, true, true>,
+            (true, false) => execute_e1_impl::<_, _, true, false>,
+            (false, true) => execute_e1_impl::<_, _, false, true>,
+            (false, false) => execute_e1_impl::<_, _, false, false>,
+        };
+        Ok(fn_ptr)
+    }
+}
+
+impl<F, A, const LIMB_BITS: usize> MeteredExecutor<F>
+    for LessThanExecutor<A, { RV32_REGISTER_NUM_LIMBS }, LIMB_BITS>
+where
+    F: PrimeField32,
+{
+    fn metered_pre_compute_size(&self) -> usize {
+        size_of::<E2PreCompute<LessThanPreCompute>>()
+    }
+
+    fn metered_pre_compute<Ctx>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        let pre_compute: &mut E2PreCompute<LessThanPreCompute> = data.borrow_mut();
+        pre_compute.chip_idx = chip_idx as u32;
+        let (is_imm, is_sltu) = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?;
+        // Select the handler monomorphized for this operand kind and signedness.
+        let fn_ptr = match (is_imm, is_sltu) {
+            (true, true) => execute_e2_impl::<_, _, true, true>,
+            (true, false) => execute_e2_impl::<_, _, true, false>,
+            (false, true) => execute_e2_impl::<_, _, false, true>,
+            (false, false) => execute_e2_impl::<_, _, false, false>,
+        };
+        Ok(fn_ptr)
+    }
+}
+
+/// Shared body of the E1 and E2 handlers: reads `rs1` (and `rs2` unless `E_IS_IMM`),
+/// compares them as `u32` when `IS_U32` and as `i32` otherwise, writes the 0/1 result
+/// into the first byte of `rd`, then advances `pc` and `instret`.
+///
+/// NOTE(review): the `unsafe` contract is not stated in this file - document it.
+unsafe fn execute_e12_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const E_IS_IMM: bool,
+    const IS_U32: bool,
+>(
+    pre_compute: &LessThanPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let rs1 = vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.b as u32);
+    let rs2 = if E_IS_IMM {
+        pre_compute.c.to_le_bytes()
+    } else {
+        vm_state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.c)
+    };
+    let cmp_result = if IS_U32 {
+        u32::from_le_bytes(rs1) < u32::from_le_bytes(rs2)
+    } else {
+        i32::from_le_bytes(rs1) < i32::from_le_bytes(rs2)
+    };
+    let mut rd = [0u8; RV32_REGISTER_NUM_LIMBS];
+    rd[0] = cmp_result as u8;
+    vm_state.vm_write(RV32_REGISTER_AS, pre_compute.a as u32, &rd);
+
+    // Wrapping add so the step cannot trip the debug-build overflow check.
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+}
+
+/// E1 entry point: reinterprets `pre_compute` as `LessThanPreCompute` and runs the
+/// shared handler.
+unsafe fn execute_e1_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const E_IS_IMM: bool,
+    const IS_U32: bool,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &LessThanPreCompute = pre_compute.borrow();
+    execute_e12_impl::<F, CTX, E_IS_IMM, IS_U32>(pre_compute, vm_state);
+}
+
+/// E2 (metered) entry point: calls `on_height_change(chip_idx, 1)` on the context,
+/// then runs the shared handler.
+unsafe fn execute_e2_impl<
+    F: PrimeField32,
+    CTX: MeteredExecutionCtxTrait,
+    const E_IS_IMM: bool,
+    const IS_U32: bool,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &E2PreCompute<LessThanPreCompute> = pre_compute.borrow();
+    vm_state
+        .ctx
+        .on_height_change(pre_compute.chip_idx as usize, 1);
+    execute_e12_impl::<F, CTX, E_IS_IMM, IS_U32>(&pre_compute.data, vm_state);
+}
diff --git a/extensions/rv32im/circuit/src/less_than/mod.rs b/extensions/rv32im/circuit/src/less_than/mod.rs
index f8247d2d33..d04ef99977 100644
--- a/extensions/rv32im/circuit/src/less_than/mod.rs
+++ b/extensions/rv32im/circuit/src/less_than/mod.rs
@@ -1,15 +1,32 @@
-use openvm_circuit::arch::VmChipWrapper;
+use openvm_circuit::arch::{VmAirWrapper, VmChipWrapper};
 
-use super::adapters::{Rv32BaseAluAdapterChip, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS};
+use super::adapters::{
+    Rv32BaseAluAdapterAir, Rv32BaseAluAdapterExecutor, Rv32BaseAluAdapterFiller, RV32_CELL_BITS,
+    RV32_REGISTER_NUM_LIMBS,
+};
 
 mod core;
+mod execution;
 pub use core::*;
 
 #[cfg(test)]
 mod tests;
 
+/// RV32 less-than AIR: `Rv32BaseAluAdapterAir` wrapped around `LessThanCoreAir`.
+pub type Rv32LessThanAir =
+    VmAirWrapper<Rv32BaseAluAdapterAir, LessThanCoreAir<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>>;
+/// RV32 less-than executor: `LessThanExecutor` over `Rv32BaseAluAdapterExecutor`.
+pub type Rv32LessThanExecutor = LessThanExecutor<
+    Rv32BaseAluAdapterExecutor<RV32_CELL_BITS>,
+    RV32_REGISTER_NUM_LIMBS,
+    RV32_CELL_BITS,
+>;
+/// RV32 less-than chip: `VmChipWrapper` around `LessThanFiller` with the base-ALU filler.
 pub type Rv32LessThanChip<F> = VmChipWrapper<
     F,
-    Rv32BaseAluAdapterChip<F>,
-    LessThanCoreChip<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>,
+    LessThanFiller<
+        Rv32BaseAluAdapterFiller<RV32_CELL_BITS>,
+        RV32_REGISTER_NUM_LIMBS,
+        RV32_CELL_BITS,
+    >,
 >;
diff --git a/extensions/rv32im/circuit/src/less_than/tests.rs b/extensions/rv32im/circuit/src/less_than/tests.rs
index 18d64bf5f6..c23ac9aba1 100644
--- a/extensions/rv32im/circuit/src/less_than/tests.rs
+++ b/extensions/rv32im/circuit/src/less_than/tests.rs
@@ -1,17 +1,15 @@
-use std::borrow::BorrowMut;
+use std::{array, borrow::BorrowMut, sync::Arc};
 
 use openvm_circuit::{
-    arch::{
-        testing::{TestAdapterChip, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS},
-        ExecutionBridge, VmAdapterChip, VmChipWrapper,
-    },
-    utils::{generate_long_number, i32_to_f},
+    arch::testing::{TestChipHarness, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS},
+    utils::i32_to_f,
 };
 use openvm_circuit_primitives::bitwise_op_lookup::{
-    BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
+    BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
+    SharedBitwiseOperationLookupChip,
 };
-use openvm_instructions::{instruction::Instruction, LocalOpcode};
-use openvm_rv32im_transpiler::LessThanOpcode;
+use openvm_instructions::LocalOpcode;
+use openvm_rv32im_transpiler::LessThanOpcode::{self, *};
 use openvm_stark_backend::{
     p3_air::BaseAir,
     p3_field::{FieldAlgebra, PrimeField32},
@@ -20,20 +18,105 @@ use openvm_stark_backend::{
         Matrix,
     },
     utils::disable_debug_builder,
-    verifier::VerificationError,
-    ChipUsageGetter,
 };
 use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
-use rand::Rng;
+use rand::{rngs::StdRng, Rng};
+use test_case::test_case;
 
-use super::{core::run_less_than, LessThanCoreChip, Rv32LessThanChip};
+use super::{core::run_less_than, LessThanCoreAir, Rv32LessThanChip};
 use crate::{
-    adapters::{Rv32BaseAluAdapterChip, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS},
+    adapters::{
+        Rv32BaseAluAdapterAir, Rv32BaseAluAdapterExecutor, Rv32BaseAluAdapterFiller,
+        RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS,
+    },
     less_than::LessThanCoreCols,
-    test_utils::{generate_rv32_is_type_immediate, rv32_rand_write_register_or_imm},
+    test_utils::{
+        generate_rv32_is_type_immediate, get_verification_error, rv32_rand_write_register_or_imm,
+    },
+    LessThanFiller, Rv32LessThanAir, Rv32LessThanExecutor,
 };
 
 type F = BabyBear;
+const MAX_INS_CAPACITY: usize = 128; // capacity handed to `Harness::with_capacity`
+type Harness = TestChipHarness<F, Rv32LessThanExecutor, Rv32LessThanAir, Rv32LessThanChip<F>>;
+
+/// Builds the less-than test harness (AIR + executor + trace filler) around a single
+/// shared bitwise-lookup chip, and also hands back that chip with its AIR so the
+/// caller can load it as a periphery chip.
+fn create_test_chip(
+    tester: &VmChipTestBuilder<F>,
+) -> (
+    Harness,
+    (
+        BitwiseOperationLookupAir<RV32_CELL_BITS>,
+        SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+    ),
+) {
+    let offset = LessThanOpcode::CLASS_OFFSET;
+    let bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
+    let lookup = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bus));
+
+    // The adapter AIR and the core AIR are constructed with the same lookup bus.
+    let adapter_air =
+        Rv32BaseAluAdapterAir::new(tester.execution_bridge(), tester.memory_bridge(), bus);
+    let air = Rv32LessThanAir::new(adapter_air, LessThanCoreAir::new(bus, offset));
+    let executor = Rv32LessThanExecutor::new(Rv32BaseAluAdapterExecutor, offset);
+    // The core filler and its adapter filler each hold a handle to the same lookup chip.
+    let filler = LessThanFiller::new(
+        Rv32BaseAluAdapterFiller::new(lookup.clone()),
+        lookup.clone(),
+        offset,
+    );
+    let chip = Rv32LessThanChip::<F>::new(filler, tester.memory_helper());
+
+    let lookup_air = lookup.air;
+    (
+        Harness::with_capacity(executor, air, chip, MAX_INS_CAPACITY),
+        (lookup_air, lookup),
+    )
+}
+
+/// Executes one less-than instruction on `harness` (random operands unless pinned via
+/// `b` / `c` / `is_imm`) and asserts the value written to `rd` matches `run_less_than`.
+#[allow(clippy::too_many_arguments)]
+fn set_and_execute(
+    tester: &mut VmChipTestBuilder<F>,
+    harness: &mut Harness,
+    rng: &mut StdRng,
+    opcode: LessThanOpcode,
+    b: Option<[u8; RV32_REGISTER_NUM_LIMBS]>,
+    is_imm: Option<bool>,
+    c: Option<[u8; RV32_REGISTER_NUM_LIMBS]>,
+) {
+    // Fallback values are drawn eagerly (before `unwrap_or`), so the RNG stream
+    // advances the same way whether or not the caller pinned the operand.
+    let random_b = array::from_fn(|_| rng.gen_range(0..=u8::MAX));
+    let b = b.unwrap_or(random_b);
+    let coin = rng.gen_bool(0.5);
+    let (c_imm, c) = match (is_imm.unwrap_or(coin), c) {
+        // Immediate operand supplied by the caller: encode its low 24 bits.
+        (true, Some(c)) => (Some((u32::from_le_bytes(c) & 0xFFFFFF) as usize), c),
+        (true, None) => {
+            let (imm, c) = generate_rv32_is_type_immediate(rng);
+            (Some(imm), c)
+        }
+        (false, c) => {
+            let random_c = array::from_fn(|_| rng.gen_range(0..=u8::MAX));
+            (None, c.unwrap_or(random_c))
+        }
+    };
+
+    let opcode_idx = opcode.global_opcode().as_usize();
+    let (instruction, rd) = rv32_rand_write_register_or_imm(tester, b, c, c_imm, opcode_idx, rng);
+    tester.execute(harness, &instruction);
+
+    // Only the lowest limb of the expected result carries the comparison bit.
+    let signed = opcode == SLT;
+    let (cmp, ..) = run_less_than::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(signed, &b, &c);
+    let mut expected = [F::ZERO; RV32_REGISTER_NUM_LIMBS];
+    expected[0] = F::from_bool(cmp);
+    assert_eq!(expected, tester.read::<RV32_REGISTER_NUM_LIMBS>(1, rd));
+}
 
 //////////////////////////////////////////////////////////////////////////////////////
 // POSITIVE TESTS
@@ -42,100 +125,63 @@ type F = BabyBear;
 // passes all constraints.
 //////////////////////////////////////////////////////////////////////////////////////
 
+#[test_case(SLT, 100)]
+#[test_case(SLTU, 100)]
 fn run_rv32_lt_rand_test(opcode: LessThanOpcode, num_ops: usize) {
     let mut rng = create_seeded_rng();
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-
     let mut tester = VmChipTestBuilder::default();
-    let mut chip = Rv32LessThanChip::<F>::new(
-        Rv32BaseAluAdapterChip::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-            bitwise_chip.clone(),
-        ),
-        LessThanCoreChip::new(bitwise_chip.clone(), LessThanOpcode::CLASS_OFFSET),
-        tester.offline_memory_mutex_arc(),
-    );
+    let (mut harness, bitwise) = create_test_chip(&tester);
 
     for _ in 0..num_ops {
-        let b = generate_long_number::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(&mut rng);
-        let (c_imm, c) = if rng.gen_bool(0.5) {
-            (
-                None,
-                generate_long_number::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(&mut rng),
-            )
-        } else {
-            let (imm, c) = generate_rv32_is_type_immediate(&mut rng);
-            (Some(imm), c)
-        };
-
-        let (instruction, rd) = rv32_rand_write_register_or_imm(
+        set_and_execute(
             &mut tester,
-            b,
-            c,
-            c_imm,
-            opcode.global_opcode().as_usize(),
+            &mut harness,
             &mut rng,
+            opcode,
+            None,
+            None,
+            None,
         );
-        tester.execute(&mut chip, &instruction);
-
-        let (cmp, _, _, _) =
-            run_less_than::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(opcode, &b, &c);
-        let mut a = [F::ZERO; RV32_REGISTER_NUM_LIMBS];
-        a[0] = F::from_bool(cmp);
-        assert_eq!(a, tester.read::<RV32_REGISTER_NUM_LIMBS>(1, rd));
     }
 
     // Test special case where b = c
     let b = [101, 128, 202, 255];
-    let (instruction, _) = rv32_rand_write_register_or_imm(
+    set_and_execute(
         &mut tester,
-        b,
-        b,
-        None,
-        opcode.global_opcode().as_usize(),
+        &mut harness,
         &mut rng,
+        opcode,
+        Some(b),
+        Some(false),
+        Some(b),
     );
-    tester.execute(&mut chip, &instruction);
 
     let b = [36, 0, 0, 0];
-    let (instruction, _) = rv32_rand_write_register_or_imm(
+    set_and_execute(
         &mut tester,
-        b,
-        b,
-        Some(36),
-        opcode.global_opcode().as_usize(),
+        &mut harness,
         &mut rng,
+        opcode,
+        Some(b),
+        Some(true),
+        Some(b),
     );
-    tester.execute(&mut chip, &instruction);
 
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
+    let tester = tester
+        .build()
+        .load(harness)
+        .load_periphery(bitwise)
+        .finalize();
     tester.simple_test().expect("Verification failed");
 }
 
-#[test]
-fn rv32_slt_rand_test() {
-    run_rv32_lt_rand_test(LessThanOpcode::SLT, 100);
-}
-
-#[test]
-fn rv32_sltu_rand_test() {
-    run_rv32_lt_rand_test(LessThanOpcode::SLTU, 100);
-}
-
 //////////////////////////////////////////////////////////////////////////////////////
 // NEGATIVE TESTS
 //
 // Given a fake trace of a single operation, setup a chip and run the test. We replace
-// the write part of the trace and check that the core chip throws the expected error.
-// A dummy adapter is used so memory interactions don't indirectly cause false passes.
+// part of the trace and check that the chip throws the expected error.
 //////////////////////////////////////////////////////////////////////////////////////
 
-type Rv32LessThanTestChip<F> =
-    VmChipWrapper<F, TestAdapterChip<F>, LessThanCoreChip<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>>;
-
 #[derive(Clone, Copy, Default, PartialEq)]
 struct LessThanPrankValues<const NUM_LIMBS: usize> {
     pub b_msb: Option<i32>,
@@ -145,67 +191,29 @@ struct LessThanPrankValues<const NUM_LIMBS: usize> {
 }
 
 #[allow(clippy::too_many_arguments)]
-fn run_rv32_lt_negative_test(
+fn run_negative_less_than_test(
     opcode: LessThanOpcode,
-    b: [u32; RV32_REGISTER_NUM_LIMBS],
-    c: [u32; RV32_REGISTER_NUM_LIMBS],
-    cmp_result: bool,
+    b: [u8; RV32_REGISTER_NUM_LIMBS],
+    c: [u8; RV32_REGISTER_NUM_LIMBS],
+    prank_cmp_result: bool,
     prank_vals: LessThanPrankValues<RV32_REGISTER_NUM_LIMBS>,
     interaction_error: bool,
 ) {
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-
+    let mut rng = create_seeded_rng();
     let mut tester: VmChipTestBuilder<BabyBear> = VmChipTestBuilder::default();
-    let mut chip = Rv32LessThanTestChip::<F>::new(
-        TestAdapterChip::new(
-            vec![[b.map(F::from_canonical_u32), c.map(F::from_canonical_u32)].concat()],
-            vec![None],
-            ExecutionBridge::new(tester.execution_bus(), tester.program_bus()),
-        ),
-        LessThanCoreChip::new(bitwise_chip.clone(), LessThanOpcode::CLASS_OFFSET),
-        tester.offline_memory_mutex_arc(),
-    );
+    let (mut harness, bitwise) = create_test_chip(&tester);
 
-    tester.execute(
-        &mut chip,
-        &Instruction::from_usize(opcode.global_opcode(), [0, 0, 0, 1, 1]),
+    set_and_execute(
+        &mut tester,
+        &mut harness,
+        &mut rng,
+        opcode,
+        Some(b),
+        Some(false),
+        Some(c),
     );
 
-    let trace_width = chip.trace_width();
-    let adapter_width = BaseAir::<F>::width(chip.adapter.air());
-    let (_, _, b_sign, c_sign) =
-        run_less_than::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(opcode, &b, &c);
-
-    if prank_vals != LessThanPrankValues::default() {
-        debug_assert!(prank_vals.diff_val.is_some());
-        let b_msb = prank_vals.b_msb.unwrap_or(
-            b[RV32_REGISTER_NUM_LIMBS - 1] as i32 - if b_sign { 1 << RV32_CELL_BITS } else { 0 },
-        );
-        let c_msb = prank_vals.c_msb.unwrap_or(
-            c[RV32_REGISTER_NUM_LIMBS - 1] as i32 - if c_sign { 1 << RV32_CELL_BITS } else { 0 },
-        );
-        let sign_offset = if opcode == LessThanOpcode::SLT {
-            1 << (RV32_CELL_BITS - 1)
-        } else {
-            0
-        };
-
-        bitwise_chip.clear();
-        bitwise_chip.request_range(
-            (b_msb + sign_offset) as u8 as u32,
-            (c_msb + sign_offset) as u8 as u32,
-        );
-
-        let diff_val = prank_vals
-            .diff_val
-            .unwrap()
-            .clamp(0, (1 << RV32_CELL_BITS) - 1);
-        if diff_val > 0 {
-            bitwise_chip.request_range(diff_val - 1, 0);
-        }
-    };
-
+    let adapter_width = BaseAir::<F>::width(&harness.air.adapter);
     let modify_trace = |trace: &mut DenseMatrix<BabyBear>| {
         let mut values = trace.row_slice(0).to_vec();
         let cols: &mut LessThanCoreCols<F, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS> =
@@ -223,22 +231,18 @@ fn run_rv32_lt_negative_test(
         if let Some(diff_val) = prank_vals.diff_val {
             cols.diff_val = F::from_canonical_u32(diff_val);
         }
-        cols.cmp_result = F::from_bool(cmp_result);
+        cols.cmp_result = F::from_bool(prank_cmp_result);
 
-        *trace = RowMajorMatrix::new(values, trace_width);
+        *trace = RowMajorMatrix::new(values, trace.width());
     };
 
     disable_debug_builder();
     let tester = tester
         .build()
-        .load_and_prank_trace(chip, modify_trace)
-        .load(bitwise_chip)
+        .load_and_prank_trace(harness, modify_trace)
+        .load_periphery(bitwise)
         .finalize();
-    tester.simple_test_with_expected_error(if interaction_error {
-        VerificationError::ChallengePhaseError
-    } else {
-        VerificationError::OodEvaluationMismatch
-    });
+    tester.simple_test_with_expected_error(get_verification_error(interaction_error));
 }
 
 #[test]
@@ -246,8 +250,8 @@ fn rv32_lt_wrong_false_cmp_negative_test() {
     let b = [145, 34, 25, 205];
     let c = [73, 35, 25, 205];
     let prank_vals = Default::default();
-    run_rv32_lt_negative_test(LessThanOpcode::SLT, b, c, false, prank_vals, false);
-    run_rv32_lt_negative_test(LessThanOpcode::SLTU, b, c, false, prank_vals, false);
+    run_negative_less_than_test(SLT, b, c, false, prank_vals, false);
+    run_negative_less_than_test(SLTU, b, c, false, prank_vals, false);
 }
 
 #[test]
@@ -255,8 +259,8 @@ fn rv32_lt_wrong_true_cmp_negative_test() {
     let b = [73, 35, 25, 205];
     let c = [145, 34, 25, 205];
     let prank_vals = Default::default();
-    run_rv32_lt_negative_test(LessThanOpcode::SLT, b, c, true, prank_vals, false);
-    run_rv32_lt_negative_test(LessThanOpcode::SLTU, b, c, true, prank_vals, false);
+    run_negative_less_than_test(SLT, b, c, true, prank_vals, false);
+    run_negative_less_than_test(SLTU, b, c, true, prank_vals, false);
 }
 
 #[test]
@@ -264,8 +268,8 @@ fn rv32_lt_wrong_eq_negative_test() {
     let b = [73, 35, 25, 205];
     let c = [73, 35, 25, 205];
     let prank_vals = Default::default();
-    run_rv32_lt_negative_test(LessThanOpcode::SLT, b, c, true, prank_vals, false);
-    run_rv32_lt_negative_test(LessThanOpcode::SLTU, b, c, true, prank_vals, false);
+    run_negative_less_than_test(SLT, b, c, true, prank_vals, false);
+    run_negative_less_than_test(SLTU, b, c, true, prank_vals, false);
 }
 
 #[test]
@@ -276,8 +280,8 @@ fn rv32_lt_fake_diff_val_negative_test() {
         diff_val: Some(F::NEG_ONE.as_canonical_u32()),
         ..Default::default()
     };
-    run_rv32_lt_negative_test(LessThanOpcode::SLT, b, c, false, prank_vals, true);
-    run_rv32_lt_negative_test(LessThanOpcode::SLTU, b, c, false, prank_vals, true);
+    run_negative_less_than_test(SLT, b, c, false, prank_vals, true);
+    run_negative_less_than_test(SLTU, b, c, false, prank_vals, true);
 }
 
 #[test]
@@ -289,8 +293,8 @@ fn rv32_lt_zero_diff_val_negative_test() {
         diff_val: Some(0),
         ..Default::default()
     };
-    run_rv32_lt_negative_test(LessThanOpcode::SLT, b, c, false, prank_vals, true);
-    run_rv32_lt_negative_test(LessThanOpcode::SLTU, b, c, false, prank_vals, true);
+    run_negative_less_than_test(SLT, b, c, false, prank_vals, true);
+    run_negative_less_than_test(SLTU, b, c, false, prank_vals, true);
 }
 
 #[test]
@@ -302,8 +306,8 @@ fn rv32_lt_fake_diff_marker_negative_test() {
         diff_val: Some(72),
         ..Default::default()
     };
-    run_rv32_lt_negative_test(LessThanOpcode::SLT, b, c, false, prank_vals, false);
-    run_rv32_lt_negative_test(LessThanOpcode::SLTU, b, c, false, prank_vals, false);
+    run_negative_less_than_test(SLT, b, c, false, prank_vals, false);
+    run_negative_less_than_test(SLTU, b, c, false, prank_vals, false);
 }
 
 #[test]
@@ -315,8 +319,8 @@ fn rv32_lt_zero_diff_marker_negative_test() {
         diff_val: Some(0),
         ..Default::default()
     };
-    run_rv32_lt_negative_test(LessThanOpcode::SLT, b, c, false, prank_vals, false);
-    run_rv32_lt_negative_test(LessThanOpcode::SLTU, b, c, false, prank_vals, false);
+    run_negative_less_than_test(SLT, b, c, false, prank_vals, false);
+    run_negative_less_than_test(SLTU, b, c, false, prank_vals, false);
 }
 
 #[test]
@@ -329,7 +333,7 @@ fn rv32_slt_wrong_b_msb_negative_test() {
         diff_val: Some(1),
         ..Default::default()
     };
-    run_rv32_lt_negative_test(LessThanOpcode::SLT, b, c, false, prank_vals, false);
+    run_negative_less_than_test(SLT, b, c, false, prank_vals, false);
 }
 
 #[test]
@@ -342,7 +346,7 @@ fn rv32_slt_wrong_b_msb_sign_negative_test() {
         diff_val: Some(256),
         ..Default::default()
     };
-    run_rv32_lt_negative_test(LessThanOpcode::SLT, b, c, false, prank_vals, true);
+    run_negative_less_than_test(SLT, b, c, false, prank_vals, true);
 }
 
 #[test]
@@ -355,7 +359,7 @@ fn rv32_slt_wrong_c_msb_negative_test() {
         diff_val: Some(1),
         ..Default::default()
     };
-    run_rv32_lt_negative_test(LessThanOpcode::SLT, b, c, true, prank_vals, false);
+    run_negative_less_than_test(SLT, b, c, true, prank_vals, false);
 }
 
 #[test]
@@ -368,7 +372,7 @@ fn rv32_slt_wrong_c_msb_sign_negative_test() {
         diff_val: Some(256),
         ..Default::default()
     };
-    run_rv32_lt_negative_test(LessThanOpcode::SLT, b, c, true, prank_vals, true);
+    run_negative_less_than_test(SLT, b, c, true, prank_vals, true);
 }
 
 #[test]
@@ -381,7 +385,7 @@ fn rv32_sltu_wrong_b_msb_negative_test() {
         diff_val: Some(1),
         ..Default::default()
     };
-    run_rv32_lt_negative_test(LessThanOpcode::SLTU, b, c, true, prank_vals, false);
+    run_negative_less_than_test(SLTU, b, c, true, prank_vals, false);
 }
 
 #[test]
@@ -394,7 +398,7 @@ fn rv32_sltu_wrong_b_msb_sign_negative_test() {
         diff_val: Some(256),
         ..Default::default()
     };
-    run_rv32_lt_negative_test(LessThanOpcode::SLTU, b, c, true, prank_vals, true);
+    run_negative_less_than_test(SLTU, b, c, true, prank_vals, true);
 }
 
 #[test]
@@ -407,7 +411,7 @@ fn rv32_sltu_wrong_c_msb_negative_test() {
         diff_val: Some(1),
         ..Default::default()
     };
-    run_rv32_lt_negative_test(LessThanOpcode::SLTU, b, c, false, prank_vals, false);
+    run_negative_less_than_test(SLTU, b, c, false, prank_vals, false);
 }
 
 #[test]
@@ -420,7 +424,7 @@ fn rv32_sltu_wrong_c_msb_sign_negative_test() {
         diff_val: Some(256),
         ..Default::default()
     };
-    run_rv32_lt_negative_test(LessThanOpcode::SLTU, b, c, false, prank_vals, true);
+    run_negative_less_than_test(SLTU, b, c, false, prank_vals, true);
 }
 
 ///////////////////////////////////////////////////////////////////////////////////////
@@ -431,10 +435,10 @@ fn rv32_sltu_wrong_c_msb_sign_negative_test() {
 
 #[test]
 fn run_sltu_sanity_test() {
-    let x: [u32; RV32_REGISTER_NUM_LIMBS] = [145, 34, 25, 205];
-    let y: [u32; RV32_REGISTER_NUM_LIMBS] = [73, 35, 25, 205];
+    let x: [u8; RV32_REGISTER_NUM_LIMBS] = [145, 34, 25, 205];
+    let y: [u8; RV32_REGISTER_NUM_LIMBS] = [73, 35, 25, 205];
     let (cmp_result, diff_idx, x_sign, y_sign) =
-        run_less_than::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(LessThanOpcode::SLTU, &x, &y);
+        run_less_than::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(false, &x, &y);
     assert!(cmp_result);
     assert_eq!(diff_idx, 1);
     assert!(!x_sign); // unsigned
@@ -443,10 +447,10 @@ fn run_sltu_sanity_test() {
 
 #[test]
 fn run_slt_same_sign_sanity_test() {
-    let x: [u32; RV32_REGISTER_NUM_LIMBS] = [145, 34, 25, 205];
-    let y: [u32; RV32_REGISTER_NUM_LIMBS] = [73, 35, 25, 205];
+    let x: [u8; RV32_REGISTER_NUM_LIMBS] = [145, 34, 25, 205];
+    let y: [u8; RV32_REGISTER_NUM_LIMBS] = [73, 35, 25, 205];
     let (cmp_result, diff_idx, x_sign, y_sign) =
-        run_less_than::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(LessThanOpcode::SLT, &x, &y);
+        run_less_than::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(true, &x, &y);
     assert!(cmp_result);
     assert_eq!(diff_idx, 1);
     assert!(x_sign); // negative
@@ -455,10 +459,10 @@ fn run_slt_same_sign_sanity_test() {
 
 #[test]
 fn run_slt_diff_sign_sanity_test() {
-    let x: [u32; RV32_REGISTER_NUM_LIMBS] = [45, 35, 25, 55];
-    let y: [u32; RV32_REGISTER_NUM_LIMBS] = [173, 34, 25, 205];
+    let x: [u8; RV32_REGISTER_NUM_LIMBS] = [45, 35, 25, 55];
+    let y: [u8; RV32_REGISTER_NUM_LIMBS] = [173, 34, 25, 205];
     let (cmp_result, diff_idx, x_sign, y_sign) =
-        run_less_than::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(LessThanOpcode::SLT, &x, &y);
+        run_less_than::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(true, &x, &y);
     assert!(!cmp_result);
     assert_eq!(diff_idx, 3);
     assert!(!x_sign); // positive
@@ -467,9 +471,9 @@ fn run_slt_diff_sign_sanity_test() {
 
 #[test]
 fn run_less_than_equal_sanity_test() {
-    let x: [u32; RV32_REGISTER_NUM_LIMBS] = [45, 35, 25, 55];
+    let x: [u8; RV32_REGISTER_NUM_LIMBS] = [45, 35, 25, 55];
     let (cmp_result, diff_idx, x_sign, y_sign) =
-        run_less_than::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(LessThanOpcode::SLT, &x, &x);
+        run_less_than::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(true, &x, &x);
     assert!(!cmp_result);
     assert_eq!(diff_idx, RV32_REGISTER_NUM_LIMBS);
     assert!(!x_sign); // positive
diff --git a/extensions/rv32im/circuit/src/lib.rs b/extensions/rv32im/circuit/src/lib.rs
index 2006b27038..6224c0450a 100644
--- a/extensions/rv32im/circuit/src/lib.rs
+++ b/extensions/rv32im/circuit/src/lib.rs
@@ -1,5 +1,20 @@
-pub mod adapters;
+use openvm_circuit::{
+    arch::{
+        AirInventory, ChipInventoryError, InitFileGenerator, MatrixRecordArena, SystemConfig,
+        VmBuilder, VmChipComplex, VmProverExtension,
+    },
+    system::{SystemChipInventory, SystemCpuBuilder, SystemExecutor},
+};
+use openvm_circuit_derive::{Executor, PreflightExecutor, VmConfig};
+use openvm_stark_backend::{
+    config::{StarkGenericConfig, Val},
+    engine::StarkEngine,
+    p3_field::PrimeField32,
+    prover::cpu::{CpuBackend, CpuDevice},
+};
+use serde::{Deserialize, Serialize};
 
+pub mod adapters;
 mod auipc;
 mod base_alu;
 mod branch_eq;
@@ -35,3 +50,137 @@ pub use extension::*;
 
 #[cfg(any(test, feature = "test-utils"))]
 mod test_utils;
+
+/// Config for a VM with base extension and IO extension
+#[derive(Clone, Debug, derive_new::new, VmConfig, Serialize, Deserialize)]
+pub struct Rv32IConfig {
+    #[config(executor = "SystemExecutor<F>")]
+    pub system: SystemConfig, // system config (executor: `SystemExecutor<F>`)
+    #[extension]
+    pub base: Rv32I, // base extension
+    #[extension]
+    pub io: Rv32Io, // IO extension
+}
+
+// Empty impl: every `InitFileGenerator` item keeps its default, so no init file is used
+impl InitFileGenerator for Rv32IConfig {}
+
+/// Config for a VM with base extension, IO extension, and multiplication extension
+#[derive(Clone, Debug, Default, VmConfig, derive_new::new, Serialize, Deserialize)]
+pub struct Rv32ImConfig {
+    #[config]
+    pub rv32i: Rv32IConfig, // nested RV32I config (system + base + IO)
+    #[extension]
+    pub mul: Rv32M, // multiplication extension
+}
+
+// Empty impl: every `InitFileGenerator` item keeps its default, so no init file is used
+impl InitFileGenerator for Rv32ImConfig {}
+
+impl Default for Rv32IConfig {
+    /// Default system config paired with the default base and IO extensions.
+    fn default() -> Self {
+        Self {
+            system: SystemConfig::default(),
+            base: Default::default(),
+            io: Default::default(),
+        }
+    }
+}
+
+impl Rv32IConfig {
+    /// Default extensions on a default system config with `with_public_values` applied.
+    pub fn with_public_values(public_values: usize) -> Self {
+        Self {
+            system: SystemConfig::default().with_public_values(public_values),
+            base: Default::default(),
+            io: Default::default(),
+        }
+    }
+
+    /// Same as above, additionally applying `with_max_segment_len(segment_len)`.
+    pub fn with_public_values_and_segment_len(public_values: usize, segment_len: usize) -> Self {
+        Self {
+            system: SystemConfig::default()
+                .with_public_values(public_values)
+                .with_max_segment_len(segment_len),
+            base: Default::default(),
+            io: Default::default(),
+        }
+    }
+}
+
+impl Rv32ImConfig {
+    /// RV32I config from `Rv32IConfig::with_public_values`, plus the default `mul` extension.
+    pub fn with_public_values(public_values: usize) -> Self {
+        let rv32i = Rv32IConfig::with_public_values(public_values);
+        let mul = Default::default();
+        Self { rv32i, mul }
+    }
+
+    /// Like `with_public_values`, but the inner RV32I config also receives `segment_len`.
+    pub fn with_public_values_and_segment_len(public_values: usize, segment_len: usize) -> Self {
+        let rv32i = Rv32IConfig::with_public_values_and_segment_len(public_values, segment_len);
+        let mul = Default::default();
+        Self { rv32i, mul }
+    }
+}
+
+#[derive(Clone)]
+pub struct Rv32ICpuBuilder; // CPU-backend `VmBuilder` for `Rv32IConfig`
+
+impl<E, SC> VmBuilder<E> for Rv32ICpuBuilder
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    Val<SC>: PrimeField32,
+{
+    type VmConfig = Rv32IConfig;
+    type SystemChipInventory = SystemChipInventory<SC>;
+    type RecordArena = MatrixRecordArena<Val<SC>>;
+
+    fn create_chip_complex(
+        &self,
+        config: &Rv32IConfig,
+        circuit: AirInventory<SC>,
+    ) -> Result<
+        VmChipComplex<SC, Self::RecordArena, E::PB, Self::SystemChipInventory>,
+        ChipInventoryError,
+    > {
+        let mut complex =
+            VmBuilder::<E>::create_chip_complex(&SystemCpuBuilder, &config.system, circuit)?;
+        let ext = Rv32ImCpuProverExt; // one prover extension serves both `base` and `io`
+        VmProverExtension::<E, _, _>::extend_prover(&ext, &config.base, &mut complex.inventory)?;
+        VmProverExtension::<E, _, _>::extend_prover(&ext, &config.io, &mut complex.inventory)?;
+        Ok(complex)
+    }
+}
+
+#[derive(Clone)]
+pub struct Rv32ImCpuBuilder; // CPU-backend `VmBuilder` for `Rv32ImConfig`
+
+impl<E, SC> VmBuilder<E> for Rv32ImCpuBuilder
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    Val<SC>: PrimeField32,
+{
+    type VmConfig = Rv32ImConfig;
+    type SystemChipInventory = SystemChipInventory<SC>;
+    type RecordArena = MatrixRecordArena<Val<SC>>;
+
+    fn create_chip_complex(
+        &self,
+        config: &Self::VmConfig,
+        circuit: AirInventory<SC>,
+    ) -> Result<
+        VmChipComplex<SC, Self::RecordArena, E::PB, Self::SystemChipInventory>,
+        ChipInventoryError,
+    > {
+        let mut complex =
+            VmBuilder::<E>::create_chip_complex(&Rv32ICpuBuilder, &config.rv32i, circuit)?;
+        let ext = Rv32ImCpuProverExt; // the RV32I complex is built first, then `mul` is added
+        VmProverExtension::<E, _, _>::extend_prover(&ext, &config.mul, &mut complex.inventory)?;
+        Ok(complex)
+    }
+}
diff --git a/extensions/rv32im/circuit/src/load_sign_extend/core.rs b/extensions/rv32im/circuit/src/load_sign_extend/core.rs
index 2284d6815c..b1663bcf1e 100644
--- a/extensions/rv32im/circuit/src/load_sign_extend/core.rs
+++ b/extensions/rv32im/circuit/src/load_sign_extend/core.rs
@@ -3,15 +3,22 @@ use std::{
     borrow::{Borrow, BorrowMut},
 };
 
-use openvm_circuit::arch::{
-    AdapterAirContext, AdapterRuntimeContext, Result, VmAdapterInterface, VmCoreAir, VmCoreChip,
+use openvm_circuit::{
+    arch::*,
+    system::memory::{online::TracingMemory, MemoryAuxColsFactory},
 };
 use openvm_circuit_primitives::{
     utils::select,
     var_range::{SharedVariableRangeCheckerChip, VariableRangeCheckerBus},
+    AlignedBytesBorrow,
 };
 use openvm_circuit_primitives_derive::AlignedBorrow;
-use openvm_instructions::{instruction::Instruction, LocalOpcode};
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS},
+    LocalOpcode,
+};
 use openvm_rv32im_transpiler::Rv32LoadStoreOpcode::{self, *};
 use openvm_stark_backend::{
     interaction::InteractionBuilder,
@@ -19,10 +26,8 @@ use openvm_stark_backend::{
     p3_field::{Field, FieldAlgebra, PrimeField32},
     rap::BaseAirWithPublicValues,
 };
-use serde::{de::DeserializeOwned, Deserialize, Serialize};
-use serde_big_array::BigArray;
 
-use crate::adapters::LoadStoreInstruction;
+use crate::adapters::{LoadStoreInstruction, Rv32LoadStoreAdapterFiller};
 
 /// LoadSignExtend Core Chip handles byte/halfword into word conversions through sign extend
 /// This chip uses read_data to construct write_data
@@ -46,20 +51,7 @@ pub struct LoadSignExtendCoreCols<T, const NUM_CELLS: usize> {
     pub prev_data: [T; NUM_CELLS],
 }
 
-#[repr(C)]
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[serde(bound = "F: Serialize + DeserializeOwned")]
-pub struct LoadSignExtendCoreRecord<F, const NUM_CELLS: usize> {
-    #[serde(with = "BigArray")]
-    pub shifted_read_data: [F; NUM_CELLS],
-    #[serde(with = "BigArray")]
-    pub prev_data: [F; NUM_CELLS],
-    pub opcode: Rv32LoadStoreOpcode,
-    pub shift_amount: u32,
-    pub most_sig_bit: bool,
-}
-
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, derive_new::new)]
 pub struct LoadSignExtendCoreAir<const NUM_CELLS: usize, const LIMB_BITS: usize> {
     pub range_bus: VariableRangeCheckerBus,
 }
@@ -178,135 +170,163 @@ where
     }
 }
 
-pub struct LoadSignExtendCoreChip<const NUM_CELLS: usize, const LIMB_BITS: usize> {
-    pub air: LoadSignExtendCoreAir<NUM_CELLS, LIMB_BITS>,
-    pub range_checker_chip: SharedVariableRangeCheckerChip,
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct LoadSignExtendCoreRecord<const NUM_CELLS: usize> {
+    pub is_byte: bool,
+    pub shift_amount: u8,
+    pub read_data: [u8; NUM_CELLS],
+    pub prev_data: [u8; NUM_CELLS],
 }
 
-impl<const NUM_CELLS: usize, const LIMB_BITS: usize> LoadSignExtendCoreChip<NUM_CELLS, LIMB_BITS> {
-    pub fn new(range_checker_chip: SharedVariableRangeCheckerChip) -> Self {
-        Self {
-            air: LoadSignExtendCoreAir::<NUM_CELLS, LIMB_BITS> {
-                range_bus: range_checker_chip.bus(),
-            },
-            range_checker_chip,
-        }
-    }
+#[derive(Clone, Copy, derive_new::new)]
+pub struct LoadSignExtendExecutor<A, const NUM_CELLS: usize, const LIMB_BITS: usize> {
+    adapter: A,
+}
+
+#[derive(Clone, derive_new::new)]
+pub struct LoadSignExtendFiller<
+    A = Rv32LoadStoreAdapterFiller,
+    const NUM_CELLS: usize = RV32_REGISTER_NUM_LIMBS,
+    const LIMB_BITS: usize = RV32_CELL_BITS,
+> {
+    adapter: A,
+    pub range_checker_chip: SharedVariableRangeCheckerChip,
 }
 
-impl<F: PrimeField32, I: VmAdapterInterface<F>, const NUM_CELLS: usize, const LIMB_BITS: usize>
-    VmCoreChip<F, I> for LoadSignExtendCoreChip<NUM_CELLS, LIMB_BITS>
+impl<F, A, RA, const NUM_CELLS: usize, const LIMB_BITS: usize> PreflightExecutor<F, RA>
+    for LoadSignExtendExecutor<A, NUM_CELLS, LIMB_BITS>
 where
-    I::Reads: Into<([[F; NUM_CELLS]; 2], F)>,
-    I::Writes: From<[[F; NUM_CELLS]; 1]>,
+    F: PrimeField32,
+    A: 'static
+        + AdapterTraceExecutor<
+            F,
+            ReadData = (([u32; NUM_CELLS], [u8; NUM_CELLS]), u8),
+            WriteData = [u32; NUM_CELLS],
+        >,
+    for<'buf> RA: RecordArena<
+        'buf,
+        EmptyAdapterCoreLayout<F, A>,
+        (
+            A::RecordMut<'buf>,
+            &'buf mut LoadSignExtendCoreRecord<NUM_CELLS>,
+        ),
+    >,
 {
-    type Record = LoadSignExtendCoreRecord<F, NUM_CELLS>;
-    type Air = LoadSignExtendCoreAir<NUM_CELLS, LIMB_BITS>;
+    fn get_opcode_name(&self, opcode: usize) -> String {
+        format!(
+            "{:?}",
+            Rv32LoadStoreOpcode::from_usize(opcode - Rv32LoadStoreOpcode::CLASS_OFFSET)
+        )
+    }
 
-    #[allow(clippy::type_complexity)]
-    fn execute_instruction(
+    fn execute(
         &self,
+        state: VmStateMut<F, TracingMemory, RA>,
         instruction: &Instruction<F>,
-        _from_pc: u32,
-        reads: I::Reads,
-    ) -> Result<(AdapterRuntimeContext<F, I>, Self::Record)> {
+    ) -> Result<(), ExecutionError> {
+        let Instruction { opcode, .. } = instruction;
+
         let local_opcode = Rv32LoadStoreOpcode::from_usize(
-            instruction
-                .opcode
-                .local_opcode_idx(Rv32LoadStoreOpcode::CLASS_OFFSET),
+            opcode.local_opcode_idx(Rv32LoadStoreOpcode::CLASS_OFFSET),
         );
 
-        let (data, shift_amount) = reads.into();
-        let shift_amount = shift_amount.as_canonical_u32();
-        let write_data: [F; NUM_CELLS] = run_write_data_sign_extend::<_, NUM_CELLS, LIMB_BITS>(
+        let (mut adapter_record, core_record) = state.ctx.alloc(EmptyAdapterCoreLayout::new());
+
+        A::start(*state.pc, state.memory, &mut adapter_record);
+
+        let tmp = self
+            .adapter
+            .read(state.memory, instruction, &mut adapter_record);
+
+        core_record.is_byte = local_opcode == LOADB;
+        core_record.prev_data = tmp.0 .0.map(|x| x as u8);
+        core_record.read_data = tmp.0 .1;
+        core_record.shift_amount = tmp.1;
+
+        let write_data = run_write_data_sign_extend(
             local_opcode,
-            data[1],
-            data[0],
-            shift_amount,
+            core_record.read_data,
+            core_record.shift_amount as usize,
         );
-        let output = AdapterRuntimeContext::without_pc([write_data]);
 
-        let most_sig_limb = match local_opcode {
-            LOADB => write_data[0],
-            LOADH => write_data[NUM_CELLS / 2 - 1],
-            _ => unreachable!(),
-        }
-        .as_canonical_u32();
+        self.adapter.write(
+            state.memory,
+            instruction,
+            write_data.map(u32::from),
+            &mut adapter_record,
+        );
 
-        let most_sig_bit = most_sig_limb & (1 << (LIMB_BITS - 1));
-        self.range_checker_chip
-            .add_count(most_sig_limb - most_sig_bit, LIMB_BITS - 1);
-
-        let read_shift = shift_amount & 2;
-
-        Ok((
-            output,
-            LoadSignExtendCoreRecord {
-                opcode: local_opcode,
-                most_sig_bit: most_sig_bit != 0,
-                prev_data: data[0],
-                shifted_read_data: array::from_fn(|i| {
-                    data[1][(i + read_shift as usize) % NUM_CELLS]
-                }),
-                shift_amount,
-            },
-        ))
-    }
+        *state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP);
 
-    fn get_opcode_name(&self, opcode: usize) -> String {
-        format!(
-            "{:?}",
-            Rv32LoadStoreOpcode::from_usize(opcode - Rv32LoadStoreOpcode::CLASS_OFFSET)
-        )
+        Ok(())
     }
+}
+
+impl<F, A, const NUM_CELLS: usize, const LIMB_BITS: usize> TraceFiller<F>
+    for LoadSignExtendFiller<A, NUM_CELLS, LIMB_BITS>
+where
+    F: PrimeField32,
+    A: 'static + AdapterTraceFiller<F>,
+{
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, row_slice: &mut [F]) {
+        let (adapter_row, mut core_row) = unsafe { row_slice.split_at_mut_unchecked(A::WIDTH) };
+        self.adapter.fill_trace_row(mem_helper, adapter_row);
+        let record: &LoadSignExtendCoreRecord<NUM_CELLS> =
+            unsafe { get_record_from_slice(&mut core_row, ()) };
+
+        let core_row: &mut LoadSignExtendCoreCols<F, NUM_CELLS> = core_row.borrow_mut();
 
-    fn generate_trace_row(&self, row_slice: &mut [F], record: Self::Record) {
-        let core_cols: &mut LoadSignExtendCoreCols<F, NUM_CELLS> = row_slice.borrow_mut();
-        let opcode = record.opcode;
         let shift = record.shift_amount;
-        core_cols.opcode_loadb_flag0 = F::from_bool(opcode == LOADB && (shift & 1) == 0);
-        core_cols.opcode_loadb_flag1 = F::from_bool(opcode == LOADB && (shift & 1) == 1);
-        core_cols.opcode_loadh_flag = F::from_bool(opcode == LOADH);
-        core_cols.shift_most_sig_bit = F::from_canonical_u32((shift & 2) >> 1);
-        core_cols.data_most_sig_bit = F::from_bool(record.most_sig_bit);
-        core_cols.prev_data = record.prev_data;
-        core_cols.shifted_read_data = record.shifted_read_data;
-    }
+        let most_sig_limb = if record.is_byte {
+            record.read_data[shift as usize]
+        } else {
+            record.read_data[NUM_CELLS / 2 - 1 + shift as usize]
+        };
+
+        let most_sig_bit = most_sig_limb & (1 << 7);
+        self.range_checker_chip
+            .add_count((most_sig_limb - most_sig_bit) as u32, 7);
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        core_row.prev_data = record.prev_data.map(F::from_canonical_u8);
+        core_row.shifted_read_data = record.read_data.map(F::from_canonical_u8);
+        core_row.shifted_read_data.rotate_left((shift & 2) as usize);
+
+        core_row.data_most_sig_bit = F::from_bool(most_sig_bit != 0);
+        core_row.shift_most_sig_bit = F::from_bool(shift & 2 == 2);
+        core_row.opcode_loadh_flag = F::from_bool(!record.is_byte);
+        core_row.opcode_loadb_flag1 = F::from_bool(record.is_byte && ((shift & 1) == 1));
+        core_row.opcode_loadb_flag0 = F::from_bool(record.is_byte && ((shift & 1) == 0));
     }
 }
 
-pub(super) fn run_write_data_sign_extend<
-    F: PrimeField32,
-    const NUM_CELLS: usize,
-    const LIMB_BITS: usize,
->(
+// Returns write_data: the LOADB byte or LOADH half at offset `shift`, sign-extended to NUM_CELLS.
+#[inline(always)]
+pub(super) fn run_write_data_sign_extend<const NUM_CELLS: usize>(
     opcode: Rv32LoadStoreOpcode,
-    read_data: [F; NUM_CELLS],
-    _prev_data: [F; NUM_CELLS],
-    shift: u32,
-) -> [F; NUM_CELLS] {
-    let shift = shift as usize;
-    let mut write_data = read_data;
+    read_data: [u8; NUM_CELLS],
+    shift: usize,
+) -> [u8; NUM_CELLS] {
     match (opcode, shift) {
         (LOADH, 0) | (LOADH, 2) => {
-            let ext = read_data[NUM_CELLS / 2 - 1 + shift].as_canonical_u32();
-            let ext = (ext >> (LIMB_BITS - 1)) * ((1 << LIMB_BITS) - 1);
-            for cell in write_data.iter_mut().take(NUM_CELLS).skip(NUM_CELLS / 2) {
-                *cell = F::from_canonical_u32(ext);
-            }
-            write_data[0..NUM_CELLS / 2]
-                .copy_from_slice(&read_data[shift..(NUM_CELLS / 2 + shift)]);
+            let ext = (read_data[NUM_CELLS / 2 - 1 + shift] >> 7) * u8::MAX;
+            array::from_fn(|i| {
+                if i < NUM_CELLS / 2 {
+                    read_data[i + shift]
+                } else {
+                    ext
+                }
+            })
         }
         (LOADB, 0) | (LOADB, 1) | (LOADB, 2) | (LOADB, 3) => {
-            let ext = read_data[shift].as_canonical_u32();
-            let ext = (ext >> (LIMB_BITS - 1)) * ((1 << LIMB_BITS) - 1);
-            for cell in write_data.iter_mut().take(NUM_CELLS).skip(1) {
-                *cell = F::from_canonical_u32(ext);
-            }
-            write_data[0] = read_data[shift];
+            let ext = (read_data[shift] >> 7) * u8::MAX;
+            array::from_fn(|i| {
+                if i == 0 {
+                    read_data[i + shift]
+                } else {
+                    ext
+                }
+            })
         }
         // Currently the adapter AIR requires `ptr_val` to be aligned to the data size in bytes.
         // The circuit requires that `shift = ptr_val % 4` so that `ptr_val - shift` is a multiple of 4.
@@ -314,6 +334,5 @@ pub(super) fn run_write_data_sign_extend<
         _ => unreachable!(
             "unaligned memory access not supported by this execution environment: {opcode:?}, shift: {shift}"
         ),
-    };
-    write_data
+    }
 }
diff --git a/extensions/rv32im/circuit/src/load_sign_extend/execution.rs b/extensions/rv32im/circuit/src/load_sign_extend/execution.rs
new file mode 100644
index 0000000000..43f11a33a7
--- /dev/null
+++ b/extensions/rv32im/circuit/src/load_sign_extend/execution.rs
@@ -0,0 +1,215 @@
+use std::{
+    array,
+    borrow::{Borrow, BorrowMut},
+    mem::size_of,
+};
+
+use openvm_circuit::{
+    arch::{
+        E2PreCompute, ExecuteFunc, ExecutionCtxTrait, ExecutionError, Executor,
+        MeteredExecutionCtxTrait, MeteredExecutor, StaticProgramError, VmExecState,
+    },
+    system::memory::{online::GuestMemory, POINTER_MAX_BITS},
+};
+use openvm_circuit_primitives_derive::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_IMM_AS, RV32_REGISTER_AS, RV32_REGISTER_NUM_LIMBS},
+    LocalOpcode,
+};
+use openvm_rv32im_transpiler::Rv32LoadStoreOpcode::{self, *};
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::core::LoadSignExtendExecutor;
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct LoadSignExtendPreCompute {
+    imm_extended: u32,
+    a: u8,
+    b: u8,
+    e: u8,
+}
+
+impl<A, const LIMB_BITS: usize> LoadSignExtendExecutor<A, { RV32_REGISTER_NUM_LIMBS }, LIMB_BITS> {
+    /// Fills `data` from `inst` and returns `(is_loadb, enabled)`, where `enabled` is `f != 0`.
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut LoadSignExtendPreCompute,
+    ) -> Result<(bool, bool), StaticProgramError> {
+        let Instruction {
+            opcode,
+            a,
+            b,
+            c,
+            d,
+            e,
+            f,
+            g,
+            ..
+        } = inst;
+
+        let e_u32 = e.as_canonical_u32();
+        if d.as_canonical_u32() != RV32_REGISTER_AS || e_u32 == RV32_IMM_AS {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+
+        let local_opcode = Rv32LoadStoreOpcode::from_usize(
+            opcode.local_opcode_idx(Rv32LoadStoreOpcode::CLASS_OFFSET),
+        );
+        match local_opcode {
+            LOADB | LOADH => {}
+            _ => unreachable!("LoadSignExtendExecutor should only handle LOADB/LOADH opcodes"),
+        }
+
+        let imm = c.as_canonical_u32();
+        let imm_sign = g.as_canonical_u32();
+        let imm_extended = imm + imm_sign * 0xffff0000;
+
+        *data = LoadSignExtendPreCompute {
+            imm_extended,
+            a: a.as_canonical_u32() as u8,
+            b: b.as_canonical_u32() as u8,
+            e: e_u32 as u8,
+        };
+        let enabled = !f.is_zero();
+        Ok((local_opcode == LOADB, enabled))
+    }
+}
+
+impl<F, A, const LIMB_BITS: usize> Executor<F>
+    for LoadSignExtendExecutor<A, { RV32_REGISTER_NUM_LIMBS }, LIMB_BITS>
+where
+    F: PrimeField32,
+{
+    fn pre_compute_size(&self) -> usize {
+        size_of::<LoadSignExtendPreCompute>()
+    }
+
+    #[inline(always)]
+    fn pre_compute<Ctx: ExecutionCtxTrait>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let pre_compute: &mut LoadSignExtendPreCompute = data.borrow_mut();
+        let (is_loadb, enabled) = self.pre_compute_impl(pc, inst, pre_compute)?;
+        let fn_ptr = match (is_loadb, enabled) {
+            (true, true) => execute_e1_impl::<_, _, true, true>,
+            (true, false) => execute_e1_impl::<_, _, true, false>,
+            (false, true) => execute_e1_impl::<_, _, false, true>,
+            (false, false) => execute_e1_impl::<_, _, false, false>,
+        };
+        Ok(fn_ptr)
+    }
+}
+
+impl<F, A, const LIMB_BITS: usize> MeteredExecutor<F>
+    for LoadSignExtendExecutor<A, { RV32_REGISTER_NUM_LIMBS }, LIMB_BITS>
+where
+    F: PrimeField32,
+{
+    fn metered_pre_compute_size(&self) -> usize {
+        size_of::<E2PreCompute<LoadSignExtendPreCompute>>()
+    }
+
+    fn metered_pre_compute<Ctx>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        let pre_compute: &mut E2PreCompute<LoadSignExtendPreCompute> = data.borrow_mut();
+        pre_compute.chip_idx = chip_idx as u32;
+        let (is_loadb, enabled) = self.pre_compute_impl(pc, inst, &mut pre_compute.data)?;
+        let fn_ptr = match (is_loadb, enabled) {
+            (true, true) => execute_e2_impl::<_, _, true, true>,
+            (true, false) => execute_e2_impl::<_, _, true, false>,
+            (false, true) => execute_e2_impl::<_, _, false, true>,
+            (false, false) => execute_e2_impl::<_, _, false, false>,
+        };
+        Ok(fn_ptr)
+    }
+}
+
+#[inline(always)]
+unsafe fn execute_e12_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const IS_LOADB: bool,
+    const ENABLED: bool,
+>(
+    pre_compute: &LoadSignExtendPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let rs1_bytes: [u8; RV32_REGISTER_NUM_LIMBS] =
+        vm_state.vm_read(RV32_REGISTER_AS, pre_compute.b as u32);
+    let rs1_val = u32::from_le_bytes(rs1_bytes);
+    let ptr_val = rs1_val.wrapping_add(pre_compute.imm_extended);
+    // `sign_extend([r32{c,g}(b):2]_e)`
+    debug_assert!(ptr_val < (1 << POINTER_MAX_BITS));
+    let shift_amount = ptr_val % 4;
+    let ptr_val = ptr_val - shift_amount; // aligned ptr
+
+    let read_data: [u8; RV32_REGISTER_NUM_LIMBS] = vm_state.vm_read(pre_compute.e as u32, ptr_val);
+
+    let write_data = if IS_LOADB {
+        let byte = read_data[shift_amount as usize];
+        let sign_extended = (byte as i8) as i32;
+        sign_extended.to_le_bytes()
+    } else {
+        if shift_amount != 0 && shift_amount != 2 {
+            vm_state.exit_code = Err(ExecutionError::Fail {
+                pc: vm_state.pc,
+                msg: "LoadSignExtend invalid shift amount",
+            });
+            return;
+        }
+        let half: [u8; 2] = array::from_fn(|i| read_data[shift_amount as usize + i]);
+        (i16::from_le_bytes(half) as i32).to_le_bytes()
+    };
+
+    if ENABLED {
+        vm_state.vm_write(RV32_REGISTER_AS, pre_compute.a as u32, &write_data);
+    }
+
+    vm_state.pc += DEFAULT_PC_STEP;
+    vm_state.instret += 1;
+}
+
+unsafe fn execute_e1_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const IS_LOADB: bool,
+    const ENABLED: bool,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &LoadSignExtendPreCompute = pre_compute.borrow();
+    execute_e12_impl::<F, CTX, IS_LOADB, ENABLED>(pre_compute, vm_state);
+}
+
+unsafe fn execute_e2_impl<
+    F: PrimeField32,
+    CTX: MeteredExecutionCtxTrait,
+    const IS_LOADB: bool,
+    const ENABLED: bool,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &E2PreCompute<LoadSignExtendPreCompute> = pre_compute.borrow();
+    vm_state
+        .ctx
+        .on_height_change(pre_compute.chip_idx as usize, 1);
+    execute_e12_impl::<F, CTX, IS_LOADB, ENABLED>(&pre_compute.data, vm_state);
+}
diff --git a/extensions/rv32im/circuit/src/load_sign_extend/mod.rs b/extensions/rv32im/circuit/src/load_sign_extend/mod.rs
index 79efbe912e..b788fc2f73 100644
--- a/extensions/rv32im/circuit/src/load_sign_extend/mod.rs
+++ b/extensions/rv32im/circuit/src/load_sign_extend/mod.rs
@@ -1,16 +1,19 @@
-use openvm_circuit::arch::VmChipWrapper;
+use openvm_circuit::arch::{VmAirWrapper, VmChipWrapper};
 
 use super::adapters::{RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS};
-use crate::adapters::Rv32LoadStoreAdapterChip;
+use crate::adapters::{Rv32LoadStoreAdapterAir, Rv32LoadStoreAdapterExecutor};
 
 mod core;
+mod execution;
 pub use core::*;
 
 #[cfg(test)]
 mod tests;
 
-pub type Rv32LoadSignExtendChip<F> = VmChipWrapper<
-    F,
-    Rv32LoadStoreAdapterChip<F>,
-    LoadSignExtendCoreChip<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>,
+pub type Rv32LoadSignExtendAir = VmAirWrapper<
+    Rv32LoadStoreAdapterAir,
+    LoadSignExtendCoreAir<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>,
 >;
+pub type Rv32LoadSignExtendExecutor =
+    LoadSignExtendExecutor<Rv32LoadStoreAdapterExecutor, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>;
+pub type Rv32LoadSignExtendChip<F> = VmChipWrapper<F, LoadSignExtendFiller>;
diff --git a/extensions/rv32im/circuit/src/load_sign_extend/tests.rs b/extensions/rv32im/circuit/src/load_sign_extend/tests.rs
index 0fe6d859d1..39c1f378ae 100644
--- a/extensions/rv32im/circuit/src/load_sign_extend/tests.rs
+++ b/extensions/rv32im/circuit/src/load_sign_extend/tests.rs
@@ -1,9 +1,6 @@
 use std::{array, borrow::BorrowMut};
 
-use openvm_circuit::arch::{
-    testing::{memory::gen_pointer, VmChipTestBuilder},
-    VmAdapterChip,
-};
+use openvm_circuit::arch::testing::{memory::gen_pointer, TestChipHarness, VmChipTestBuilder};
 use openvm_instructions::{instruction::Instruction, LocalOpcode};
 use openvm_rv32im_transpiler::Rv32LoadStoreOpcode::{self, *};
 use openvm_stark_backend::{
@@ -14,82 +11,104 @@ use openvm_stark_backend::{
         Matrix,
     },
     utils::disable_debug_builder,
-    verifier::VerificationError,
 };
-use openvm_stark_sdk::{config::setup_tracing, p3_baby_bear::BabyBear, utils::create_seeded_rng};
+use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
 use rand::{rngs::StdRng, Rng};
+use test_case::test_case;
 
-use super::run_write_data_sign_extend;
+use super::{run_write_data_sign_extend, LoadSignExtendCoreAir};
 use crate::{
-    adapters::{compose, Rv32LoadStoreAdapterChip, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS},
+    adapters::{
+        Rv32LoadStoreAdapterAir, Rv32LoadStoreAdapterExecutor, Rv32LoadStoreAdapterFiller,
+        RV32_REGISTER_NUM_LIMBS,
+    },
     load_sign_extend::LoadSignExtendCoreCols,
-    LoadSignExtendCoreChip, Rv32LoadSignExtendChip,
+    test_utils::get_verification_error,
+    LoadSignExtendFiller, Rv32LoadSignExtendAir, Rv32LoadSignExtendChip,
+    Rv32LoadSignExtendExecutor,
 };
 
 const IMM_BITS: usize = 16;
-
+const MAX_INS_CAPACITY: usize = 128;
+type Harness = TestChipHarness<
+    F,
+    Rv32LoadSignExtendExecutor,
+    Rv32LoadSignExtendAir,
+    Rv32LoadSignExtendChip<F>,
+>;
 type F = BabyBear;
 
-fn into_limbs<const NUM_LIMBS: usize, const LIMB_BITS: usize>(num: u32) -> [u32; NUM_LIMBS] {
-    array::from_fn(|i| (num >> (LIMB_BITS * i)) & ((1 << LIMB_BITS) - 1))
+fn create_test_chip(tester: &mut VmChipTestBuilder<F>) -> Harness {
+    let range_checker_chip = tester.range_checker().clone();
+    let air = Rv32LoadSignExtendAir::new(
+        Rv32LoadStoreAdapterAir::new(
+            tester.memory_bridge(),
+            tester.execution_bridge(),
+            range_checker_chip.bus(),
+            tester.address_bits(),
+        ),
+        LoadSignExtendCoreAir::new(range_checker_chip.bus()),
+    );
+    let executor =
+        Rv32LoadSignExtendExecutor::new(Rv32LoadStoreAdapterExecutor::new(tester.address_bits()));
+    let chip = Rv32LoadSignExtendChip::<F>::new(
+        LoadSignExtendFiller::new(
+            Rv32LoadStoreAdapterFiller::new(tester.address_bits(), range_checker_chip.clone()),
+            range_checker_chip.clone(),
+        ),
+        tester.memory_helper(),
+    );
+
+    Harness::with_capacity(executor, air, chip, MAX_INS_CAPACITY)
 }
 
 #[allow(clippy::too_many_arguments)]
 fn set_and_execute(
     tester: &mut VmChipTestBuilder<F>,
-    chip: &mut Rv32LoadSignExtendChip<F>,
+    harness: &mut Harness,
     rng: &mut StdRng,
     opcode: Rv32LoadStoreOpcode,
-    read_data: Option<[u32; RV32_REGISTER_NUM_LIMBS]>,
-    rs1: Option<[u32; RV32_REGISTER_NUM_LIMBS]>,
+    read_data: Option<[u8; RV32_REGISTER_NUM_LIMBS]>,
+    rs1: Option<[u8; RV32_REGISTER_NUM_LIMBS]>,
     imm: Option<u32>,
     imm_sign: Option<u32>,
 ) {
     let imm = imm.unwrap_or(rng.gen_range(0..(1 << IMM_BITS)));
     let imm_sign = imm_sign.unwrap_or(rng.gen_range(0..2));
-    let imm_ext = imm + imm_sign * (0xffffffff ^ ((1 << IMM_BITS) - 1));
+    let imm_ext = imm + imm_sign * (0xffff0000);
 
     let alignment = match opcode {
         LOADB => 0,
         LOADH => 1,
         _ => unreachable!(),
     };
-    let ptr_val = rng.gen_range(
-        0..(1
-            << (tester
-                .memory_controller()
-                .borrow()
-                .mem_config()
-                .pointer_max_bits
-                - alignment)),
-    ) << alignment;
-
-    let rs1 = rs1
-        .unwrap_or(into_limbs::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(
-            (ptr_val as u32).wrapping_sub(imm_ext),
-        ))
-        .map(F::from_canonical_u32);
+
+    let ptr_val: u32 = rng.gen_range(0..(1 << (tester.address_bits() - alignment))) << alignment;
+    let rs1 = rs1.unwrap_or(ptr_val.wrapping_sub(imm_ext).to_le_bytes());
+    let ptr_val = imm_ext.wrapping_add(u32::from_le_bytes(rs1));
     let a = gen_pointer(rng, 4);
     let b = gen_pointer(rng, 4);
 
-    let ptr_val = imm_ext.wrapping_add(compose(rs1));
     let shift_amount = ptr_val % 4;
-    tester.write(1, b, rs1);
+    tester.write(1, b, rs1.map(F::from_canonical_u8));
 
     let some_prev_data: [F; RV32_REGISTER_NUM_LIMBS] = if a != 0 {
-        array::from_fn(|_| F::from_canonical_u32(rng.gen_range(0..(1 << RV32_CELL_BITS))))
+        array::from_fn(|_| F::from_canonical_u8(rng.gen()))
     } else {
         [F::ZERO; RV32_REGISTER_NUM_LIMBS]
     };
-    let read_data: [F; RV32_REGISTER_NUM_LIMBS] = read_data
-        .unwrap_or(array::from_fn(|_| rng.gen_range(0..(1 << RV32_CELL_BITS))))
-        .map(F::from_canonical_u32);
+    let read_data: [u8; RV32_REGISTER_NUM_LIMBS] =
+        read_data.unwrap_or(array::from_fn(|_| rng.gen()));
 
     tester.write(1, a, some_prev_data);
-    tester.write(2, (ptr_val - shift_amount) as usize, read_data);
+    tester.write(
+        2,
+        (ptr_val - shift_amount) as usize,
+        read_data.map(F::from_canonical_u8),
+    );
 
     tester.execute(
-        chip,
+        harness,
         &Instruction::from_usize(
             opcode.global_opcode(),
             [
@@ -104,16 +123,11 @@ fn set_and_execute(
         ),
     );
 
-    let write_data = run_write_data_sign_extend::<_, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(
-        opcode,
-        read_data,
-        some_prev_data,
-        shift_amount,
-    );
+    let write_data = run_write_data_sign_extend(opcode, read_data, shift_amount as usize);
     if a != 0 {
-        assert_eq!(write_data, tester.read::<4>(1, a));
+        assert_eq!(write_data.map(F::from_canonical_u8), tester.read::<4>(1, a));
     } else {
-        assert_eq!([F::ZERO; RV32_REGISTER_NUM_LIMBS], tester.read::<4>(1, a));
+        assert_eq!([F::ZERO; 4], tester.read::<4>(1, a));
     }
 }
 
@@ -123,40 +137,19 @@ fn set_and_execute(
 /// Randomly generate computations and execute, ensuring that the generated trace
 /// passes all constraints.
 ///////////////////////////////////////////////////////////////////////////////////////
-#[test]
-fn rand_load_sign_extend_test() {
-    setup_tracing();
+#[test_case(LOADB, 100)]
+#[test_case(LOADH, 100)]
+fn rand_load_sign_extend_test(opcode: Rv32LoadStoreOpcode, num_ops: usize) {
     let mut rng = create_seeded_rng();
     let mut tester = VmChipTestBuilder::default();
-    let range_checker_chip = tester.memory_controller().borrow().range_checker.clone();
-    let adapter = Rv32LoadStoreAdapterChip::<F>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-        tester.address_bits(),
-        range_checker_chip.clone(),
-    );
-    let core = LoadSignExtendCoreChip::new(range_checker_chip);
-    let mut chip =
-        Rv32LoadSignExtendChip::<F>::new(adapter, core, tester.offline_memory_mutex_arc());
 
-    let num_tests: usize = 100;
-    for _ in 0..num_tests {
+    let mut harness = create_test_chip(&mut tester);
+    for _ in 0..num_ops {
         set_and_execute(
             &mut tester,
-            &mut chip,
+            &mut harness,
             &mut rng,
-            LOADB,
-            None,
-            None,
-            None,
-            None,
-        );
-        set_and_execute(
-            &mut tester,
-            &mut chip,
-            &mut rng,
-            LOADH,
+            opcode,
             None,
             None,
             None,
@@ -164,7 +157,7 @@ fn rand_load_sign_extend_test() {
         );
     }
 
-    let tester = tester.build().load(chip).finalize();
+    let tester = tester.build().load(harness).finalize();
     tester.simple_test().expect("Verification failed");
 }
 
@@ -172,40 +165,33 @@ fn rand_load_sign_extend_test() {
 // NEGATIVE TESTS
 //
 // Given a fake trace of a single operation, setup a chip and run the test. We replace
-// the write part of the trace and check that the core chip throws the expected error.
-// A dummy adaptor is used so memory interactions don't indirectly cause false passes.
+// part of the trace and check that the chip throws the expected error.
 //////////////////////////////////////////////////////////////////////////////////////
 
-#[allow(clippy::too_many_arguments)]
-fn run_negative_loadstore_test(
-    opcode: Rv32LoadStoreOpcode,
-    read_data: Option<[u32; RV32_REGISTER_NUM_LIMBS]>,
+#[derive(Clone, Copy, Default, PartialEq)]
+struct LoadSignExtPrankValues {
     data_most_sig_bit: Option<u32>,
     shift_most_sig_bit: Option<u32>,
     opcode_flags: Option<[bool; 3]>,
-    rs1: Option<[u32; RV32_REGISTER_NUM_LIMBS]>,
+}
+
+#[allow(clippy::too_many_arguments)]
+fn run_negative_load_sign_extend_test(
+    opcode: Rv32LoadStoreOpcode,
+    read_data: Option<[u8; RV32_REGISTER_NUM_LIMBS]>,
+    rs1: Option<[u8; RV32_REGISTER_NUM_LIMBS]>,
     imm: Option<u32>,
     imm_sign: Option<u32>,
-    expected_error: VerificationError,
+    prank_vals: LoadSignExtPrankValues,
+    interaction_error: bool,
 ) {
     let mut rng = create_seeded_rng();
     let mut tester = VmChipTestBuilder::default();
-    let range_checker_chip = tester.memory_controller().borrow().range_checker.clone();
-    let adapter = Rv32LoadStoreAdapterChip::<F>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-        tester.address_bits(),
-        range_checker_chip.clone(),
-    );
-    let core = LoadSignExtendCoreChip::new(range_checker_chip.clone());
-    let adapter_width = BaseAir::<F>::width(adapter.air());
-    let mut chip =
-        Rv32LoadSignExtendChip::<F>::new(adapter, core, tester.offline_memory_mutex_arc());
+    let mut harness = create_test_chip(&mut tester);
 
     set_and_execute(
         &mut tester,
-        &mut chip,
+        &mut harness,
         &mut rng,
         opcode,
         read_data,
@@ -214,78 +200,78 @@ fn run_negative_loadstore_test(
         imm_sign,
     );
 
+    let adapter_width = BaseAir::<F>::width(&harness.air.adapter);
     let modify_trace = |trace: &mut DenseMatrix<BabyBear>| {
         let mut trace_row = trace.row_slice(0).to_vec();
-
         let (_, core_row) = trace_row.split_at_mut(adapter_width);
 
         let core_cols: &mut LoadSignExtendCoreCols<F, RV32_REGISTER_NUM_LIMBS> =
             core_row.borrow_mut();
-
         if let Some(shifted_read_data) = read_data {
-            core_cols.shifted_read_data = shifted_read_data.map(F::from_canonical_u32);
+            core_cols.shifted_read_data = shifted_read_data.map(F::from_canonical_u8);
         }
-
-        if let Some(data_most_sig_bit) = data_most_sig_bit {
+        if let Some(data_most_sig_bit) = prank_vals.data_most_sig_bit {
             core_cols.data_most_sig_bit = F::from_canonical_u32(data_most_sig_bit);
         }
-        if let Some(shift_most_sig_bit) = shift_most_sig_bit {
+        if let Some(shift_most_sig_bit) = prank_vals.shift_most_sig_bit {
             core_cols.shift_most_sig_bit = F::from_canonical_u32(shift_most_sig_bit);
         }
-
-        if let Some(opcode_flags) = opcode_flags {
+        if let Some(opcode_flags) = prank_vals.opcode_flags {
             core_cols.opcode_loadb_flag0 = F::from_bool(opcode_flags[0]);
             core_cols.opcode_loadb_flag1 = F::from_bool(opcode_flags[1]);
             core_cols.opcode_loadh_flag = F::from_bool(opcode_flags[2]);
         }
+
         *trace = RowMajorMatrix::new(trace_row, trace.width());
     };
 
-    drop(range_checker_chip);
     disable_debug_builder();
     let tester = tester
         .build()
-        .load_and_prank_trace(chip, modify_trace)
+        .load_and_prank_trace(harness, modify_trace)
         .finalize();
-    tester.simple_test_with_expected_error(expected_error);
+    tester.simple_test_with_expected_error(get_verification_error(interaction_error));
 }
 
 #[test]
 fn loadstore_negative_tests() {
-    run_negative_loadstore_test(
+    run_negative_load_sign_extend_test(
         LOADB,
         Some([233, 187, 145, 238]),
-        Some(0),
         None,
         None,
         None,
-        None,
-        None,
-        VerificationError::ChallengePhaseError,
+        LoadSignExtPrankValues {
+            data_most_sig_bit: Some(0),
+            ..Default::default()
+        },
+        true,
     );
 
-    run_negative_loadstore_test(
+    run_negative_load_sign_extend_test(
         LOADH,
         None,
-        None,
-        Some(0),
-        None,
         Some([202, 109, 183, 26]),
         Some(31212),
         None,
-        VerificationError::ChallengePhaseError,
+        LoadSignExtPrankValues {
+            shift_most_sig_bit: Some(0),
+            ..Default::default()
+        },
+        true,
     );
 
-    run_negative_loadstore_test(
+    run_negative_load_sign_extend_test(
         LOADB,
         None,
-        None,
-        None,
-        Some([true, false, false]),
         Some([250, 132, 77, 5]),
         Some(47741),
         None,
-        VerificationError::ChallengePhaseError,
+        LoadSignExtPrankValues {
+            opcode_flags: Some([true, false, false]),
+            ..Default::default()
+        },
+        true,
     );
 }
 
@@ -294,119 +280,51 @@ fn loadstore_negative_tests() {
 ///
 /// Ensure that solve functions produce the correct results.
 ///////////////////////////////////////////////////////////////////////////////////////
-#[test]
-fn execute_roundtrip_sanity_test() {
-    let mut rng = create_seeded_rng();
-    let mut tester = VmChipTestBuilder::default();
-    let range_checker_chip = tester.memory_controller().borrow().range_checker.clone();
-    let adapter = Rv32LoadStoreAdapterChip::<F>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-        tester.address_bits(),
-        range_checker_chip.clone(),
-    );
-    let core = LoadSignExtendCoreChip::new(range_checker_chip);
-    let mut chip =
-        Rv32LoadSignExtendChip::<F>::new(adapter, core, tester.offline_memory_mutex_arc());
-
-    let num_tests: usize = 10;
-    for _ in 0..num_tests {
-        set_and_execute(
-            &mut tester,
-            &mut chip,
-            &mut rng,
-            LOADB,
-            None,
-            None,
-            None,
-            None,
-        );
-        set_and_execute(
-            &mut tester,
-            &mut chip,
-            &mut rng,
-            LOADH,
-            None,
-            None,
-            None,
-            None,
-        );
-    }
-}
 
 #[test]
 fn solve_loadh_extend_sign_sanity_test() {
-    let read_data = [34, 159, 237, 151].map(F::from_canonical_u32);
-    let prev_data = [94, 183, 56, 241].map(F::from_canonical_u32);
-    let write_data0 = run_write_data_sign_extend::<_, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(
-        LOADH, read_data, prev_data, 0,
-    );
-    let write_data2 = run_write_data_sign_extend::<_, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(
-        LOADH, read_data, prev_data, 2,
-    );
+    let read_data = [34, 159, 237, 151];
+    let write_data0 = run_write_data_sign_extend::<RV32_REGISTER_NUM_LIMBS>(LOADH, read_data, 0);
+    let write_data2 = run_write_data_sign_extend::<RV32_REGISTER_NUM_LIMBS>(LOADH, read_data, 2);
 
-    assert_eq!(write_data0, [34, 159, 255, 255].map(F::from_canonical_u32));
-    assert_eq!(write_data2, [237, 151, 255, 255].map(F::from_canonical_u32));
+    assert_eq!(write_data0, [34, 159, 255, 255]);
+    assert_eq!(write_data2, [237, 151, 255, 255]);
 }
 
 #[test]
 fn solve_loadh_extend_zero_sanity_test() {
-    let read_data = [34, 121, 237, 97].map(F::from_canonical_u32);
-    let prev_data = [94, 183, 56, 241].map(F::from_canonical_u32);
-    let write_data0 = run_write_data_sign_extend::<_, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(
-        LOADH, read_data, prev_data, 0,
-    );
-    let write_data2 = run_write_data_sign_extend::<_, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(
-        LOADH, read_data, prev_data, 2,
-    );
+    let read_data = [34, 121, 237, 97];
+    let write_data0 = run_write_data_sign_extend::<RV32_REGISTER_NUM_LIMBS>(LOADH, read_data, 0);
+    let write_data2 = run_write_data_sign_extend::<RV32_REGISTER_NUM_LIMBS>(LOADH, read_data, 2);
 
-    assert_eq!(write_data0, [34, 121, 0, 0].map(F::from_canonical_u32));
-    assert_eq!(write_data2, [237, 97, 0, 0].map(F::from_canonical_u32));
+    assert_eq!(write_data0, [34, 121, 0, 0]);
+    assert_eq!(write_data2, [237, 97, 0, 0]);
 }
 
 #[test]
 fn solve_loadb_extend_sign_sanity_test() {
-    let read_data = [45, 82, 99, 127].map(F::from_canonical_u32);
-    let prev_data = [53, 180, 29, 244].map(F::from_canonical_u32);
-    let write_data0 = run_write_data_sign_extend::<_, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(
-        LOADB, read_data, prev_data, 0,
-    );
-    let write_data1 = run_write_data_sign_extend::<_, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(
-        LOADB, read_data, prev_data, 1,
-    );
-    let write_data2 = run_write_data_sign_extend::<_, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(
-        LOADB, read_data, prev_data, 2,
-    );
-    let write_data3 = run_write_data_sign_extend::<_, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(
-        LOADB, read_data, prev_data, 3,
-    );
-
-    assert_eq!(write_data0, [45, 0, 0, 0].map(F::from_canonical_u32));
-    assert_eq!(write_data1, [82, 0, 0, 0].map(F::from_canonical_u32));
-    assert_eq!(write_data2, [99, 0, 0, 0].map(F::from_canonical_u32));
-    assert_eq!(write_data3, [127, 0, 0, 0].map(F::from_canonical_u32));
+    let read_data = [45, 82, 99, 127];
+    let write_data0 = run_write_data_sign_extend::<RV32_REGISTER_NUM_LIMBS>(LOADB, read_data, 0);
+    let write_data1 = run_write_data_sign_extend::<RV32_REGISTER_NUM_LIMBS>(LOADB, read_data, 1);
+    let write_data2 = run_write_data_sign_extend::<RV32_REGISTER_NUM_LIMBS>(LOADB, read_data, 2);
+    let write_data3 = run_write_data_sign_extend::<RV32_REGISTER_NUM_LIMBS>(LOADB, read_data, 3);
+
+    assert_eq!(write_data0, [45, 0, 0, 0]);
+    assert_eq!(write_data1, [82, 0, 0, 0]);
+    assert_eq!(write_data2, [99, 0, 0, 0]);
+    assert_eq!(write_data3, [127, 0, 0, 0]);
 }
 
 #[test]
 fn solve_loadb_extend_zero_sanity_test() {
-    let read_data = [173, 210, 227, 255].map(F::from_canonical_u32);
-    let prev_data = [53, 180, 29, 244].map(F::from_canonical_u32);
-    let write_data0 = run_write_data_sign_extend::<_, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(
-        LOADB, read_data, prev_data, 0,
-    );
-    let write_data1 = run_write_data_sign_extend::<_, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(
-        LOADB, read_data, prev_data, 1,
-    );
-    let write_data2 = run_write_data_sign_extend::<_, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(
-        LOADB, read_data, prev_data, 2,
-    );
-    let write_data3 = run_write_data_sign_extend::<_, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(
-        LOADB, read_data, prev_data, 3,
-    );
-
-    assert_eq!(write_data0, [173, 255, 255, 255].map(F::from_canonical_u32));
-    assert_eq!(write_data1, [210, 255, 255, 255].map(F::from_canonical_u32));
-    assert_eq!(write_data2, [227, 255, 255, 255].map(F::from_canonical_u32));
-    assert_eq!(write_data3, [255, 255, 255, 255].map(F::from_canonical_u32));
+    let read_data = [173, 210, 227, 255];
+    let write_data0 = run_write_data_sign_extend::<RV32_REGISTER_NUM_LIMBS>(LOADB, read_data, 0);
+    let write_data1 = run_write_data_sign_extend::<RV32_REGISTER_NUM_LIMBS>(LOADB, read_data, 1);
+    let write_data2 = run_write_data_sign_extend::<RV32_REGISTER_NUM_LIMBS>(LOADB, read_data, 2);
+    let write_data3 = run_write_data_sign_extend::<RV32_REGISTER_NUM_LIMBS>(LOADB, read_data, 3);
+
+    assert_eq!(write_data0, [173, 255, 255, 255]);
+    assert_eq!(write_data1, [210, 255, 255, 255]);
+    assert_eq!(write_data2, [227, 255, 255, 255]);
+    assert_eq!(write_data3, [255, 255, 255, 255]);
 }
diff --git a/extensions/rv32im/circuit/src/loadstore/core.rs b/extensions/rv32im/circuit/src/loadstore/core.rs
index 36beb10629..b0f578dd82 100644
--- a/extensions/rv32im/circuit/src/loadstore/core.rs
+++ b/extensions/rv32im/circuit/src/loadstore/core.rs
@@ -1,10 +1,17 @@
-use std::borrow::{Borrow, BorrowMut};
+use std::{
+    array,
+    borrow::{Borrow, BorrowMut},
+    fmt::Debug,
+};
 
-use openvm_circuit::arch::{
-    AdapterAirContext, AdapterRuntimeContext, Result, VmAdapterInterface, VmCoreAir, VmCoreChip,
+use openvm_circuit::{
+    arch::*,
+    system::memory::{online::TracingMemory, MemoryAuxColsFactory},
+};
+use openvm_circuit_primitives::{AlignedBorrow, AlignedBytesBorrow};
+use openvm_instructions::{
+    instruction::Instruction, program::DEFAULT_PC_STEP, riscv::RV32_REGISTER_NUM_LIMBS, LocalOpcode,
 };
-use openvm_circuit_primitives_derive::AlignedBorrow;
-use openvm_instructions::{instruction::Instruction, LocalOpcode};
 use openvm_rv32im_transpiler::Rv32LoadStoreOpcode::{self, *};
 use openvm_stark_backend::{
     interaction::InteractionBuilder,
@@ -12,10 +19,8 @@ use openvm_stark_backend::{
     p3_field::{Field, FieldAlgebra, PrimeField32},
     rap::BaseAirWithPublicValues,
 };
-use serde::{de::DeserializeOwned, Deserialize, Serialize};
-use serde_big_array::BigArray;
 
-use crate::adapters::LoadStoreInstruction;
+use crate::adapters::{LoadStoreInstruction, Rv32LoadStoreAdapterFiller};
 
 #[derive(Debug, Clone, Copy)]
 enum InstructionOpcode {
@@ -56,21 +61,7 @@ pub struct LoadStoreCoreCols<T, const NUM_CELLS: usize> {
     pub write_data: [T; NUM_CELLS],
 }
 
-#[repr(C)]
-#[derive(Debug, Clone, Serialize, Deserialize)]
-#[serde(bound = "F: Serialize + DeserializeOwned")]
-pub struct LoadStoreCoreRecord<F, const NUM_CELLS: usize> {
-    pub opcode: Rv32LoadStoreOpcode,
-    pub shift: u32,
-    #[serde(with = "BigArray")]
-    pub read_data: [F; NUM_CELLS],
-    #[serde(with = "BigArray")]
-    pub prev_data: [F; NUM_CELLS],
-    #[serde(with = "BigArray")]
-    pub write_data: [F; NUM_CELLS],
-}
-
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, derive_new::new)]
 pub struct LoadStoreCoreAir<const NUM_CELLS: usize> {
     pub offset: usize,
 }
@@ -246,70 +237,115 @@ where
     }
 }
 
-#[derive(Debug)]
-pub struct LoadStoreCoreChip<const NUM_CELLS: usize> {
-    pub air: LoadStoreCoreAir<NUM_CELLS>,
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct LoadStoreCoreRecord<const NUM_CELLS: usize> {
+    pub local_opcode: u8,
+    pub shift_amount: u8,
+    pub read_data: [u8; NUM_CELLS],
+    // Note: `prev_data` can be from native address space, so we need to use u32
+    pub prev_data: [u32; NUM_CELLS],
 }
 
-impl<const NUM_CELLS: usize> LoadStoreCoreChip<NUM_CELLS> {
-    pub fn new(offset: usize) -> Self {
-        Self {
-            air: LoadStoreCoreAir { offset },
-        }
-    }
+#[derive(Clone, Copy, derive_new::new)]
+pub struct LoadStoreExecutor<A, const NUM_CELLS: usize> {
+    adapter: A,
+    pub offset: usize,
 }
 
-impl<F: PrimeField32, I: VmAdapterInterface<F>, const NUM_CELLS: usize> VmCoreChip<F, I>
-    for LoadStoreCoreChip<NUM_CELLS>
+#[derive(Clone, derive_new::new)]
+pub struct LoadStoreFiller<
+    A = Rv32LoadStoreAdapterFiller,
+    const NUM_CELLS: usize = RV32_REGISTER_NUM_LIMBS,
+> {
+    adapter: A,
+    pub offset: usize,
+}
+
+impl<F, A, RA, const NUM_CELLS: usize> PreflightExecutor<F, RA> for LoadStoreExecutor<A, NUM_CELLS>
 where
-    I::Reads: Into<([[F; NUM_CELLS]; 2], F)>,
-    I::Writes: From<[[F; NUM_CELLS]; 1]>,
+    F: PrimeField32,
+    A: 'static
+        + AdapterTraceExecutor<
+            F,
+            ReadData = (([u32; NUM_CELLS], [u8; NUM_CELLS]), u8),
+            WriteData = [u32; NUM_CELLS],
+        >,
+    for<'buf> RA: RecordArena<
+        'buf,
+        EmptyAdapterCoreLayout<F, A>,
+        (A::RecordMut<'buf>, &'buf mut LoadStoreCoreRecord<NUM_CELLS>),
+    >,
 {
-    type Record = LoadStoreCoreRecord<F, NUM_CELLS>;
-    type Air = LoadStoreCoreAir<NUM_CELLS>;
-
-    #[allow(clippy::type_complexity)]
-    fn execute_instruction(
-        &self,
-        instruction: &Instruction<F>,
-        _from_pc: u32,
-        reads: I::Reads,
-    ) -> Result<(AdapterRuntimeContext<F, I>, Self::Record)> {
-        let local_opcode =
-            Rv32LoadStoreOpcode::from_usize(instruction.opcode.local_opcode_idx(self.air.offset));
-
-        let (reads, shift_amount) = reads.into();
-        let shift = shift_amount.as_canonical_u32();
-        let prev_data = reads[0];
-        let read_data = reads[1];
-        let write_data = run_write_data(local_opcode, read_data, prev_data, shift);
-        let output = AdapterRuntimeContext::without_pc([write_data]);
-
-        Ok((
-            output,
-            LoadStoreCoreRecord {
-                opcode: local_opcode,
-                shift,
-                prev_data,
-                read_data,
-                write_data,
-            },
-        ))
-    }
-
     fn get_opcode_name(&self, opcode: usize) -> String {
         format!(
             "{:?}",
-            Rv32LoadStoreOpcode::from_usize(opcode - self.air.offset)
+            Rv32LoadStoreOpcode::from_usize(opcode - self.offset)
         )
     }
 
-    fn generate_trace_row(&self, row_slice: &mut [F], record: Self::Record) {
-        let core_cols: &mut LoadStoreCoreCols<F, NUM_CELLS> = row_slice.borrow_mut();
-        let opcode = record.opcode;
-        let flags = &mut core_cols.flags;
+    fn execute(
+        &self,
+        state: VmStateMut<F, TracingMemory, RA>,
+        instruction: &Instruction<F>,
+    ) -> Result<(), ExecutionError> {
+        let Instruction { opcode, .. } = instruction;
+
+        let (mut adapter_record, core_record) = state.ctx.alloc(EmptyAdapterCoreLayout::new());
+
+        A::start(*state.pc, state.memory, &mut adapter_record);
+
+        (
+            (core_record.prev_data, core_record.read_data),
+            core_record.shift_amount,
+        ) = self
+            .adapter
+            .read(state.memory, instruction, &mut adapter_record);
+
+        let local_opcode = Rv32LoadStoreOpcode::from_usize(opcode.local_opcode_idx(self.offset));
+        core_record.local_opcode = local_opcode as u8;
+
+        let write_data = run_write_data(
+            local_opcode,
+            core_record.read_data,
+            core_record.prev_data,
+            core_record.shift_amount as usize,
+        );
+        self.adapter
+            .write(state.memory, instruction, write_data, &mut adapter_record);
+
+        *state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP);
+
+        Ok(())
+    }
+}
+
+impl<F, A, const NUM_CELLS: usize> TraceFiller<F> for LoadStoreFiller<A, NUM_CELLS>
+where
+    F: PrimeField32,
+    A: 'static + AdapterTraceFiller<F>,
+{
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, row_slice: &mut [F]) {
+        let (adapter_row, mut core_row) = unsafe { row_slice.split_at_mut_unchecked(A::WIDTH) };
+        self.adapter.fill_trace_row(mem_helper, adapter_row);
+
+        let record: &LoadStoreCoreRecord<NUM_CELLS> =
+            unsafe { get_record_from_slice(&mut core_row, ()) };
+        let core_row: &mut LoadStoreCoreCols<F, NUM_CELLS> = core_row.borrow_mut();
+
+        let opcode = Rv32LoadStoreOpcode::from_usize(record.local_opcode as usize);
+        let shift = record.shift_amount;
+
+        let write_data = run_write_data(opcode, record.read_data, record.prev_data, shift as usize);
+        // Fill columns in reverse order. NOTE(review): presumably because `record` is read from this same row buffer — confirm.
+        core_row.write_data = write_data.map(F::from_canonical_u32);
+        core_row.prev_data = record.prev_data.map(F::from_canonical_u32);
+        core_row.read_data = record.read_data.map(F::from_canonical_u8);
+        core_row.is_load = F::from_bool([LOADW, LOADHU, LOADBU].contains(&opcode));
+        core_row.is_valid = F::ONE;
+        let flags = &mut core_row.flags;
         *flags = [F::ZERO; 4];
-        match (opcode, record.shift) {
+        match (opcode, shift) {
             (LOADW, 0) => flags[0] = F::TWO,
             (LOADHU, 0) => flags[1] = F::TWO,
             (LOADHU, 2) => flags[2] = F::TWO,
@@ -328,51 +364,49 @@ where
             (STOREB, 3) => (flags[2], flags[3]) = (F::ONE, F::ONE),
             _ => unreachable!(),
         };
-        core_cols.prev_data = record.prev_data;
-        core_cols.read_data = record.read_data;
-        core_cols.is_valid = F::ONE;
-        core_cols.is_load = F::from_bool([LOADW, LOADHU, LOADBU].contains(&opcode));
-        core_cols.write_data = record.write_data;
-    }
-
-    fn air(&self) -> &Self::Air {
-        &self.air
     }
 }
 
-pub(super) fn run_write_data<F: PrimeField32, const NUM_CELLS: usize>(
+/// Returns the data to write for `opcode`, computed from `read_data`, `prev_data` and `shift`.
+#[inline(always)]
+pub(super) fn run_write_data<const NUM_CELLS: usize>(
     opcode: Rv32LoadStoreOpcode,
-    read_data: [F; NUM_CELLS],
-    prev_data: [F; NUM_CELLS],
-    shift: u32,
-) -> [F; NUM_CELLS] {
-    let shift = shift as usize;
-    let mut write_data = read_data;
+    read_data: [u8; NUM_CELLS],
+    prev_data: [u32; NUM_CELLS],
+    shift: usize,
+) -> [u32; NUM_CELLS] {
     match (opcode, shift) {
-        (LOADW, 0) => (),
+        (LOADW, 0) => {
+            read_data.map(|x| x as u32)
+        }
         (LOADBU, 0) | (LOADBU, 1) | (LOADBU, 2) | (LOADBU, 3) => {
-            for cell in write_data.iter_mut().take(NUM_CELLS).skip(1) {
-                *cell = F::ZERO;
-            }
-            write_data[0] = read_data[shift];
+            let mut write_data = [0; NUM_CELLS];
+            write_data[0] = read_data[shift] as u32;
+            write_data
         }
         (LOADHU, 0) | (LOADHU, 2) => {
-            for cell in write_data.iter_mut().take(NUM_CELLS).skip(NUM_CELLS / 2) {
-                *cell = F::ZERO;
-            }
+            let mut write_data = [0; NUM_CELLS];
             for (i, cell) in write_data.iter_mut().take(NUM_CELLS / 2).enumerate() {
-                *cell = read_data[i + shift];
+                *cell = read_data[i + shift] as u32;
             }
+            write_data
         }
-        (STOREW, 0) => (),
+        (STOREW, 0) => {
+            read_data.map(|x| x as u32)
+        }
         (STOREB, 0) | (STOREB, 1) | (STOREB, 2) | (STOREB, 3) => {
-            write_data = prev_data;
-            write_data[shift] = read_data[0];
+            let mut write_data = prev_data;
+            write_data[shift] = read_data[0] as u32;
+            write_data
         }
         (STOREH, 0) | (STOREH, 2) => {
-            write_data = prev_data;
-            write_data[shift..(NUM_CELLS / 2 + shift)]
-                .copy_from_slice(&read_data[..(NUM_CELLS / 2)]);
+            array::from_fn(|i| {
+                if (shift..NUM_CELLS / 2 + shift).contains(&i) {
+                    read_data[i - shift] as u32
+                } else {
+                    prev_data[i]
+                }
+            })
         }
         // Currently the adapter AIR requires `ptr_val` to be aligned to the data size in bytes.
         // The circuit requires that `shift = ptr_val % 4` so that `ptr_val - shift` is a multiple of 4.
@@ -380,6 +414,5 @@ pub(super) fn run_write_data<F: PrimeField32, const NUM_CELLS: usize>(
         _ => unreachable!(
             "unaligned memory access not supported by this execution environment: {opcode:?}, shift: {shift}"
         ),
-    };
-    write_data
+    }
 }
diff --git a/extensions/rv32im/circuit/src/loadstore/execution.rs b/extensions/rv32im/circuit/src/loadstore/execution.rs
new file mode 100644
index 0000000000..4d718c579e
--- /dev/null
+++ b/extensions/rv32im/circuit/src/loadstore/execution.rs
@@ -0,0 +1,416 @@
+use std::{
+    borrow::{Borrow, BorrowMut},
+    fmt::Debug,
+};
+
+use openvm_circuit::{
+    arch::*,
+    system::memory::{online::GuestMemory, POINTER_MAX_BITS},
+};
+use openvm_circuit_primitives::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_IMM_AS, RV32_REGISTER_AS, RV32_REGISTER_NUM_LIMBS},
+    LocalOpcode, NATIVE_AS,
+};
+use openvm_rv32im_transpiler::Rv32LoadStoreOpcode::{self, *};
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::core::LoadStoreExecutor;
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct LoadStorePreCompute {
+    imm_extended: u32,
+    a: u8,
+    b: u8,
+    e: u8,
+}
+
+impl<A, const NUM_CELLS: usize> LoadStoreExecutor<A, NUM_CELLS> {
+    /// Fills `data` from `inst` and returns `(local_opcode, enabled, is_native_store)`.
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut LoadStorePreCompute,
+    ) -> Result<(Rv32LoadStoreOpcode, bool, bool), StaticProgramError> {
+        let Instruction {
+            opcode,
+            a,
+            b,
+            c,
+            d,
+            e,
+            f,
+            g,
+            ..
+        } = inst;
+        let enabled = !f.is_zero();
+
+        let e_u32 = e.as_canonical_u32();
+        if d.as_canonical_u32() != RV32_REGISTER_AS || e_u32 == RV32_IMM_AS {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+
+        let local_opcode = Rv32LoadStoreOpcode::from_usize(
+            opcode.local_opcode_idx(Rv32LoadStoreOpcode::CLASS_OFFSET),
+        );
+        match local_opcode {
+            LOADW | LOADBU | LOADHU => {}
+            STOREW | STOREH | STOREB => {
+                if !enabled {
+                    return Err(StaticProgramError::InvalidInstruction(pc));
+                }
+            }
+            _ => unreachable!("LoadStoreExecutor should not handle LOADB/LOADH opcodes"),
+        }
+
+        let imm = c.as_canonical_u32();
+        let imm_sign = g.as_canonical_u32();
+        let imm_extended = imm + imm_sign * 0xffff0000;
+        let is_native_store = e_u32 == NATIVE_AS;
+
+        *data = LoadStorePreCompute {
+            imm_extended,
+            a: a.as_canonical_u32() as u8,
+            b: b.as_canonical_u32() as u8,
+            e: e_u32 as u8,
+        };
+        Ok((local_opcode, enabled, is_native_store))
+    }
+}
+
+impl<F, A, const NUM_CELLS: usize> Executor<F> for LoadStoreExecutor<A, NUM_CELLS>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize {
+        size_of::<LoadStorePreCompute>()
+    }
+
+    #[inline(always)]
+    fn pre_compute<Ctx: ExecutionCtxTrait>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let pre_compute: &mut LoadStorePreCompute = data.borrow_mut();
+        let (local_opcode, enabled, is_native_store) =
+            self.pre_compute_impl(pc, inst, pre_compute)?;
+        let fn_ptr = match (local_opcode, enabled, is_native_store) {
+            (LOADW, true, _) => execute_e1_impl::<_, _, U8, LoadWOp, true>,
+            (LOADW, false, _) => execute_e1_impl::<_, _, U8, LoadWOp, false>,
+            (LOADHU, true, _) => execute_e1_impl::<_, _, U8, LoadHUOp, true>,
+            (LOADHU, false, _) => execute_e1_impl::<_, _, U8, LoadHUOp, false>,
+            (LOADBU, true, _) => execute_e1_impl::<_, _, U8, LoadBUOp, true>,
+            (LOADBU, false, _) => execute_e1_impl::<_, _, U8, LoadBUOp, false>,
+            (STOREW, true, false) => execute_e1_impl::<_, _, U8, StoreWOp, true>,
+            (STOREW, false, false) => execute_e1_impl::<_, _, U8, StoreWOp, false>,
+            (STOREW, true, true) => execute_e1_impl::<_, _, F, StoreWOp, true>,
+            (STOREW, false, true) => execute_e1_impl::<_, _, F, StoreWOp, false>,
+            (STOREH, true, false) => execute_e1_impl::<_, _, U8, StoreHOp, true>,
+            (STOREH, false, false) => execute_e1_impl::<_, _, U8, StoreHOp, false>,
+            (STOREH, true, true) => execute_e1_impl::<_, _, F, StoreHOp, true>,
+            (STOREH, false, true) => execute_e1_impl::<_, _, F, StoreHOp, false>,
+            (STOREB, true, false) => execute_e1_impl::<_, _, U8, StoreBOp, true>,
+            (STOREB, false, false) => execute_e1_impl::<_, _, U8, StoreBOp, false>,
+            (STOREB, true, true) => execute_e1_impl::<_, _, F, StoreBOp, true>,
+            (STOREB, false, true) => execute_e1_impl::<_, _, F, StoreBOp, false>,
+            (_, _, _) => unreachable!(),
+        };
+        Ok(fn_ptr)
+    }
+}
+
+impl<F, A, const NUM_CELLS: usize> MeteredExecutor<F> for LoadStoreExecutor<A, NUM_CELLS>
+where
+    F: PrimeField32,
+{
+    fn metered_pre_compute_size(&self) -> usize {
+        size_of::<E2PreCompute<LoadStorePreCompute>>()
+    }
+
+    fn metered_pre_compute<Ctx>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        let pre_compute: &mut E2PreCompute<LoadStorePreCompute> = data.borrow_mut();
+        pre_compute.chip_idx = chip_idx as u32;
+        let (local_opcode, enabled, is_native_store) =
+            self.pre_compute_impl(pc, inst, &mut pre_compute.data)?;
+        let fn_ptr = match (local_opcode, enabled, is_native_store) {
+            (LOADW, true, _) => execute_e2_impl::<_, _, U8, LoadWOp, true>,
+            (LOADW, false, _) => execute_e2_impl::<_, _, U8, LoadWOp, false>,
+            (LOADHU, true, _) => execute_e2_impl::<_, _, U8, LoadHUOp, true>,
+            (LOADHU, false, _) => execute_e2_impl::<_, _, U8, LoadHUOp, false>,
+            (LOADBU, true, _) => execute_e2_impl::<_, _, U8, LoadBUOp, true>,
+            (LOADBU, false, _) => execute_e2_impl::<_, _, U8, LoadBUOp, false>,
+            (STOREW, true, false) => execute_e2_impl::<_, _, U8, StoreWOp, true>,
+            (STOREW, false, false) => execute_e2_impl::<_, _, U8, StoreWOp, false>,
+            (STOREW, true, true) => execute_e2_impl::<_, _, F, StoreWOp, true>,
+            (STOREW, false, true) => execute_e2_impl::<_, _, F, StoreWOp, false>,
+            (STOREH, true, false) => execute_e2_impl::<_, _, U8, StoreHOp, true>,
+            (STOREH, false, false) => execute_e2_impl::<_, _, U8, StoreHOp, false>,
+            (STOREH, true, true) => execute_e2_impl::<_, _, F, StoreHOp, true>,
+            (STOREH, false, true) => execute_e2_impl::<_, _, F, StoreHOp, false>,
+            (STOREB, true, false) => execute_e2_impl::<_, _, U8, StoreBOp, true>,
+            (STOREB, false, false) => execute_e2_impl::<_, _, U8, StoreBOp, false>,
+            (STOREB, true, true) => execute_e2_impl::<_, _, F, StoreBOp, true>,
+            (STOREB, false, true) => execute_e2_impl::<_, _, F, StoreBOp, false>,
+            (_, _, _) => unreachable!(),
+        };
+        Ok(fn_ptr)
+    }
+}
+
+#[inline(always)]
+unsafe fn execute_e12_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    T: Copy + Debug + Default,
+    OP: LoadStoreOp<T>,
+    const ENABLED: bool,
+>(
+    pre_compute: &LoadStorePreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let rs1_bytes: [u8; RV32_REGISTER_NUM_LIMBS] =
+        vm_state.vm_read(RV32_REGISTER_AS, pre_compute.b as u32);
+    let rs1_val = u32::from_le_bytes(rs1_bytes);
+    let ptr_val = rs1_val.wrapping_add(pre_compute.imm_extended);
+    // Effective address: rs1 (register `b`) plus the sign-extended immediate, i.e. `r32{c,g}(b)`.
+    debug_assert!(ptr_val < (1 << POINTER_MAX_BITS));
+    let shift_amount = ptr_val % 4;
+    let ptr_val = ptr_val - shift_amount; // aligned ptr
+
+    let read_data: [u8; RV32_REGISTER_NUM_LIMBS] = if OP::IS_LOAD {
+        vm_state.vm_read(pre_compute.e as u32, ptr_val)
+    } else {
+        vm_state.vm_read(RV32_REGISTER_AS, pre_compute.a as u32)
+    };
+
+    // We need to write 4 u32s for STORE.
+    let mut write_data: [T; RV32_REGISTER_NUM_LIMBS] = if OP::HOST_READ {
+        vm_state.host_read(pre_compute.e as u32, ptr_val)
+    } else {
+        [T::default(); RV32_REGISTER_NUM_LIMBS]
+    };
+
+    if !OP::compute_write_data(&mut write_data, read_data, shift_amount as usize) {
+        vm_state.exit_code = Err(ExecutionError::Fail {
+            pc: vm_state.pc,
+            msg: "Invalid LoadStoreOp",
+        });
+        return;
+    }
+
+    if ENABLED {
+        if OP::IS_LOAD {
+            vm_state.vm_write(RV32_REGISTER_AS, pre_compute.a as u32, &write_data);
+        } else {
+            vm_state.vm_write(pre_compute.e as u32, ptr_val, &write_data);
+        }
+    }
+
+    vm_state.pc += DEFAULT_PC_STEP;
+    vm_state.instret += 1;
+}
+
+unsafe fn execute_e1_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    T: Copy + Debug + Default,
+    OP: LoadStoreOp<T>,
+    const ENABLED: bool,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &LoadStorePreCompute = pre_compute.borrow();
+    execute_e12_impl::<F, CTX, T, OP, ENABLED>(pre_compute, vm_state);
+}
+
+unsafe fn execute_e2_impl<
+    F: PrimeField32,
+    CTX: MeteredExecutionCtxTrait,
+    T: Copy + Debug + Default,
+    OP: LoadStoreOp<T>,
+    const ENABLED: bool,
+>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &E2PreCompute<LoadStorePreCompute> = pre_compute.borrow();
+    vm_state
+        .ctx
+        .on_height_change(pre_compute.chip_idx as usize, 1);
+    execute_e12_impl::<F, CTX, T, OP, ENABLED>(&pre_compute.data, vm_state);
+}
+
+trait LoadStoreOp<T> {
+    const IS_LOAD: bool;
+    const HOST_READ: bool;
+
+    /// Returns `true` if the operation is valid for `shift_amount`, `false` otherwise.
+    fn compute_write_data(
+        write_data: &mut [T; RV32_REGISTER_NUM_LIMBS],
+        read_data: [u8; RV32_REGISTER_NUM_LIMBS],
+        shift_amount: usize,
+    ) -> bool;
+}
+/// Wrapper type for u8 so we can implement `LoadStoreOp<F>` for `F: PrimeField32`.
+/// For memory read/write, this type behaves the same as `u8`.
+#[allow(dead_code)]
+#[derive(Copy, Clone, Debug, Default)]
+struct U8(u8);
+struct LoadWOp;
+struct LoadHUOp;
+struct LoadBUOp;
+struct StoreWOp;
+struct StoreHOp;
+struct StoreBOp;
+impl LoadStoreOp<U8> for LoadWOp {
+    const IS_LOAD: bool = true;
+    const HOST_READ: bool = false;
+
+    #[inline(always)]
+    fn compute_write_data(
+        write_data: &mut [U8; RV32_REGISTER_NUM_LIMBS],
+        read_data: [u8; RV32_REGISTER_NUM_LIMBS],
+        _shift_amount: usize,
+    ) -> bool {
+        *write_data = read_data.map(U8);
+        true
+    }
+}
+
+impl LoadStoreOp<U8> for LoadHUOp {
+    const IS_LOAD: bool = true;
+    const HOST_READ: bool = false;
+    #[inline(always)]
+    fn compute_write_data(
+        write_data: &mut [U8; RV32_REGISTER_NUM_LIMBS],
+        read_data: [u8; RV32_REGISTER_NUM_LIMBS],
+        shift_amount: usize,
+    ) -> bool {
+        if shift_amount != 0 && shift_amount != 2 {
+            return false;
+        }
+        write_data[0] = U8(read_data[shift_amount]);
+        write_data[1] = U8(read_data[shift_amount + 1]);
+        true
+    }
+}
+impl LoadStoreOp<U8> for LoadBUOp {
+    const IS_LOAD: bool = true;
+    const HOST_READ: bool = false;
+    #[inline(always)]
+    fn compute_write_data(
+        write_data: &mut [U8; RV32_REGISTER_NUM_LIMBS],
+        read_data: [u8; RV32_REGISTER_NUM_LIMBS],
+        shift_amount: usize,
+    ) -> bool {
+        write_data[0] = U8(read_data[shift_amount]);
+        true
+    }
+}
+
+impl LoadStoreOp<U8> for StoreWOp {
+    const IS_LOAD: bool = false;
+    const HOST_READ: bool = false;
+    #[inline(always)]
+    fn compute_write_data(
+        write_data: &mut [U8; RV32_REGISTER_NUM_LIMBS],
+        read_data: [u8; RV32_REGISTER_NUM_LIMBS],
+        _shift_amount: usize,
+    ) -> bool {
+        *write_data = read_data.map(U8);
+        true
+    }
+}
+impl LoadStoreOp<U8> for StoreHOp {
+    const IS_LOAD: bool = false;
+    const HOST_READ: bool = true;
+
+    #[inline(always)]
+    fn compute_write_data(
+        write_data: &mut [U8; RV32_REGISTER_NUM_LIMBS],
+        read_data: [u8; RV32_REGISTER_NUM_LIMBS],
+        shift_amount: usize,
+    ) -> bool {
+        if shift_amount != 0 && shift_amount != 2 {
+            return false;
+        }
+        write_data[shift_amount] = U8(read_data[0]);
+        write_data[shift_amount + 1] = U8(read_data[1]);
+        true
+    }
+}
+impl LoadStoreOp<U8> for StoreBOp {
+    const IS_LOAD: bool = false;
+    const HOST_READ: bool = true;
+    #[inline(always)]
+    fn compute_write_data(
+        write_data: &mut [U8; RV32_REGISTER_NUM_LIMBS],
+        read_data: [u8; RV32_REGISTER_NUM_LIMBS],
+        shift_amount: usize,
+    ) -> bool {
+        write_data[shift_amount] = U8(read_data[0]);
+        true
+    }
+}
+
+impl<F: PrimeField32> LoadStoreOp<F> for StoreWOp {
+    const IS_LOAD: bool = false;
+    const HOST_READ: bool = false;
+    #[inline(always)]
+    fn compute_write_data(
+        write_data: &mut [F; RV32_REGISTER_NUM_LIMBS],
+        read_data: [u8; RV32_REGISTER_NUM_LIMBS],
+        _shift_amount: usize,
+    ) -> bool {
+        *write_data = read_data.map(F::from_canonical_u8);
+        true
+    }
+}
+impl<F: PrimeField32> LoadStoreOp<F> for StoreHOp {
+    const IS_LOAD: bool = false;
+    const HOST_READ: bool = true;
+
+    #[inline(always)]
+    fn compute_write_data(
+        write_data: &mut [F; RV32_REGISTER_NUM_LIMBS],
+        read_data: [u8; RV32_REGISTER_NUM_LIMBS],
+        shift_amount: usize,
+    ) -> bool {
+        if shift_amount != 0 && shift_amount != 2 {
+            return false;
+        }
+        write_data[shift_amount] = F::from_canonical_u8(read_data[0]);
+        write_data[shift_amount + 1] = F::from_canonical_u8(read_data[1]);
+        true
+    }
+}
+impl<F: PrimeField32> LoadStoreOp<F> for StoreBOp {
+    const IS_LOAD: bool = false;
+    const HOST_READ: bool = true;
+    #[inline(always)]
+    fn compute_write_data(
+        write_data: &mut [F; RV32_REGISTER_NUM_LIMBS],
+        read_data: [u8; RV32_REGISTER_NUM_LIMBS],
+        shift_amount: usize,
+    ) -> bool {
+        write_data[shift_amount] = F::from_canonical_u8(read_data[0]);
+        true
+    }
+}
diff --git a/extensions/rv32im/circuit/src/loadstore/mod.rs b/extensions/rv32im/circuit/src/loadstore/mod.rs
index 825f82166c..4d5e35df58 100644
--- a/extensions/rv32im/circuit/src/loadstore/mod.rs
+++ b/extensions/rv32im/circuit/src/loadstore/mod.rs
@@ -2,12 +2,17 @@ mod core;
 
 pub use core::*;
 
-use openvm_circuit::arch::VmChipWrapper;
+use openvm_circuit::arch::{VmAirWrapper, VmChipWrapper};
 
-use super::adapters::{Rv32LoadStoreAdapterChip, RV32_REGISTER_NUM_LIMBS};
+use super::adapters::RV32_REGISTER_NUM_LIMBS;
+use crate::adapters::{Rv32LoadStoreAdapterAir, Rv32LoadStoreAdapterExecutor};
 
+mod execution;
 #[cfg(test)]
 mod tests;
 
-pub type Rv32LoadStoreChip<F> =
-    VmChipWrapper<F, Rv32LoadStoreAdapterChip<F>, LoadStoreCoreChip<RV32_REGISTER_NUM_LIMBS>>;
+pub type Rv32LoadStoreAir =
+    VmAirWrapper<Rv32LoadStoreAdapterAir, LoadStoreCoreAir<RV32_REGISTER_NUM_LIMBS>>;
+pub type Rv32LoadStoreExecutor =
+    LoadStoreExecutor<Rv32LoadStoreAdapterExecutor, RV32_REGISTER_NUM_LIMBS>;
+pub type Rv32LoadStoreChip<F> = VmChipWrapper<F, LoadStoreFiller>;
diff --git a/extensions/rv32im/circuit/src/loadstore/tests.rs b/extensions/rv32im/circuit/src/loadstore/tests.rs
index 0fbfa137b9..00b2dcc46f 100644
--- a/extensions/rv32im/circuit/src/loadstore/tests.rs
+++ b/extensions/rv32im/circuit/src/loadstore/tests.rs
@@ -2,50 +2,83 @@ use std::{array, borrow::BorrowMut};
 
 use openvm_circuit::{
     arch::{
-        testing::{memory::gen_pointer, VmChipTestBuilder},
-        VmAdapterChip,
+        testing::{memory::gen_pointer, TestChipHarness, VmChipTestBuilder},
+        MemoryConfig,
     },
-    utils::u32_into_limbs,
+    system::memory::merkle::public_values::PUBLIC_VALUES_AS,
 };
-use openvm_instructions::{instruction::Instruction, LocalOpcode};
+use openvm_instructions::{instruction::Instruction, riscv::RV32_REGISTER_AS, LocalOpcode};
 use openvm_rv32im_transpiler::Rv32LoadStoreOpcode::{self, *};
 use openvm_stark_backend::{
     p3_air::BaseAir,
-    p3_field::FieldAlgebra,
+    p3_field::{FieldAlgebra, PrimeField32},
     p3_matrix::{
         dense::{DenseMatrix, RowMajorMatrix},
         Matrix,
     },
     utils::disable_debug_builder,
-    verifier::VerificationError,
 };
-use openvm_stark_sdk::{config::setup_tracing, p3_baby_bear::BabyBear, utils::create_seeded_rng};
+use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
 use rand::{rngs::StdRng, seq::SliceRandom, Rng};
+use test_case::test_case;
 
-use super::{run_write_data, LoadStoreCoreChip, Rv32LoadStoreChip};
+use super::{run_write_data, LoadStoreCoreAir, Rv32LoadStoreChip};
 use crate::{
-    adapters::{compose, Rv32LoadStoreAdapterChip, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS},
+    adapters::{
+        Rv32LoadStoreAdapterAir, Rv32LoadStoreAdapterCols, Rv32LoadStoreAdapterExecutor,
+        Rv32LoadStoreAdapterFiller, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS,
+    },
     loadstore::LoadStoreCoreCols,
+    test_utils::get_verification_error,
+    LoadStoreFiller, Rv32LoadStoreAir, Rv32LoadStoreExecutor,
 };
 
 const IMM_BITS: usize = 16;
+const MAX_INS_CAPACITY: usize = 128;
 
 type F = BabyBear;
+type Harness = TestChipHarness<F, Rv32LoadStoreExecutor, Rv32LoadStoreAir, Rv32LoadStoreChip<F>>;
+
+fn create_test_chip(tester: &mut VmChipTestBuilder<F>) -> Harness {
+    let range_checker_chip = tester.range_checker();
+    // Build AIR, executor and chip from the tester's handles; all use the same opcode class offset.
+    let air = Rv32LoadStoreAir::new(
+        Rv32LoadStoreAdapterAir::new(
+            tester.memory_bridge(),
+            tester.execution_bridge(),
+            range_checker_chip.bus(),
+            tester.address_bits(),
+        ),
+        LoadStoreCoreAir::new(Rv32LoadStoreOpcode::CLASS_OFFSET),
+    );
+    let executor = Rv32LoadStoreExecutor::new(
+        Rv32LoadStoreAdapterExecutor::new(tester.address_bits()),
+        Rv32LoadStoreOpcode::CLASS_OFFSET,
+    );
+    let chip = Rv32LoadStoreChip::<F>::new(
+        LoadStoreFiller::new(
+            Rv32LoadStoreAdapterFiller::new(tester.address_bits(), range_checker_chip.clone()),
+            Rv32LoadStoreOpcode::CLASS_OFFSET,
+        ),
+        tester.memory_helper(),
+    );
+    Harness::with_capacity(executor, air, chip, MAX_INS_CAPACITY) // MAX_INS_CAPACITY is 128
+}
 
 #[allow(clippy::too_many_arguments)]
 fn set_and_execute(
     tester: &mut VmChipTestBuilder<F>,
-    chip: &mut Rv32LoadStoreChip<F>,
+    harness: &mut Harness,
     rng: &mut StdRng,
     opcode: Rv32LoadStoreOpcode,
-    rs1: Option<[u32; RV32_REGISTER_NUM_LIMBS]>,
+    rs1: Option<[u8; RV32_REGISTER_NUM_LIMBS]>,
     imm: Option<u32>,
     imm_sign: Option<u32>,
     mem_as: Option<usize>,
 ) {
     let imm = imm.unwrap_or(rng.gen_range(0..(1 << IMM_BITS)));
     let imm_sign = imm_sign.unwrap_or(rng.gen_range(0..2));
-    let imm_ext = imm + imm_sign * (0xffffffff ^ ((1 << IMM_BITS) - 1));
+    let imm_ext = imm + imm_sign * 0xffff0000;
 
     let alignment = match opcode {
         LOADW | STOREW => 2,
@@ -54,33 +87,21 @@ fn set_and_execute(
         _ => unreachable!(),
     };
 
-    let ptr_val = rng.gen_range(
-        0..(1
-            << (tester
-                .memory_controller()
-                .borrow()
-                .mem_config()
-                .pointer_max_bits
-                - alignment)),
-    ) << alignment;
-
-    let rs1 = rs1
-        .unwrap_or(u32_into_limbs::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(
-            (ptr_val as u32).wrapping_sub(imm_ext),
-        ))
-        .map(F::from_canonical_u32);
+    let ptr_val: u32 = rng.gen_range(0..(1 << (tester.address_bits() - alignment))) << alignment;
+    let rs1 = rs1.unwrap_or(ptr_val.wrapping_sub(imm_ext).to_le_bytes());
+    let ptr_val = imm_ext.wrapping_add(u32::from_le_bytes(rs1));
     let a = gen_pointer(rng, 4);
     let b = gen_pointer(rng, 4);
+
     let is_load = [LOADW, LOADHU, LOADBU].contains(&opcode);
     let mem_as = mem_as.unwrap_or(if is_load {
-        *[1, 2].choose(rng).unwrap()
+        2
     } else {
         *[2, 3, 4].choose(rng).unwrap()
     });
 
-    let ptr_val = imm_ext.wrapping_add(compose(rs1));
     let shift_amount = ptr_val % 4;
-    tester.write(1, b, rs1);
+    tester.write(1, b, rs1.map(F::from_canonical_u8));
 
     let mut some_prev_data: [F; RV32_REGISTER_NUM_LIMBS] =
         array::from_fn(|_| F::from_canonical_u32(rng.gen_range(0..(1 << RV32_CELL_BITS))));
@@ -92,11 +113,11 @@ fn set_and_execute(
             some_prev_data = [F::ZERO; RV32_REGISTER_NUM_LIMBS];
         }
         tester.write(1, a, some_prev_data);
-        if mem_as == 1 && ptr_val - shift_amount == 0 {
-            read_data = [F::ZERO; RV32_REGISTER_NUM_LIMBS];
-        }
         tester.write(mem_as, (ptr_val - shift_amount) as usize, read_data);
     } else {
+        if mem_as == 4 {
+            some_prev_data = array::from_fn(|_| rng.gen());
+        }
         if a == 0 {
             read_data = [F::ZERO; RV32_REGISTER_NUM_LIMBS];
         }
@@ -107,7 +128,7 @@ fn set_and_execute(
     let enabled_write = !(is_load & (a == 0));
 
     tester.execute(
-        chip,
+        harness,
         &Instruction::from_usize(
             opcode.global_opcode(),
             [
@@ -122,7 +143,13 @@ fn set_and_execute(
         ),
     );
 
-    let write_data = run_write_data(opcode, read_data, some_prev_data, shift_amount);
+    let write_data = run_write_data(
+        opcode,
+        read_data.map(|x| x.as_canonical_u32() as u8),
+        some_prev_data.map(|x| x.as_canonical_u32()),
+        shift_amount as usize,
+    )
+    .map(F::from_canonical_u32);
     if is_load {
         if enabled_write {
             assert_eq!(write_data, tester.read::<4>(1, a));
@@ -143,80 +170,28 @@ fn set_and_execute(
 /// Randomly generate computations and execute, ensuring that the generated trace
 /// passes all constraints.
 ///////////////////////////////////////////////////////////////////////////////////////
-#[test]
-fn rand_loadstore_test() {
-    setup_tracing();
+#[test_case(LOADW, 100)]
+#[test_case(LOADBU, 100)]
+#[test_case(LOADHU, 100)]
+#[test_case(STOREW, 100)]
+#[test_case(STOREB, 100)]
+#[test_case(STOREH, 100)]
+fn rand_loadstore_test(opcode: Rv32LoadStoreOpcode, num_ops: usize) {
     let mut rng = create_seeded_rng();
-    let mut tester = VmChipTestBuilder::default();
-    let range_checker_chip = tester.memory_controller().borrow().range_checker.clone();
-    let adapter = Rv32LoadStoreAdapterChip::<F>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-        tester.address_bits(),
-        range_checker_chip.clone(),
-    );
-
-    let core = LoadStoreCoreChip::new(Rv32LoadStoreOpcode::CLASS_OFFSET);
-    let mut chip = Rv32LoadStoreChip::<F>::new(adapter, core, tester.offline_memory_mutex_arc());
+    let mut mem_config = MemoryConfig::default();
+    mem_config.addr_spaces[RV32_REGISTER_AS as usize].num_cells = 1 << 29;
+    if [STOREW, STOREB, STOREH].contains(&opcode) {
+        mem_config.addr_spaces[PUBLIC_VALUES_AS as usize].num_cells = 1 << 29;
+    }
+    let mut tester = VmChipTestBuilder::volatile(mem_config);
+    let mut harness = create_test_chip(&mut tester);
 
-    let num_tests: usize = 100;
-    for _ in 0..num_tests {
-        set_and_execute(
-            &mut tester,
-            &mut chip,
-            &mut rng,
-            LOADW,
-            None,
-            None,
-            None,
-            None,
-        );
-        set_and_execute(
-            &mut tester,
-            &mut chip,
-            &mut rng,
-            LOADBU,
-            None,
-            None,
-            None,
-            None,
-        );
-        set_and_execute(
-            &mut tester,
-            &mut chip,
-            &mut rng,
-            LOADHU,
-            None,
-            None,
-            None,
-            None,
-        );
+    for _ in 0..num_ops {
         set_and_execute(
             &mut tester,
-            &mut chip,
+            &mut harness,
             &mut rng,
-            STOREW,
-            None,
-            None,
-            None,
-            None,
-        );
-        set_and_execute(
-            &mut tester,
-            &mut chip,
-            &mut rng,
-            STOREB,
-            None,
-            None,
-            None,
-            None,
-        );
-        set_and_execute(
-            &mut tester,
-            &mut chip,
-            &mut rng,
-            STOREH,
+            opcode,
             None,
             None,
             None,
@@ -224,8 +199,7 @@ fn rand_loadstore_test() {
         );
     }
 
-    drop(range_checker_chip);
-    let tester = tester.build().load(chip).finalize();
+    let tester = tester.build().load(harness).finalize();
     tester.simple_test().expect("Verification failed");
 }
 
@@ -233,79 +207,84 @@ fn rand_loadstore_test() {
 // NEGATIVE TESTS
 //
 // Given a fake trace of a single operation, setup a chip and run the test. We replace
-// the write part of the trace and check that the core chip throws the expected error.
-// A dummy adaptor is used so memory interactions don't indirectly cause false passes.
+// part of the trace and check that the chip throws the expected error.
 //////////////////////////////////////////////////////////////////////////////////////
 
-#[allow(clippy::too_many_arguments)]
-fn run_negative_loadstore_test(
-    opcode: Rv32LoadStoreOpcode,
+#[derive(Clone, Copy, Default, PartialEq)]
+struct LoadStorePrankValues {
     read_data: Option<[u32; RV32_REGISTER_NUM_LIMBS]>,
     prev_data: Option<[u32; RV32_REGISTER_NUM_LIMBS]>,
     write_data: Option<[u32; RV32_REGISTER_NUM_LIMBS]>,
     flags: Option<[u32; 4]>,
     is_load: Option<bool>,
-    rs1: Option<[u32; RV32_REGISTER_NUM_LIMBS]>,
+    mem_as: Option<u32>,
+}
+
+#[allow(clippy::too_many_arguments)]
+fn run_negative_loadstore_test(
+    opcode: Rv32LoadStoreOpcode,
+    rs1: Option<[u8; RV32_REGISTER_NUM_LIMBS]>,
     imm: Option<u32>,
     imm_sign: Option<u32>,
-    mem_as: Option<usize>,
-    expected_error: VerificationError,
+    prank_vals: LoadStorePrankValues,
+    interaction_error: bool,
 ) {
     let mut rng = create_seeded_rng();
-    let mut tester = VmChipTestBuilder::default();
-    let range_checker_chip = tester.memory_controller().borrow().range_checker.clone();
-    let adapter = Rv32LoadStoreAdapterChip::<F>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-        tester.address_bits(),
-        range_checker_chip.clone(),
-    );
-
-    let core = LoadStoreCoreChip::new(Rv32LoadStoreOpcode::CLASS_OFFSET);
-    let adapter_width = BaseAir::<F>::width(adapter.air());
-    let mut chip = Rv32LoadStoreChip::<F>::new(adapter, core, tester.offline_memory_mutex_arc());
+    let mut mem_config = MemoryConfig::default();
+    mem_config.addr_spaces[RV32_REGISTER_AS as usize].num_cells = 1 << 29;
+    if [STOREW, STOREB, STOREH].contains(&opcode) {
+        mem_config.addr_spaces[PUBLIC_VALUES_AS as usize].num_cells = 1 << 29;
+    }
+    let mut tester = VmChipTestBuilder::volatile(mem_config);
+    let mut harness = create_test_chip(&mut tester);
 
     set_and_execute(
         &mut tester,
-        &mut chip,
+        &mut harness,
         &mut rng,
         opcode,
         rs1,
         imm,
         imm_sign,
-        mem_as,
+        None,
     );
 
+    let adapter_width = BaseAir::<F>::width(&harness.air.adapter);
+
     let modify_trace = |trace: &mut DenseMatrix<BabyBear>| {
         let mut trace_row = trace.row_slice(0).to_vec();
-        let (_, core_row) = trace_row.split_at_mut(adapter_width);
+        let (adapter_row, core_row) = trace_row.split_at_mut(adapter_width);
+        let adapter_cols: &mut Rv32LoadStoreAdapterCols<F> = adapter_row.borrow_mut();
         let core_cols: &mut LoadStoreCoreCols<F, RV32_REGISTER_NUM_LIMBS> = core_row.borrow_mut();
-        if let Some(read_data) = read_data {
+
+        if let Some(read_data) = prank_vals.read_data {
             core_cols.read_data = read_data.map(F::from_canonical_u32);
         }
-        if let Some(prev_data) = prev_data {
+        if let Some(prev_data) = prank_vals.prev_data {
             core_cols.prev_data = prev_data.map(F::from_canonical_u32);
         }
-        if let Some(write_data) = write_data {
+        if let Some(write_data) = prank_vals.write_data {
             core_cols.write_data = write_data.map(F::from_canonical_u32);
         }
-        if let Some(flags) = flags {
+        if let Some(flags) = prank_vals.flags {
             core_cols.flags = flags.map(F::from_canonical_u32);
         }
-        if let Some(is_load) = is_load {
+        if let Some(is_load) = prank_vals.is_load {
             core_cols.is_load = F::from_bool(is_load);
         }
+        if let Some(mem_as) = prank_vals.mem_as {
+            adapter_cols.mem_as = F::from_canonical_u32(mem_as);
+        }
+
         *trace = RowMajorMatrix::new(trace_row, trace.width());
     };
 
-    drop(range_checker_chip);
     disable_debug_builder();
     let tester = tester
         .build()
-        .load_and_prank_trace(chip, modify_trace)
+        .load_and_prank_trace(harness, modify_trace)
         .finalize();
-    tester.simple_test_with_expected_error(expected_error);
+    tester.simple_test_with_expected_error(get_verification_error(interaction_error));
 }
 
 #[test]
@@ -315,41 +294,36 @@ fn negative_wrong_opcode_tests() {
         None,
         None,
         None,
-        None,
-        Some(false),
-        None,
-        None,
-        None,
-        None,
-        VerificationError::OodEvaluationMismatch,
+        LoadStorePrankValues {
+            is_load: Some(false),
+            ..Default::default()
+        },
+        false,
     );
 
     run_negative_loadstore_test(
         LOADBU,
-        None,
-        None,
-        None,
-        Some([0, 0, 0, 2]),
-        None,
         Some([4, 0, 0, 0]),
         Some(1),
         None,
-        None,
-        VerificationError::OodEvaluationMismatch,
+        LoadStorePrankValues {
+            flags: Some([0, 0, 0, 2]),
+            ..Default::default()
+        },
+        false,
     );
 
     run_negative_loadstore_test(
         STOREH,
-        None,
-        None,
-        None,
-        Some([1, 0, 1, 0]),
-        Some(true),
         Some([11, 169, 76, 28]),
         Some(37121),
         None,
-        None,
-        VerificationError::OodEvaluationMismatch,
+        LoadStorePrankValues {
+            flags: Some([1, 0, 1, 0]),
+            is_load: Some(true),
+            ..Default::default()
+        },
+        false,
     );
 }
 
@@ -357,30 +331,34 @@ fn negative_wrong_opcode_tests() {
 fn negative_write_data_tests() {
     run_negative_loadstore_test(
         LOADHU,
-        Some([175, 33, 198, 250]),
-        Some([90, 121, 64, 205]),
-        Some([175, 33, 0, 0]),
-        Some([0, 2, 0, 0]),
-        Some(true),
         Some([13, 11, 156, 23]),
         Some(43641),
         None,
-        None,
-        VerificationError::ChallengePhaseError,
+        LoadStorePrankValues {
+            read_data: Some([175, 33, 198, 250]),
+            prev_data: Some([90, 121, 64, 205]),
+            write_data: Some([175, 33, 0, 0]),
+            flags: Some([0, 2, 0, 0]),
+            is_load: Some(true),
+            mem_as: None,
+        },
+        true,
     );
 
     run_negative_loadstore_test(
         STOREB,
-        Some([175, 33, 198, 250]),
-        Some([90, 121, 64, 205]),
-        Some([175, 121, 64, 205]),
-        Some([0, 0, 1, 1]),
-        None,
         Some([45, 123, 87, 24]),
         Some(28122),
         Some(0),
-        None,
-        VerificationError::OodEvaluationMismatch,
+        LoadStorePrankValues {
+            read_data: Some([175, 33, 198, 250]),
+            prev_data: Some([90, 121, 64, 205]),
+            write_data: Some([175, 121, 64, 205]),
+            flags: Some([0, 0, 1, 1]),
+            is_load: None,
+            mem_as: None,
+        },
+        false,
     );
 }
 
@@ -391,39 +369,35 @@ fn negative_wrong_address_space_tests() {
         None,
         None,
         None,
-        None,
-        None,
-        None,
-        None,
-        None,
-        Some(3),
-        VerificationError::OodEvaluationMismatch,
+        LoadStorePrankValues {
+            mem_as: Some(3),
+            ..Default::default()
+        },
+        false,
     );
+
     run_negative_loadstore_test(
         LOADW,
         None,
         None,
         None,
-        None,
-        None,
-        None,
-        None,
-        None,
-        Some(4),
-        VerificationError::OodEvaluationMismatch,
+        LoadStorePrankValues {
+            mem_as: Some(4),
+            ..Default::default()
+        },
+        false,
     );
+
     run_negative_loadstore_test(
         STOREW,
         None,
         None,
         None,
-        None,
-        None,
-        None,
-        None,
-        None,
-        Some(1),
-        VerificationError::OodEvaluationMismatch,
+        LoadStorePrankValues {
+            mem_as: Some(1),
+            ..Default::default()
+        },
+        false,
     );
 }
 
@@ -432,140 +406,60 @@ fn negative_wrong_address_space_tests() {
 ///
 /// Ensure that solve functions produce the correct results.
 ///////////////////////////////////////////////////////////////////////////////////////
-#[test]
-fn execute_roundtrip_sanity_test() {
-    let mut rng = create_seeded_rng();
-    let mut tester = VmChipTestBuilder::default();
-    let range_checker_chip = tester.memory_controller().borrow().range_checker.clone();
-    let adapter = Rv32LoadStoreAdapterChip::<F>::new(
-        tester.execution_bus(),
-        tester.program_bus(),
-        tester.memory_bridge(),
-        tester.address_bits(),
-        range_checker_chip.clone(),
-    );
-    let core = LoadStoreCoreChip::new(Rv32LoadStoreOpcode::CLASS_OFFSET);
-    let mut chip = Rv32LoadStoreChip::<F>::new(adapter, core, tester.offline_memory_mutex_arc());
-
-    let num_tests: usize = 100;
-    for _ in 0..num_tests {
-        set_and_execute(
-            &mut tester,
-            &mut chip,
-            &mut rng,
-            LOADW,
-            None,
-            None,
-            None,
-            None,
-        );
-        set_and_execute(
-            &mut tester,
-            &mut chip,
-            &mut rng,
-            LOADBU,
-            None,
-            None,
-            None,
-            None,
-        );
-        set_and_execute(
-            &mut tester,
-            &mut chip,
-            &mut rng,
-            LOADHU,
-            None,
-            None,
-            None,
-            None,
-        );
-        set_and_execute(
-            &mut tester,
-            &mut chip,
-            &mut rng,
-            STOREW,
-            None,
-            None,
-            None,
-            None,
-        );
-        set_and_execute(
-            &mut tester,
-            &mut chip,
-            &mut rng,
-            STOREB,
-            None,
-            None,
-            None,
-            None,
-        );
-        set_and_execute(
-            &mut tester,
-            &mut chip,
-            &mut rng,
-            STOREH,
-            None,
-            None,
-            None,
-            None,
-        );
-    }
-}
-
 #[test]
 fn run_loadw_storew_sanity_test() {
-    let read_data = [138, 45, 202, 76].map(F::from_canonical_u32);
-    let prev_data = [159, 213, 89, 34].map(F::from_canonical_u32);
+    let read_data = [138, 45, 202, 76];
+    let prev_data = [159, 213, 89, 34];
     let store_write_data = run_write_data(STOREW, read_data, prev_data, 0);
     let load_write_data = run_write_data(LOADW, read_data, prev_data, 0);
-    assert_eq!(store_write_data, read_data);
-    assert_eq!(load_write_data, read_data);
+    assert_eq!(store_write_data, read_data.map(u32::from));
+    assert_eq!(load_write_data, read_data.map(u32::from));
 }
 
 #[test]
 fn run_storeh_sanity_test() {
-    let read_data = [250, 123, 67, 198].map(F::from_canonical_u32);
-    let prev_data = [144, 56, 175, 92].map(F::from_canonical_u32);
+    let read_data = [250, 123, 67, 198];
+    let prev_data = [144, 56, 175, 92];
     let write_data = run_write_data(STOREH, read_data, prev_data, 0);
     let write_data2 = run_write_data(STOREH, read_data, prev_data, 2);
-    assert_eq!(write_data, [250, 123, 175, 92].map(F::from_canonical_u32));
-    assert_eq!(write_data2, [144, 56, 250, 123].map(F::from_canonical_u32));
+    assert_eq!(write_data, [250, 123, 175, 92]);
+    assert_eq!(write_data2, [144, 56, 250, 123]);
 }
 
 #[test]
 fn run_storeb_sanity_test() {
-    let read_data = [221, 104, 58, 147].map(F::from_canonical_u32);
-    let prev_data = [199, 83, 243, 12].map(F::from_canonical_u32);
+    let read_data = [221, 104, 58, 147];
+    let prev_data = [199, 83, 243, 12];
     let write_data = run_write_data(STOREB, read_data, prev_data, 0);
     let write_data1 = run_write_data(STOREB, read_data, prev_data, 1);
     let write_data2 = run_write_data(STOREB, read_data, prev_data, 2);
     let write_data3 = run_write_data(STOREB, read_data, prev_data, 3);
-    assert_eq!(write_data, [221, 83, 243, 12].map(F::from_canonical_u32));
-    assert_eq!(write_data1, [199, 221, 243, 12].map(F::from_canonical_u32));
-    assert_eq!(write_data2, [199, 83, 221, 12].map(F::from_canonical_u32));
-    assert_eq!(write_data3, [199, 83, 243, 221].map(F::from_canonical_u32));
+    assert_eq!(write_data, [221, 83, 243, 12]);
+    assert_eq!(write_data1, [199, 221, 243, 12]);
+    assert_eq!(write_data2, [199, 83, 221, 12]);
+    assert_eq!(write_data3, [199, 83, 243, 221]);
 }
 
 #[test]
 fn run_loadhu_sanity_test() {
-    let read_data = [175, 33, 198, 250].map(F::from_canonical_u32);
-    let prev_data = [90, 121, 64, 205].map(F::from_canonical_u32);
+    let read_data = [175, 33, 198, 250];
+    let prev_data = [90, 121, 64, 205];
     let write_data = run_write_data(LOADHU, read_data, prev_data, 0);
     let write_data2 = run_write_data(LOADHU, read_data, prev_data, 2);
-    assert_eq!(write_data, [175, 33, 0, 0].map(F::from_canonical_u32));
-    assert_eq!(write_data2, [198, 250, 0, 0].map(F::from_canonical_u32));
+    assert_eq!(write_data, [175, 33, 0, 0]);
+    assert_eq!(write_data2, [198, 250, 0, 0]);
 }
 
 #[test]
 fn run_loadbu_sanity_test() {
-    let read_data = [131, 74, 186, 29].map(F::from_canonical_u32);
-    let prev_data = [142, 67, 210, 88].map(F::from_canonical_u32);
+    let read_data = [131, 74, 186, 29];
+    let prev_data = [142, 67, 210, 88];
     let write_data = run_write_data(LOADBU, read_data, prev_data, 0);
     let write_data1 = run_write_data(LOADBU, read_data, prev_data, 1);
     let write_data2 = run_write_data(LOADBU, read_data, prev_data, 2);
     let write_data3 = run_write_data(LOADBU, read_data, prev_data, 3);
-    assert_eq!(write_data, [131, 0, 0, 0].map(F::from_canonical_u32));
-    assert_eq!(write_data1, [74, 0, 0, 0].map(F::from_canonical_u32));
-    assert_eq!(write_data2, [186, 0, 0, 0].map(F::from_canonical_u32));
-    assert_eq!(write_data3, [29, 0, 0, 0].map(F::from_canonical_u32));
+    assert_eq!(write_data, [131, 0, 0, 0]);
+    assert_eq!(write_data1, [74, 0, 0, 0]);
+    assert_eq!(write_data2, [186, 0, 0, 0]);
+    assert_eq!(write_data3, [29, 0, 0, 0]);
 }
diff --git a/extensions/rv32im/circuit/src/mul/core.rs b/extensions/rv32im/circuit/src/mul/core.rs
index fa65a6cf09..45b2f262cc 100644
--- a/extensions/rv32im/circuit/src/mul/core.rs
+++ b/extensions/rv32im/circuit/src/mul/core.rs
@@ -3,13 +3,16 @@ use std::{
     borrow::{Borrow, BorrowMut},
 };
 
-use openvm_circuit::arch::{
-    AdapterAirContext, AdapterRuntimeContext, MinimalInstruction, Result, VmAdapterInterface,
-    VmCoreAir, VmCoreChip,
+use openvm_circuit::{
+    arch::*,
+    system::memory::{online::TracingMemory, MemoryAuxColsFactory},
+};
+use openvm_circuit_primitives::{
+    range_tuple::{RangeTupleCheckerBus, SharedRangeTupleCheckerChip},
+    AlignedBytesBorrow,
 };
-use openvm_circuit_primitives::range_tuple::{RangeTupleCheckerBus, SharedRangeTupleCheckerChip};
 use openvm_circuit_primitives_derive::AlignedBorrow;
-use openvm_instructions::{instruction::Instruction, LocalOpcode};
+use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP, LocalOpcode};
 use openvm_rv32im_transpiler::MulOpcode;
 use openvm_stark_backend::{
     interaction::InteractionBuilder,
@@ -17,8 +20,6 @@ use openvm_stark_backend::{
     p3_field::{Field, FieldAlgebra, PrimeField32},
     rap::BaseAirWithPublicValues,
 };
-use serde::{de::DeserializeOwned, Deserialize, Serialize};
-use serde_big_array::BigArray;
 
 #[repr(C)]
 #[derive(AlignedBorrow)]
@@ -29,7 +30,7 @@ pub struct MultiplicationCoreCols<T, const NUM_LIMBS: usize, const LIMB_BITS: us
     pub is_valid: T,
 }
 
-#[derive(Copy, Clone, Debug)]
+#[derive(Copy, Clone, Debug, derive_new::new)]
 pub struct MultiplicationCoreAir<const NUM_LIMBS: usize, const LIMB_BITS: usize> {
     pub bus: RangeTupleCheckerBus<2>,
     pub offset: usize,
@@ -109,14 +110,34 @@ where
     }
 }
 
-#[derive(Debug)]
-pub struct MultiplicationCoreChip<const NUM_LIMBS: usize, const LIMB_BITS: usize> {
-    pub air: MultiplicationCoreAir<NUM_LIMBS, LIMB_BITS>,
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct MultiplicationCoreRecord<const NUM_LIMBS: usize, const LIMB_BITS: usize> {
+    pub b: [u8; NUM_LIMBS], // first operand limbs (`rs1` in `execute`)
+    pub c: [u8; NUM_LIMBS], // second operand limbs (`rs2`); the product is recomputed when filling
+}
+
+#[derive(Clone, Copy, derive_new::new)]
+pub struct MultiplicationExecutor<A, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
+    adapter: A, // performs the operand reads and the result write in `execute`
+    pub offset: usize, // global-to-local opcode offset (see `get_opcode_name`)
+}
+
+#[derive(Clone, Debug)]
+pub struct MultiplicationFiller<A, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
+    adapter: A,
+    pub offset: usize,
     pub range_tuple_chip: SharedRangeTupleCheckerChip<2>,
 }
 
-impl<const NUM_LIMBS: usize, const LIMB_BITS: usize> MultiplicationCoreChip<NUM_LIMBS, LIMB_BITS> {
-    pub fn new(range_tuple_chip: SharedRangeTupleCheckerChip<2>, offset: usize) -> Self {
+impl<A, const NUM_LIMBS: usize, const LIMB_BITS: usize>
+    MultiplicationFiller<A, NUM_LIMBS, LIMB_BITS>
+{
+    pub fn new(
+        adapter: A,
+        range_tuple_chip: SharedRangeTupleCheckerChip<2>,
+        offset: usize,
+    ) -> Self {
         // The RangeTupleChecker is used to range check (a[i], carry[i]) pairs where 0 <= i
         // < NUM_LIMBS. a[i] must have LIMB_BITS bits and carry[i] is the sum of i + 1 bytes
         // (with LIMB_BITS bits).
@@ -132,102 +153,117 @@ impl<const NUM_LIMBS: usize, const LIMB_BITS: usize> MultiplicationCoreChip<NUM_
         );
 
         Self {
-            air: MultiplicationCoreAir {
-                bus: *range_tuple_chip.bus(),
-                offset,
-            },
+            adapter,
+            offset,
             range_tuple_chip,
         }
     }
 }
 
-#[repr(C)]
-#[derive(Clone, Debug, Serialize, Deserialize)]
-#[serde(bound = "T: Serialize + DeserializeOwned")]
-pub struct MultiplicationCoreRecord<T, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
-    #[serde(with = "BigArray")]
-    pub a: [T; NUM_LIMBS],
-    #[serde(with = "BigArray")]
-    pub b: [T; NUM_LIMBS],
-    #[serde(with = "BigArray")]
-    pub c: [T; NUM_LIMBS],
-}
-
-impl<F: PrimeField32, I: VmAdapterInterface<F>, const NUM_LIMBS: usize, const LIMB_BITS: usize>
-    VmCoreChip<F, I> for MultiplicationCoreChip<NUM_LIMBS, LIMB_BITS>
+impl<F, A, RA, const NUM_LIMBS: usize, const LIMB_BITS: usize> PreflightExecutor<F, RA>
+    for MultiplicationExecutor<A, NUM_LIMBS, LIMB_BITS>
 where
-    I::Reads: Into<[[F; NUM_LIMBS]; 2]>,
-    I::Writes: From<[[F; NUM_LIMBS]; 1]>,
+    F: PrimeField32,
+    A: 'static
+        + AdapterTraceExecutor<
+            F,
+            ReadData: Into<[[u8; NUM_LIMBS]; 2]>,
+            WriteData: From<[[u8; NUM_LIMBS]; 1]>,
+        >,
+    for<'buf> RA: RecordArena<
+        'buf,
+        EmptyAdapterCoreLayout<F, A>,
+        (
+            A::RecordMut<'buf>,
+            &'buf mut MultiplicationCoreRecord<NUM_LIMBS, LIMB_BITS>,
+        ),
+    >,
 {
-    type Record = MultiplicationCoreRecord<F, NUM_LIMBS, LIMB_BITS>;
-    type Air = MultiplicationCoreAir<NUM_LIMBS, LIMB_BITS>;
+    fn get_opcode_name(&self, opcode: usize) -> String {
+        format!("{:?}", MulOpcode::from_usize(opcode - self.offset))
+    }
 
-    #[allow(clippy::type_complexity)]
-    fn execute_instruction(
+    fn execute(
         &self,
+        state: VmStateMut<F, TracingMemory, RA>,
         instruction: &Instruction<F>,
-        _from_pc: u32,
-        reads: I::Reads,
-    ) -> Result<(AdapterRuntimeContext<F, I>, Self::Record)> {
+    ) -> Result<(), ExecutionError> {
         let Instruction { opcode, .. } = instruction;
-        assert_eq!(
-            MulOpcode::from_usize(opcode.local_opcode_idx(self.air.offset)),
+
+        debug_assert_eq!(
+            MulOpcode::from_usize(opcode.local_opcode_idx(self.offset)),
             MulOpcode::MUL
         );
+        let (mut adapter_record, core_record) = state.ctx.alloc(EmptyAdapterCoreLayout::new());
 
-        let data: [[F; NUM_LIMBS]; 2] = reads.into();
-        let b = data[0].map(|x| x.as_canonical_u32());
-        let c = data[1].map(|y| y.as_canonical_u32());
-        let (a, carry) = run_mul::<NUM_LIMBS, LIMB_BITS>(&b, &c);
+        A::start(*state.pc, state.memory, &mut adapter_record);
 
-        for (a, carry) in a.iter().zip(carry.iter()) {
-            self.range_tuple_chip.add_count(&[*a, *carry]);
-        }
+        let [rs1, rs2] = self
+            .adapter
+            .read(state.memory, instruction, &mut adapter_record)
+            .into();
 
-        let output = AdapterRuntimeContext::without_pc([a.map(F::from_canonical_u32)]);
-        let record = MultiplicationCoreRecord {
-            a: a.map(F::from_canonical_u32),
-            b: data[0],
-            c: data[1],
-        };
+        let (a, _) = run_mul::<NUM_LIMBS, LIMB_BITS>(&rs1, &rs2);
 
-        Ok((output, record))
-    }
+        core_record.b = rs1;
+        core_record.c = rs2;
 
-    fn get_opcode_name(&self, opcode: usize) -> String {
-        format!("{:?}", MulOpcode::from_usize(opcode - self.air.offset))
-    }
+        self.adapter
+            .write(state.memory, instruction, [a].into(), &mut adapter_record);
 
-    fn generate_trace_row(&self, row_slice: &mut [F], record: Self::Record) {
-        let row_slice: &mut MultiplicationCoreCols<_, NUM_LIMBS, LIMB_BITS> =
-            row_slice.borrow_mut();
-        row_slice.a = record.a;
-        row_slice.b = record.b;
-        row_slice.c = record.c;
-        row_slice.is_valid = F::ONE;
+        *state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP);
+        Ok(())
     }
+}
+
+impl<F, A, const NUM_LIMBS: usize, const LIMB_BITS: usize> TraceFiller<F>
+    for MultiplicationFiller<A, NUM_LIMBS, LIMB_BITS>
+where
+    F: PrimeField32,
+    A: 'static + AdapterTraceFiller<F>,
+{
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, row_slice: &mut [F]) {
+        let (adapter_row, mut core_row) = unsafe { row_slice.split_at_mut_unchecked(A::WIDTH) };
+        self.adapter.fill_trace_row(mem_helper, adapter_row); // adapter owns the first A::WIDTH cells
+
+        let record: &MultiplicationCoreRecord<NUM_LIMBS, LIMB_BITS> =
+            unsafe { get_record_from_slice(&mut core_row, ()) };
+
+        let core_row: &mut MultiplicationCoreCols<F, NUM_LIMBS, LIMB_BITS> = core_row.borrow_mut();
+
+        let (a, carry) = run_mul::<NUM_LIMBS, LIMB_BITS>(&record.b, &record.c); // recompute product
+
+        for (a, carry) in a.iter().zip(carry.iter()) {
+            self.range_tuple_chip.add_count(&[*a as u32, *carry]); // one count per (limb, carry) pair
+        }
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        // Write last column first; presumably `record` aliases the start of this row - confirm.
+        core_row.is_valid = F::ONE;
+        core_row.c = record.c.map(F::from_canonical_u8);
+        core_row.b = record.b.map(F::from_canonical_u8);
+        core_row.a = a.map(F::from_canonical_u8);
     }
 }
 
 // returns mul, carry
+#[inline(always)]
 pub(super) fn run_mul<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
-    x: &[u32; NUM_LIMBS],
-    y: &[u32; NUM_LIMBS],
-) -> ([u32; NUM_LIMBS], [u32; NUM_LIMBS]) {
-    let mut result = [0; NUM_LIMBS];
-    let mut carry = [0; NUM_LIMBS];
+    x: &[u8; NUM_LIMBS],
+    y: &[u8; NUM_LIMBS],
+) -> ([u8; NUM_LIMBS], [u32; NUM_LIMBS]) {
+    let mut result = [0u8; NUM_LIMBS]; // low limbs of the product, index 0 first
+    let mut carry = [0u32; NUM_LIMBS]; // carry[i] is what limb i passes on to limb i + 1
     for i in 0..NUM_LIMBS {
+        let mut res = 0u32; // u32 accumulator: a single limb product can already exceed u8
         if i > 0 {
-            result[i] = carry[i - 1];
+            res = carry[i - 1]; // start from the carry out of the previous limb
         }
         for j in 0..=i {
-            result[i] += x[j] * y[i - j];
+            res += (x[j] as u32) * (y[i - j] as u32); // widen before multiplying
         }
-        carry[i] = result[i] >> LIMB_BITS;
-        result[i] %= 1 << LIMB_BITS;
+        carry[i] = res >> LIMB_BITS;
+        res %= 1u32 << LIMB_BITS;
+        result[i] = res as u8; // NOTE(review): lossless only if LIMB_BITS <= 8 - confirm callers
     }
     (result, carry)
 }
diff --git a/extensions/rv32im/circuit/src/mul/execution.rs b/extensions/rv32im/circuit/src/mul/execution.rs
new file mode 100644
index 0000000000..73376d8f98
--- /dev/null
+++ b/extensions/rv32im/circuit/src/mul/execution.rs
@@ -0,0 +1,141 @@
+use std::{
+    borrow::{Borrow, BorrowMut},
+    mem::size_of,
+};
+
+use openvm_circuit::{
+    arch::{
+        E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait,
+        MeteredExecutor, StaticProgramError, VmExecState,
+    },
+    system::memory::online::GuestMemory,
+};
+use openvm_circuit_primitives_derive::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_REGISTER_AS, RV32_REGISTER_NUM_LIMBS},
+    LocalOpcode,
+};
+use openvm_rv32im_transpiler::MulOpcode;
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use crate::MultiplicationExecutor;
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct MultiPreCompute { // operands of one MUL instruction, filled in by `pre_compute_impl`
+    a: u8, // low byte of `inst.a`; pointer written with the product in `execute_e12_impl`
+    b: u8, // low byte of `inst.b`; pointer of the first operand read
+    c: u8, // low byte of `inst.c`; pointer of the second operand read
+}
+
+impl<A, const LIMB_BITS: usize> MultiplicationExecutor<A, { RV32_REGISTER_NUM_LIMBS }, LIMB_BITS> {
+    fn pre_compute_impl<F: PrimeField32>( // decode step shared by `pre_compute` and `metered_pre_compute`
+        &self,
+        pc: u32, // only used to tag the `InvalidInstruction` error
+        inst: &Instruction<F>,
+        data: &mut MultiPreCompute, // overwritten with the decoded operands on success
+    ) -> Result<(), StaticProgramError> {
+        assert_eq!( // panics (does not return Err) if the local opcode is not MUL
+            MulOpcode::from_usize(inst.opcode.local_opcode_idx(self.offset)),
+            MulOpcode::MUL
+        );
+        if inst.d.as_canonical_u32() != RV32_REGISTER_AS { // operand `d` must equal `RV32_REGISTER_AS`
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+
+        *data = MultiPreCompute {
+            a: inst.a.as_canonical_u32() as u8, // `as u8` keeps only the low 8 bits
+            b: inst.b.as_canonical_u32() as u8,
+            c: inst.c.as_canonical_u32() as u8,
+        };
+        Ok(())
+    }
+}
+
+impl<F, A, const LIMB_BITS: usize> Executor<F>
+    for MultiplicationExecutor<A, { RV32_REGISTER_NUM_LIMBS }, LIMB_BITS>
+where
+    F: PrimeField32,
+{
+    fn pre_compute_size(&self) -> usize { // byte size of the struct `pre_compute` views `data` as
+        size_of::<MultiPreCompute>()
+    }
+    fn pre_compute<Ctx>( // decodes `inst` into `data` and returns the handler to run for it
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8], // reinterpreted as a `MultiPreCompute` via `borrow_mut`
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: ExecutionCtxTrait,
+    {
+        let pre_compute: &mut MultiPreCompute = data.borrow_mut();
+        self.pre_compute_impl(pc, inst, pre_compute)?; // propagates `InvalidInstruction`
+        Ok(execute_e1_impl)
+    }
+}
+
+impl<F, A, const LIMB_BITS: usize> MeteredExecutor<F>
+    for MultiplicationExecutor<A, { RV32_REGISTER_NUM_LIMBS }, LIMB_BITS>
+where
+    F: PrimeField32,
+{
+    fn metered_pre_compute_size(&self) -> usize { // size of the operands plus the `E2PreCompute` wrapper
+        size_of::<E2PreCompute<MultiPreCompute>>()
+    }
+
+    fn metered_pre_compute<Ctx>( // like `pre_compute`, but also records `chip_idx` for metering
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8], // reinterpreted as `E2PreCompute<MultiPreCompute>` via `borrow_mut`
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        let pre_compute: &mut E2PreCompute<MultiPreCompute> = data.borrow_mut();
+        pre_compute.chip_idx = chip_idx as u32; // NOTE(review): `as u32` truncates a larger usize
+        self.pre_compute_impl(pc, inst, &mut pre_compute.data)?;
+        Ok(execute_e2_impl)
+    }
+}
+
+#[inline(always)]
+unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait>( // MUL body shared by the e1 and e2 handlers
+    pre_compute: &MultiPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let rs1: [u8; RV32_REGISTER_NUM_LIMBS] =
+        vm_state.vm_read(RV32_REGISTER_AS, pre_compute.b as u32);
+    let rs2: [u8; RV32_REGISTER_NUM_LIMBS] =
+        vm_state.vm_read(RV32_REGISTER_AS, pre_compute.c as u32);
+    let rs1 = u32::from_le_bytes(rs1); // limbs are treated as the little-endian bytes of a u32
+    let rs2 = u32::from_le_bytes(rs2);
+    let rd = rs1.wrapping_mul(rs2); // keeps only the low 32 bits of the product
+    vm_state.vm_write(RV32_REGISTER_AS, pre_compute.a as u32, &rd.to_le_bytes());
+
+    vm_state.pc += DEFAULT_PC_STEP; // advance pc by the default step
+    vm_state.instret += 1; // bump the `instret` counter once per call
+}
+
+unsafe fn execute_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait>( // handler returned by `Executor::pre_compute`
+    pre_compute: &[u8], // raw bytes previously filled in by `pre_compute`
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &MultiPreCompute = pre_compute.borrow(); // view the bytes as the typed operands
+    execute_e12_impl(pre_compute, vm_state);
+}
+
+unsafe fn execute_e2_impl<F: PrimeField32, CTX: MeteredExecutionCtxTrait>( // handler returned by `metered_pre_compute`
+    pre_compute: &[u8], // raw bytes previously filled in by `metered_pre_compute`
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &E2PreCompute<MultiPreCompute> = pre_compute.borrow();
+    vm_state
+        .ctx
+        .on_height_change(pre_compute.chip_idx as usize, 1); // report a height change of 1 for this chip before executing
+    execute_e12_impl(&pre_compute.data, vm_state);
+}
diff --git a/extensions/rv32im/circuit/src/mul/mod.rs b/extensions/rv32im/circuit/src/mul/mod.rs
index 5f28439977..680f192e36 100644
--- a/extensions/rv32im/circuit/src/mul/mod.rs
+++ b/extensions/rv32im/circuit/src/mul/mod.rs
@@ -1,15 +1,22 @@
-use openvm_circuit::arch::VmChipWrapper;
+use openvm_circuit::arch::{VmAirWrapper, VmChipWrapper};
 
-use super::adapters::{Rv32MultAdapterChip, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS};
+use super::adapters::{RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS};
+use crate::adapters::{Rv32MultAdapterAir, Rv32MultAdapterExecutor, Rv32MultAdapterFiller};
 
 mod core;
+mod execution;
 pub use core::*;
 
 #[cfg(test)]
 mod tests;
 
+pub type Rv32MultiplicationAir = VmAirWrapper< // AIR alias: RV32 mult adapter AIR + multiplication core AIR
+    Rv32MultAdapterAir,
+    MultiplicationCoreAir<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>,
+>;
+pub type Rv32MultiplicationExecutor = // executor alias fixed to the RV32 adapter, limb count and cell width
+    MultiplicationExecutor<Rv32MultAdapterExecutor, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>;
 pub type Rv32MultiplicationChip<F> = VmChipWrapper<
     F,
-    Rv32MultAdapterChip<F>,
-    MultiplicationCoreChip<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>,
+    MultiplicationFiller<Rv32MultAdapterFiller, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>,
 >;
diff --git a/extensions/rv32im/circuit/src/mul/tests.rs b/extensions/rv32im/circuit/src/mul/tests.rs
index b942c24cc3..e2d9bd42a3 100644
--- a/extensions/rv32im/circuit/src/mul/tests.rs
+++ b/extensions/rv32im/circuit/src/mul/tests.rs
@@ -1,15 +1,11 @@
-use std::borrow::BorrowMut;
+use std::{array, borrow::BorrowMut};
 
-use openvm_circuit::{
-    arch::{
-        testing::{TestAdapterChip, VmChipTestBuilder, RANGE_TUPLE_CHECKER_BUS},
-        ExecutionBridge, VmAdapterChip, VmChipWrapper,
-    },
-    utils::generate_long_number,
+use openvm_circuit::arch::testing::{TestChipHarness, VmChipTestBuilder, RANGE_TUPLE_CHECKER_BUS};
+use openvm_circuit_primitives::range_tuple::{
+    RangeTupleCheckerAir, RangeTupleCheckerBus, RangeTupleCheckerChip, SharedRangeTupleCheckerChip,
 };
-use openvm_circuit_primitives::range_tuple::{RangeTupleCheckerBus, SharedRangeTupleCheckerChip};
-use openvm_instructions::{instruction::Instruction, LocalOpcode};
-use openvm_rv32im_transpiler::MulOpcode;
+use openvm_instructions::LocalOpcode;
+use openvm_rv32im_transpiler::MulOpcode::{self, MUL};
 use openvm_stark_backend::{
     p3_air::BaseAir,
     p3_field::FieldAlgebra,
@@ -18,19 +14,88 @@ use openvm_stark_backend::{
         Matrix,
     },
     utils::disable_debug_builder,
-    verifier::VerificationError,
-    ChipUsageGetter,
 };
 use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
+use rand::{rngs::StdRng, Rng};
 
 use super::core::run_mul;
 use crate::{
-    adapters::{Rv32MultAdapterChip, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS},
-    mul::{MultiplicationCoreChip, MultiplicationCoreCols, Rv32MultiplicationChip},
-    test_utils::rv32_rand_write_register_or_imm,
+    adapters::{
+        Rv32MultAdapterAir, Rv32MultAdapterExecutor, Rv32MultAdapterFiller, RV32_CELL_BITS,
+        RV32_REGISTER_NUM_LIMBS,
+    },
+    mul::{MultiplicationCoreCols, Rv32MultiplicationChip},
+    test_utils::{get_verification_error, rv32_rand_write_register_or_imm},
+    MultiplicationCoreAir, MultiplicationFiller, Rv32MultiplicationAir, Rv32MultiplicationExecutor,
 };
 
+const MAX_INS_CAPACITY: usize = 128;
+// the max number of limbs we currently support MUL for is 32 (i.e. for U256s)
+const MAX_NUM_LIMBS: u32 = 32;
 type F = BabyBear;
+type Harness = TestChipHarness<
+    F,
+    Rv32MultiplicationExecutor,
+    Rv32MultiplicationAir,
+    Rv32MultiplicationChip<F>,
+>;
+
+fn create_test_chip( // builds the MUL test harness plus the range-tuple periphery it shares
+    tester: &mut VmChipTestBuilder<F>,
+) -> (
+    Harness,
+    (RangeTupleCheckerAir<2>, SharedRangeTupleCheckerChip<2>), // returned so the test can `load_periphery` it
+) {
+    let range_tuple_bus = RangeTupleCheckerBus::new(
+        RANGE_TUPLE_CHECKER_BUS,
+        [1 << RV32_CELL_BITS, MAX_NUM_LIMBS * (1 << RV32_CELL_BITS)], // sizes of the two tuple components
+    );
+    let range_tuple_chip =
+        SharedRangeTupleCheckerChip::new(RangeTupleCheckerChip::<2>::new(range_tuple_bus));
+
+    let air = Rv32MultiplicationAir::new(
+        Rv32MultAdapterAir::new(tester.execution_bridge(), tester.memory_bridge()),
+        MultiplicationCoreAir::new(range_tuple_bus, MulOpcode::CLASS_OFFSET),
+    );
+    let executor =
+        Rv32MultiplicationExecutor::new(Rv32MultAdapterExecutor, MulOpcode::CLASS_OFFSET);
+    let chip = Rv32MultiplicationChip::<F>::new(
+        MultiplicationFiller::new(
+            Rv32MultAdapterFiller,
+            range_tuple_chip.clone(), // the filler gets a clone of the same shared chip that is returned below
+            MulOpcode::CLASS_OFFSET,
+        ),
+        tester.memory_helper(),
+    );
+    let harness = Harness::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
+
+    (harness, (range_tuple_chip.air, range_tuple_chip))
+}
+
+#[allow(clippy::too_many_arguments)]
+fn set_and_execute( // runs one instruction through the harness and checks the written result against `run_mul`
+    tester: &mut VmChipTestBuilder<F>,
+    harness: &mut Harness,
+    rng: &mut StdRng,
+    opcode: MulOpcode,
+    b: Option<[u8; RV32_REGISTER_NUM_LIMBS]>, // `None` means "use random limbs"
+    c: Option<[u8; RV32_REGISTER_NUM_LIMBS]>, // `None` means "use random limbs"
+) {
+    let b = b.unwrap_or(array::from_fn(|_| rng.gen_range(0..=u8::MAX))); // eager: the RNG advances even when `b` is `Some`
+    let c = c.unwrap_or(array::from_fn(|_| rng.gen_range(0..=u8::MAX))); // eager: the RNG advances even when `c` is `Some`
+
+    let (mut instruction, rd) =
+        rv32_rand_write_register_or_imm(tester, b, c, None, opcode.global_opcode().as_usize(), rng);
+
+    instruction.e = F::ZERO; // NOTE(review): overrides whatever `e` the helper chose; confirm why MUL needs zero
+    tester.execute(harness, &instruction);
+
+    let (a, _) = run_mul::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(&b, &c); // expected limbs; carries are ignored
+    assert_eq!(
+        a.map(F::from_canonical_u8),
+        tester.read::<RV32_REGISTER_NUM_LIMBS>(1, rd) // presumably 1 is the register address space; verify
+    )
+}
 
 //////////////////////////////////////////////////////////////////////////////////////
 // POSITIVE TESTS
@@ -39,144 +104,77 @@ type F = BabyBear;
 // passes all constraints.
 //////////////////////////////////////////////////////////////////////////////////////
 
-fn run_rv32_mul_rand_test(num_ops: usize) {
-    // the max number of limbs we currently support MUL for is 32 (i.e. for U256s)
-    const MAX_NUM_LIMBS: u32 = 32;
+#[test]
+fn run_rv32_mul_rand_test() {
     let mut rng = create_seeded_rng();
-
-    let range_tuple_bus = RangeTupleCheckerBus::new(
-        RANGE_TUPLE_CHECKER_BUS,
-        [1 << RV32_CELL_BITS, MAX_NUM_LIMBS * (1 << RV32_CELL_BITS)],
-    );
-    let range_tuple_checker = SharedRangeTupleCheckerChip::new(range_tuple_bus);
-
     let mut tester = VmChipTestBuilder::default();
-    let mut chip = Rv32MultiplicationChip::<F>::new(
-        Rv32MultAdapterChip::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-        ),
-        MultiplicationCoreChip::new(range_tuple_checker.clone(), MulOpcode::CLASS_OFFSET),
-        tester.offline_memory_mutex_arc(),
-    );
 
+    let (mut harness, range_tuple) = create_test_chip(&mut tester);
+    let num_ops = 100;
     for _ in 0..num_ops {
-        let b = generate_long_number::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(&mut rng);
-        let c = generate_long_number::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(&mut rng);
-
-        let (mut instruction, rd) = rv32_rand_write_register_or_imm(
-            &mut tester,
-            b,
-            c,
-            None,
-            MulOpcode::MUL.global_opcode().as_usize(),
-            &mut rng,
-        );
-        instruction.e = F::ZERO;
-        tester.execute(&mut chip, &instruction);
-
-        let (a, _) = run_mul::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(&b, &c);
-        assert_eq!(
-            a.map(F::from_canonical_u32),
-            tester.read::<RV32_REGISTER_NUM_LIMBS>(1, rd)
-        )
+        set_and_execute(&mut tester, &mut harness, &mut rng, MUL, None, None);
     }
 
     let tester = tester
         .build()
-        .load(chip)
-        .load(range_tuple_checker)
+        .load(harness)
+        .load_periphery(range_tuple)
         .finalize();
     tester.simple_test().expect("Verification failed");
 }
 
-#[test]
-fn rv32_mul_rand_test() {
-    run_rv32_mul_rand_test(1);
-}
-
 //////////////////////////////////////////////////////////////////////////////////////
 // NEGATIVE TESTS
 //
 // Given a fake trace of a single operation, setup a chip and run the test. We replace
-// the write part of the trace and check that the core chip throws the expected error.
-// A dummy adapter is used so memory interactions don't indirectly cause false passes.
+// part of the trace and check that the chip throws the expected error.
 //////////////////////////////////////////////////////////////////////////////////////
 
-type Rv32MultiplicationTestChip<F> = VmChipWrapper<
-    F,
-    TestAdapterChip<F>,
-    MultiplicationCoreChip<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>,
->;
-
 #[allow(clippy::too_many_arguments)]
-fn run_rv32_mul_negative_test(
-    a: [u32; RV32_REGISTER_NUM_LIMBS],
-    b: [u32; RV32_REGISTER_NUM_LIMBS],
-    c: [u32; RV32_REGISTER_NUM_LIMBS],
-    is_valid: bool,
+fn run_negative_mul_test(
+    opcode: MulOpcode,
+    prank_a: [u32; RV32_REGISTER_NUM_LIMBS],
+    b: [u8; RV32_REGISTER_NUM_LIMBS],
+    c: [u8; RV32_REGISTER_NUM_LIMBS],
+    prank_is_valid: bool,
     interaction_error: bool,
 ) {
-    const MAX_NUM_LIMBS: u32 = 32;
-    let range_tuple_bus = RangeTupleCheckerBus::new(
-        RANGE_TUPLE_CHECKER_BUS,
-        [1 << RV32_CELL_BITS, MAX_NUM_LIMBS * (1 << RV32_CELL_BITS)],
-    );
-    let range_tuple_chip = SharedRangeTupleCheckerChip::new(range_tuple_bus);
-
+    let mut rng = create_seeded_rng();
     let mut tester = VmChipTestBuilder::default();
-    let mut chip = Rv32MultiplicationTestChip::<F>::new(
-        TestAdapterChip::new(
-            vec![[b.map(F::from_canonical_u32), c.map(F::from_canonical_u32)].concat()],
-            vec![None],
-            ExecutionBridge::new(tester.execution_bus(), tester.program_bus()),
-        ),
-        MultiplicationCoreChip::new(range_tuple_chip.clone(), MulOpcode::CLASS_OFFSET),
-        tester.offline_memory_mutex_arc(),
-    );
-
-    tester.execute(
-        &mut chip,
-        &Instruction::from_usize(MulOpcode::MUL.global_opcode(), [0, 0, 0, 1, 0]),
+    let (mut harness, range_tuple) = create_test_chip(&mut tester);
+
+    set_and_execute(
+        &mut tester,
+        &mut harness,
+        &mut rng,
+        opcode,
+        Some(b),
+        Some(c),
     );
 
-    let trace_width = chip.trace_width();
-    let adapter_width = BaseAir::<F>::width(chip.adapter.air());
-    let (_, carry) = run_mul::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(&b, &c);
-
-    range_tuple_chip.clear();
-    if is_valid {
-        for (a, carry) in a.iter().zip(carry.iter()) {
-            range_tuple_chip.add_count(&[*a, *carry]);
-        }
-    }
-
+    let adapter_width = BaseAir::<F>::width(&harness.air.adapter);
     let modify_trace = |trace: &mut DenseMatrix<BabyBear>| {
         let mut values = trace.row_slice(0).to_vec();
         let cols: &mut MultiplicationCoreCols<F, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS> =
             values.split_at_mut(adapter_width).1.borrow_mut();
-        cols.a = a.map(F::from_canonical_u32);
-        cols.is_valid = F::from_bool(is_valid);
-        *trace = RowMajorMatrix::new(values, trace_width);
+        cols.a = prank_a.map(F::from_canonical_u32);
+        cols.is_valid = F::from_bool(prank_is_valid);
+        *trace = RowMajorMatrix::new(values, trace.width());
     };
 
     disable_debug_builder();
     let tester = tester
         .build()
-        .load_and_prank_trace(chip, modify_trace)
-        .load(range_tuple_chip)
+        .load_and_prank_trace(harness, modify_trace)
+        .load_periphery(range_tuple)
         .finalize();
-    tester.simple_test_with_expected_error(if interaction_error {
-        VerificationError::ChallengePhaseError
-    } else {
-        VerificationError::OodEvaluationMismatch
-    });
+    tester.simple_test_with_expected_error(get_verification_error(interaction_error));
 }
 
 #[test]
 fn rv32_mul_wrong_negative_test() {
-    run_rv32_mul_negative_test(
+    run_negative_mul_test(
+        MUL,
         [63, 247, 125, 234],
         [51, 109, 78, 142],
         [197, 85, 150, 32],
@@ -187,7 +185,8 @@ fn rv32_mul_wrong_negative_test() {
 
 #[test]
 fn rv32_mul_is_valid_false_negative_test() {
-    run_rv32_mul_negative_test(
+    run_negative_mul_test(
+        MUL,
         [63, 247, 125, 234],
         [51, 109, 78, 142],
         [197, 85, 150, 32],
@@ -204,9 +203,9 @@ fn rv32_mul_is_valid_false_negative_test() {
 
 #[test]
 fn run_mul_sanity_test() {
-    let x: [u32; RV32_REGISTER_NUM_LIMBS] = [197, 85, 150, 32];
-    let y: [u32; RV32_REGISTER_NUM_LIMBS] = [51, 109, 78, 142];
-    let z: [u32; RV32_REGISTER_NUM_LIMBS] = [63, 247, 125, 232];
+    let x: [u8; RV32_REGISTER_NUM_LIMBS] = [197, 85, 150, 32];
+    let y: [u8; RV32_REGISTER_NUM_LIMBS] = [51, 109, 78, 142];
+    let z: [u8; RV32_REGISTER_NUM_LIMBS] = [63, 247, 125, 232];
     let c: [u32; RV32_REGISTER_NUM_LIMBS] = [39, 100, 126, 205];
     let (result, carry) = run_mul::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(&x, &y);
     for i in 0..RV32_REGISTER_NUM_LIMBS {
diff --git a/extensions/rv32im/circuit/src/mulh/core.rs b/extensions/rv32im/circuit/src/mulh/core.rs
index 16aa8fd550..a201cfc4f3 100644
--- a/extensions/rv32im/circuit/src/mulh/core.rs
+++ b/extensions/rv32im/circuit/src/mulh/core.rs
@@ -3,16 +3,17 @@ use std::{
     borrow::{Borrow, BorrowMut},
 };
 
-use openvm_circuit::arch::{
-    AdapterAirContext, AdapterRuntimeContext, MinimalInstruction, Result, VmAdapterInterface,
-    VmCoreAir, VmCoreChip,
+use openvm_circuit::{
+    arch::*,
+    system::memory::{online::TracingMemory, MemoryAuxColsFactory},
 };
 use openvm_circuit_primitives::{
     bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
     range_tuple::{RangeTupleCheckerBus, SharedRangeTupleCheckerChip},
+    AlignedBytesBorrow,
 };
 use openvm_circuit_primitives_derive::AlignedBorrow;
-use openvm_instructions::{instruction::Instruction, LocalOpcode};
+use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP, LocalOpcode};
 use openvm_rv32im_transpiler::MulHOpcode;
 use openvm_stark_backend::{
     interaction::InteractionBuilder,
@@ -20,8 +21,6 @@ use openvm_stark_backend::{
     p3_field::{Field, FieldAlgebra, PrimeField32},
     rap::BaseAirWithPublicValues,
 };
-use serde::{Deserialize, Serialize};
-use serde_big_array::BigArray;
 use strum::IntoEnumIterator;
 
 #[repr(C)]
@@ -40,7 +39,7 @@ pub struct MulHCoreCols<T, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
     pub opcode_mulhu_flag: T,
 }
 
-#[derive(Copy, Clone, Debug)]
+#[derive(Copy, Clone, Debug, derive_new::new)]
 pub struct MulHCoreAir<const NUM_LIMBS: usize, const LIMB_BITS: usize> {
     pub bitwise_lookup_bus: BitwiseOperationLookupBus,
     pub range_tuple_bus: RangeTupleCheckerBus<2>,
@@ -183,14 +182,30 @@ where
     }
 }
 
-pub struct MulHCoreChip<const NUM_LIMBS: usize, const LIMB_BITS: usize> {
-    pub air: MulHCoreAir<NUM_LIMBS, LIMB_BITS>,
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct MulHCoreRecord<const NUM_LIMBS: usize, const LIMB_BITS: usize> { // per-instruction record written by `execute`, read by `fill_trace_row`
+    pub b: [u8; NUM_LIMBS], // first operand limbs as returned by the adapter read
+    pub c: [u8; NUM_LIMBS], // second operand limbs as returned by the adapter read
+    pub local_opcode: u8, // opcode index relative to `MulHOpcode::CLASS_OFFSET`, narrowed with `as u8`
+}
+
+#[derive(Clone, Copy, derive_new::new)]
+pub struct MulHExecutor<A, const NUM_LIMBS: usize, const LIMB_BITS: usize> { // executor for the MulH opcode class, generic over adapter `A`
+    adapter: A, // performs the reads and the write in `execute`
+    pub offset: usize, // NOTE(review): the visible MulH code uses `MulHOpcode::CLASS_OFFSET`, not this field
+}
+
+#[derive(Clone)]
+pub struct MulHFiller<A, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
+    adapter: A,
     pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<LIMB_BITS>,
     pub range_tuple_chip: SharedRangeTupleCheckerChip<2>,
 }
 
-impl<const NUM_LIMBS: usize, const LIMB_BITS: usize> MulHCoreChip<NUM_LIMBS, LIMB_BITS> {
+impl<A, const NUM_LIMBS: usize, const LIMB_BITS: usize> MulHFiller<A, NUM_LIMBS, LIMB_BITS> {
     pub fn new(
+        adapter: A,
         bitwise_lookup_chip: SharedBitwiseOperationLookupChip<LIMB_BITS>,
         range_tuple_chip: SharedRangeTupleCheckerChip<2>,
     ) -> Self {
@@ -209,55 +224,93 @@ impl<const NUM_LIMBS: usize, const LIMB_BITS: usize> MulHCoreChip<NUM_LIMBS, LIM
         );
 
         Self {
-            air: MulHCoreAir {
-                bitwise_lookup_bus: bitwise_lookup_chip.bus(),
-                range_tuple_bus: *range_tuple_chip.bus(),
-            },
+            adapter,
             bitwise_lookup_chip,
             range_tuple_chip,
         }
     }
 }
 
-#[repr(C)]
-#[derive(Clone, Debug, Serialize, Deserialize)]
-pub struct MulHCoreRecord<T, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
-    pub opcode: MulHOpcode,
-    #[serde(with = "BigArray")]
-    pub a: [T; NUM_LIMBS],
-    #[serde(with = "BigArray")]
-    pub b: [T; NUM_LIMBS],
-    #[serde(with = "BigArray")]
-    pub c: [T; NUM_LIMBS],
-    #[serde(with = "BigArray")]
-    pub a_mul: [T; NUM_LIMBS],
-    pub b_ext: T,
-    pub c_ext: T,
-}
-
-impl<F: PrimeField32, I: VmAdapterInterface<F>, const NUM_LIMBS: usize, const LIMB_BITS: usize>
-    VmCoreChip<F, I> for MulHCoreChip<NUM_LIMBS, LIMB_BITS>
+impl<F, A, RA, const NUM_LIMBS: usize, const LIMB_BITS: usize> PreflightExecutor<F, RA>
+    for MulHExecutor<A, NUM_LIMBS, LIMB_BITS>
 where
-    I::Reads: Into<[[F; NUM_LIMBS]; 2]>,
-    I::Writes: From<[[F; NUM_LIMBS]; 1]>,
+    F: PrimeField32,
+    A: 'static
+        + AdapterTraceExecutor<
+            F,
+            ReadData: Into<[[u8; NUM_LIMBS]; 2]>,
+            WriteData: From<[[u8; NUM_LIMBS]; 1]>,
+        >,
+    for<'buf> RA: RecordArena<
+        'buf,
+        EmptyAdapterCoreLayout<F, A>,
+        (
+            A::RecordMut<'buf>,
+            &'buf mut MulHCoreRecord<NUM_LIMBS, LIMB_BITS>,
+        ),
+    >,
 {
-    type Record = MulHCoreRecord<F, NUM_LIMBS, LIMB_BITS>;
-    type Air = MulHCoreAir<NUM_LIMBS, LIMB_BITS>;
+    fn get_opcode_name(&self, opcode: usize) -> String {
+        format!(
+            "{:?}",
+            MulHOpcode::from_usize(opcode - MulHOpcode::CLASS_OFFSET)
+        )
+    }
 
-    #[allow(clippy::type_complexity)]
-    fn execute_instruction(
+    fn execute(
         &self,
+        state: VmStateMut<F, TracingMemory, RA>,
         instruction: &Instruction<F>,
-        _from_pc: u32,
-        reads: I::Reads,
-    ) -> Result<(AdapterRuntimeContext<F, I>, Self::Record)> {
+    ) -> Result<(), ExecutionError> {
         let Instruction { opcode, .. } = instruction;
-        let mulh_opcode = MulHOpcode::from_usize(opcode.local_opcode_idx(MulHOpcode::CLASS_OFFSET));
 
-        let data: [[F; NUM_LIMBS]; 2] = reads.into();
-        let b = data[0].map(|x| x.as_canonical_u32());
-        let c = data[1].map(|y| y.as_canonical_u32());
-        let (a, a_mul, carry, b_ext, c_ext) = run_mulh::<NUM_LIMBS, LIMB_BITS>(mulh_opcode, &b, &c);
+        let (mut adapter_record, core_record) = state.ctx.alloc(EmptyAdapterCoreLayout::new());
+
+        A::start(*state.pc, state.memory, &mut adapter_record);
+
+        core_record.local_opcode = opcode.local_opcode_idx(MulHOpcode::CLASS_OFFSET) as u8;
+        let mulh_opcode = MulHOpcode::from_usize(core_record.local_opcode as usize);
+
+        [core_record.b, core_record.c] = self
+            .adapter
+            .read(state.memory, instruction, &mut adapter_record)
+            .into();
+
+        let (a, _, _, _, _) = run_mulh::<NUM_LIMBS, LIMB_BITS>(
+            mulh_opcode,
+            &core_record.b.map(u32::from),
+            &core_record.c.map(u32::from),
+        );
+
+        let a = a.map(|x| x as u8);
+        self.adapter
+            .write(state.memory, instruction, [a].into(), &mut adapter_record);
+
+        *state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP);
+
+        Ok(())
+    }
+}
+
+impl<F, A, const NUM_LIMBS: usize, const LIMB_BITS: usize> TraceFiller<F>
+    for MulHFiller<A, NUM_LIMBS, LIMB_BITS>
+where
+    F: PrimeField32,
+    A: 'static + AdapterTraceFiller<F>,
+{
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, row_slice: &mut [F]) {
+        let (adapter_row, mut core_row) = unsafe { row_slice.split_at_mut_unchecked(A::WIDTH) };
+        self.adapter.fill_trace_row(mem_helper, adapter_row);
+        let record: &MulHCoreRecord<NUM_LIMBS, LIMB_BITS> =
+            unsafe { get_record_from_slice(&mut core_row, ()) };
+        let core_row: &mut MulHCoreCols<F, NUM_LIMBS, LIMB_BITS> = core_row.borrow_mut();
+
+        let opcode = MulHOpcode::from_usize(record.local_opcode as usize);
+        let (a, a_mul, carry, b_ext, c_ext) = run_mulh::<NUM_LIMBS, LIMB_BITS>(
+            opcode,
+            &record.b.map(u32::from),
+            &record.c.map(u32::from),
+        );
 
         for i in 0..NUM_LIMBS {
             self.range_tuple_chip.add_count(&[a_mul[i], carry[i]]);
@@ -265,55 +318,31 @@ where
                 .add_count(&[a[i], carry[NUM_LIMBS + i]]);
         }
 
-        if mulh_opcode != MulHOpcode::MULHU {
+        if opcode != MulHOpcode::MULHU {
             let b_sign_mask = if b_ext == 0 { 0 } else { 1 << (LIMB_BITS - 1) };
             let c_sign_mask = if c_ext == 0 { 0 } else { 1 << (LIMB_BITS - 1) };
             self.bitwise_lookup_chip.request_range(
-                (b[NUM_LIMBS - 1] - b_sign_mask) << 1,
-                (c[NUM_LIMBS - 1] - c_sign_mask) << ((mulh_opcode == MulHOpcode::MULH) as u32),
+                (record.b[NUM_LIMBS - 1] as u32 - b_sign_mask) << 1,
+                (record.c[NUM_LIMBS - 1] as u32 - c_sign_mask)
+                    << ((opcode == MulHOpcode::MULH) as u32),
             );
         }
 
-        let output = AdapterRuntimeContext::without_pc([a.map(F::from_canonical_u32)]);
-        let record = MulHCoreRecord {
-            opcode: mulh_opcode,
-            a: a.map(F::from_canonical_u32),
-            b: data[0],
-            c: data[1],
-            a_mul: a_mul.map(F::from_canonical_u32),
-            b_ext: F::from_canonical_u32(b_ext),
-            c_ext: F::from_canonical_u32(c_ext),
-        };
-
-        Ok((output, record))
-    }
-
-    fn get_opcode_name(&self, opcode: usize) -> String {
-        format!(
-            "{:?}",
-            MulHOpcode::from_usize(opcode - MulHOpcode::CLASS_OFFSET)
-        )
-    }
-
-    fn generate_trace_row(&self, row_slice: &mut [F], record: Self::Record) {
-        let row_slice: &mut MulHCoreCols<_, NUM_LIMBS, LIMB_BITS> = row_slice.borrow_mut();
-        row_slice.a = record.a;
-        row_slice.b = record.b;
-        row_slice.c = record.c;
-        row_slice.a_mul = record.a_mul;
-        row_slice.b_ext = record.b_ext;
-        row_slice.c_ext = record.c_ext;
-        row_slice.opcode_mulh_flag = F::from_bool(record.opcode == MulHOpcode::MULH);
-        row_slice.opcode_mulhsu_flag = F::from_bool(record.opcode == MulHOpcode::MULHSU);
-        row_slice.opcode_mulhu_flag = F::from_bool(record.opcode == MulHOpcode::MULHU);
-    }
-
-    fn air(&self) -> &Self::Air {
-        &self.air
+        // Write in reverse order
+        core_row.opcode_mulhu_flag = F::from_bool(opcode == MulHOpcode::MULHU);
+        core_row.opcode_mulhsu_flag = F::from_bool(opcode == MulHOpcode::MULHSU);
+        core_row.opcode_mulh_flag = F::from_bool(opcode == MulHOpcode::MULH);
+        core_row.c_ext = F::from_canonical_u32(c_ext);
+        core_row.b_ext = F::from_canonical_u32(b_ext);
+        core_row.a_mul = a_mul.map(F::from_canonical_u32);
+        core_row.c = record.c.map(F::from_canonical_u8);
+        core_row.b = record.b.map(F::from_canonical_u8);
+        core_row.a = a.map(F::from_canonical_u32);
     }
 }
 
 // returns mulh[[s]u], mul, carry, x_ext, y_ext
+#[inline(always)]
 pub(super) fn run_mulh<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
     opcode: MulHOpcode,
     x: &[u32; NUM_LIMBS],
diff --git a/extensions/rv32im/circuit/src/mulh/execution.rs b/extensions/rv32im/circuit/src/mulh/execution.rs
new file mode 100644
index 0000000000..1818a63080
--- /dev/null
+++ b/extensions/rv32im/circuit/src/mulh/execution.rs
@@ -0,0 +1,174 @@
+use std::{
+    borrow::{Borrow, BorrowMut},
+    mem::size_of,
+};
+
+use openvm_circuit::{
+    arch::{
+        E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait,
+        MeteredExecutor, StaticProgramError, VmExecState,
+    },
+    system::memory::online::GuestMemory,
+};
+use openvm_circuit_primitives_derive::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_REGISTER_AS, RV32_REGISTER_NUM_LIMBS},
+    LocalOpcode,
+};
+use openvm_rv32im_transpiler::MulHOpcode;
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use crate::MulHExecutor;
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct MulHPreCompute { // operands of one MULH/MULHSU/MULHU instruction, filled in by `pre_compute_impl`
+    a: u8, // low byte of `inst.a`; pointer written with the result in `execute_e12_impl`
+    b: u8, // low byte of `inst.b`; pointer of the first operand read
+    c: u8, // low byte of `inst.c`; pointer of the second operand read
+}
+
+impl<A, const LIMB_BITS: usize> MulHExecutor<A, { RV32_REGISTER_NUM_LIMBS }, LIMB_BITS> {
+    #[inline(always)]
+    fn pre_compute_impl<F: PrimeField32>( // decode step shared by `pre_compute` and `metered_pre_compute`
+        &self, // NOTE(review): `self.offset` is unused here; `MulHOpcode::CLASS_OFFSET` is used instead
+        inst: &Instruction<F>, // NOTE(review): `inst.d` is not validated in this function; confirm intended
+        data: &mut MulHPreCompute, // overwritten with the decoded operands
+    ) -> Result<MulHOpcode, StaticProgramError> { // no `Err` is constructed in this body
+        *data = MulHPreCompute {
+            a: inst.a.as_canonical_u32() as u8, // `as u8` keeps only the low 8 bits
+            b: inst.b.as_canonical_u32() as u8,
+            c: inst.c.as_canonical_u32() as u8,
+        };
+        Ok(MulHOpcode::from_usize( // decoded opcode lets the caller pick the matching handler
+            inst.opcode.local_opcode_idx(MulHOpcode::CLASS_OFFSET),
+        ))
+    }
+}
+
+impl<F, A, const LIMB_BITS: usize> Executor<F>
+    for MulHExecutor<A, { RV32_REGISTER_NUM_LIMBS }, LIMB_BITS>
+where
+    F: PrimeField32,
+{
+    #[inline(always)]
+    fn pre_compute_size(&self) -> usize { // byte size of the struct `pre_compute` views `data` as
+        size_of::<MulHPreCompute>()
+    }
+
+    #[inline(always)]
+    fn pre_compute<Ctx: ExecutionCtxTrait>( // decodes `inst` into `data` and picks the handler for its opcode
+        &self,
+        _pc: u32, // unused
+        inst: &Instruction<F>,
+        data: &mut [u8], // reinterpreted as a `MulHPreCompute` via `borrow_mut`
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let pre_compute: &mut MulHPreCompute = data.borrow_mut();
+        let local_opcode = self.pre_compute_impl(inst, pre_compute)?;
+        let fn_ptr = match local_opcode { // opcode is resolved here, once, into a monomorphized handler
+            MulHOpcode::MULH => execute_e1_impl::<_, _, MulHOp>,
+            MulHOpcode::MULHSU => execute_e1_impl::<_, _, MulHSuOp>,
+            MulHOpcode::MULHU => execute_e1_impl::<_, _, MulHUOp>,
+        };
+        Ok(fn_ptr)
+    }
+}
+
+impl<F, A, const LIMB_BITS: usize> MeteredExecutor<F>
+    for MulHExecutor<A, { RV32_REGISTER_NUM_LIMBS }, LIMB_BITS>
+where
+    F: PrimeField32,
+{
+    fn metered_pre_compute_size(&self) -> usize { // size of the operands plus the `E2PreCompute` wrapper
+        size_of::<E2PreCompute<MulHPreCompute>>()
+    }
+
+    fn metered_pre_compute<Ctx>( // like `pre_compute`, but also records `chip_idx` for metering
+        &self,
+        chip_idx: usize,
+        _pc: u32, // unused
+        inst: &Instruction<F>,
+        data: &mut [u8], // reinterpreted as `E2PreCompute<MulHPreCompute>` via `borrow_mut`
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        let pre_compute: &mut E2PreCompute<MulHPreCompute> = data.borrow_mut();
+        pre_compute.chip_idx = chip_idx as u32; // NOTE(review): `as u32` truncates a larger usize
+        let local_opcode = self.pre_compute_impl(inst, &mut pre_compute.data)?;
+        let fn_ptr = match local_opcode { // same dispatch as `pre_compute`, using the e2 handlers
+            MulHOpcode::MULH => execute_e2_impl::<_, _, MulHOp>,
+            MulHOpcode::MULHSU => execute_e2_impl::<_, _, MulHSuOp>,
+            MulHOpcode::MULHU => execute_e2_impl::<_, _, MulHUOp>,
+        };
+        Ok(fn_ptr)
+    }
+}
+
+#[inline(always)]
+unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait, OP: MulHOperation>( // body shared by the e1 and e2 handlers
+    pre_compute: &MulHPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let rs1: [u8; RV32_REGISTER_NUM_LIMBS] =
+        vm_state.vm_read(RV32_REGISTER_AS, pre_compute.b as u32);
+    let rs2: [u8; RV32_REGISTER_NUM_LIMBS] =
+        vm_state.vm_read(RV32_REGISTER_AS, pre_compute.c as u32);
+    let rd = <OP as MulHOperation>::compute(rs1, rs2); // `OP` selects the signedness variant at compile time
+    vm_state.vm_write(RV32_REGISTER_AS, pre_compute.a as u32, &rd);
+
+    vm_state.pc += DEFAULT_PC_STEP; // advance pc by the default step
+    vm_state.instret += 1; // bump the `instret` counter once per call
+}
+
+unsafe fn execute_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait, OP: MulHOperation>( // handler chosen by `Executor::pre_compute`
+    pre_compute: &[u8], // raw bytes previously filled in by `pre_compute`
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &MulHPreCompute = pre_compute.borrow(); // view the bytes as the typed operands
+    execute_e12_impl::<F, CTX, OP>(pre_compute, vm_state);
+}
+
+unsafe fn execute_e2_impl<F: PrimeField32, CTX: MeteredExecutionCtxTrait, OP: MulHOperation>( // handler chosen by `metered_pre_compute`
+    pre_compute: &[u8], // raw bytes previously filled in by `metered_pre_compute`
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &E2PreCompute<MulHPreCompute> = pre_compute.borrow();
+    vm_state
+        .ctx
+        .on_height_change(pre_compute.chip_idx as usize, 1); // report a height change of 1 for this chip before executing
+    execute_e12_impl::<F, CTX, OP>(&pre_compute.data, vm_state);
+}
+
+trait MulHOperation { // compile-time strategy: one implementor per MulH opcode variant
+    fn compute(rs1: [u8; 4], rs2: [u8; 4]) -> [u8; 4]; // operands and result are 4-byte arrays
+}
+struct MulHOp; // marker type selected for `MulHOpcode::MULH`
+struct MulHSuOp; // marker type selected for `MulHOpcode::MULHSU`
+struct MulHUOp; // marker type selected for `MulHOpcode::MULHU`
+impl MulHOperation for MulHOp {
+    #[inline(always)]
+    fn compute(rs1: [u8; 4], rs2: [u8; 4]) -> [u8; 4] {
+        let lhs = i64::from(i32::from_le_bytes(rs1)); // both operands are signed: sign-extend to 64 bits
+        let rhs = i64::from(i32::from_le_bytes(rs2));
+        ((lhs.wrapping_mul(rhs) >> 32) as u32).to_le_bytes() // upper 32 bits of the 64-bit product
+    }
+}
+impl MulHOperation for MulHSuOp {
+    #[inline(always)]
+    fn compute(rs1: [u8; 4], rs2: [u8; 4]) -> [u8; 4] {
+        let lhs = i64::from(i32::from_le_bytes(rs1)); // first operand is signed: sign-extend
+        let rhs = i64::from(u32::from_le_bytes(rs2)); // second operand is unsigned: zero-extend
+        ((lhs.wrapping_mul(rhs) >> 32) as u32).to_le_bytes() // upper 32 bits of the 64-bit product
+    }
+}
+impl MulHOperation for MulHUOp {
+    #[inline(always)]
+    fn compute(rs1: [u8; 4], rs2: [u8; 4]) -> [u8; 4] {
+        let lhs = u64::from(u32::from_le_bytes(rs1)); // both operands are unsigned: zero-extend to u64
+        let rhs = u64::from(u32::from_le_bytes(rs2)); // u64 holds the full product, so no wrap-around is relied on
+        (((lhs * rhs) >> 32) as u32).to_le_bytes() // upper 32 bits; max product (2^32-1)^2 < 2^64
+    }
+}
diff --git a/extensions/rv32im/circuit/src/mulh/mod.rs b/extensions/rv32im/circuit/src/mulh/mod.rs
index 284b77191a..c3a39d1f50 100644
--- a/extensions/rv32im/circuit/src/mulh/mod.rs
+++ b/extensions/rv32im/circuit/src/mulh/mod.rs
@@ -1,12 +1,18 @@
-use openvm_circuit::arch::VmChipWrapper;
+use openvm_circuit::arch::{VmAirWrapper, VmChipWrapper};
 
-use super::adapters::{Rv32MultAdapterChip, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS};
+use super::adapters::{RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS};
+use crate::adapters::{Rv32MultAdapterAir, Rv32MultAdapterExecutor, Rv32MultAdapterFiller};
 
 mod core;
+mod execution;
 pub use core::*;
 
 #[cfg(test)]
 mod tests;
 
+pub type Rv32MulHAir =
+    VmAirWrapper<Rv32MultAdapterAir, MulHCoreAir<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>>;
+pub type Rv32MulHExecutor =
+    MulHExecutor<Rv32MultAdapterExecutor, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>;
 pub type Rv32MulHChip<F> =
-    VmChipWrapper<F, Rv32MultAdapterChip<F>, MulHCoreChip<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>>;
+    VmChipWrapper<F, MulHFiller<Rv32MultAdapterFiller, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>>;
diff --git a/extensions/rv32im/circuit/src/mulh/tests.rs b/extensions/rv32im/circuit/src/mulh/tests.rs
index 1c7cf5b5cb..161e999987 100644
--- a/extensions/rv32im/circuit/src/mulh/tests.rs
+++ b/extensions/rv32im/circuit/src/mulh/tests.rs
@@ -1,21 +1,24 @@
-use std::borrow::BorrowMut;
+use std::{borrow::BorrowMut, sync::Arc};
 
 use openvm_circuit::{
-    arch::{
-        testing::{
-            memory::gen_pointer, TestAdapterChip, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS,
-            RANGE_TUPLE_CHECKER_BUS,
-        },
-        ExecutionBridge, InstructionExecutor, VmAdapterChip, VmChipWrapper,
+    arch::testing::{
+        memory::gen_pointer, TestChipHarness, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS,
+        RANGE_TUPLE_CHECKER_BUS,
     },
     utils::generate_long_number,
 };
 use openvm_circuit_primitives::{
-    bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
-    range_tuple::{RangeTupleCheckerBus, SharedRangeTupleCheckerChip},
+    bitwise_op_lookup::{
+        BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
+        SharedBitwiseOperationLookupChip,
+    },
+    range_tuple::{
+        RangeTupleCheckerAir, RangeTupleCheckerBus, RangeTupleCheckerChip,
+        SharedRangeTupleCheckerChip,
+    },
 };
 use openvm_instructions::{instruction::Instruction, LocalOpcode};
-use openvm_rv32im_transpiler::MulHOpcode;
+use openvm_rv32im_transpiler::MulHOpcode::{self, *};
 use openvm_stark_backend::{
     p3_air::BaseAir,
     p3_field::FieldAlgebra,
@@ -24,36 +27,90 @@ use openvm_stark_backend::{
         Matrix,
     },
     utils::disable_debug_builder,
-    verifier::VerificationError,
-    ChipUsageGetter,
 };
 use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
 use rand::rngs::StdRng;
+use test_case::test_case;
 
 use super::core::run_mulh;
 use crate::{
-    adapters::{Rv32MultAdapterChip, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS},
-    mulh::{MulHCoreChip, MulHCoreCols, Rv32MulHChip},
+    adapters::{
+        Rv32MultAdapterAir, Rv32MultAdapterExecutor, Rv32MultAdapterFiller, RV32_CELL_BITS,
+        RV32_REGISTER_NUM_LIMBS,
+    },
+    mulh::{MulHCoreCols, Rv32MulHChip},
+    test_utils::get_verification_error,
+    MulHCoreAir, MulHFiller, Rv32MulHAir, Rv32MulHExecutor,
 };
 
+const MAX_INS_CAPACITY: usize = 128;
+// the max number of limbs we currently support MUL for is 32 (i.e. for U256s)
+const MAX_NUM_LIMBS: u32 = 32;
 type F = BabyBear;
+type Harness = TestChipHarness<F, Rv32MulHExecutor, Rv32MulHAir, Rv32MulHChip<F>>;
 
-//////////////////////////////////////////////////////////////////////////////////////
-// POSITIVE TESTS
-//
-// Randomly generate computations and execute, ensuring that the generated trace
-// passes all constraints.
-//////////////////////////////////////////////////////////////////////////////////////
+fn create_test_chip(
+    tester: &mut VmChipTestBuilder<F>,
+) -> (
+    Harness, // executor + AIR + chip under test
+    (
+        BitwiseOperationLookupAir<RV32_CELL_BITS>,
+        SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+    ),
+    (RangeTupleCheckerAir<2>, SharedRangeTupleCheckerChip<2>),
+) {
+    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
+    let range_tuple_bus = RangeTupleCheckerBus::new(
+        RANGE_TUPLE_CHECKER_BUS,
+        [1 << RV32_CELL_BITS, MAX_NUM_LIMBS * (1 << RV32_CELL_BITS)], // presumably per-slot bounds
+    );
+
+    let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+        bitwise_bus,
+    ));
+    let range_tuple_chip =
+        SharedRangeTupleCheckerChip::new(RangeTupleCheckerChip::<2>::new(range_tuple_bus));
+
+    let air = Rv32MulHAir::new(
+        Rv32MultAdapterAir::new(tester.execution_bridge(), tester.memory_bridge()),
+        MulHCoreAir::new(bitwise_bus, range_tuple_bus), // same buses the lookup chips use
+    );
+    let executor = Rv32MulHExecutor::new(Rv32MultAdapterExecutor, MulHOpcode::CLASS_OFFSET);
+    let chip = Rv32MulHChip::<F>::new(
+        MulHFiller::new(
+            Rv32MultAdapterFiller,
+            bitwise_chip.clone(),
+            range_tuple_chip.clone(),
+        ),
+        tester.memory_helper(),
+    );
+    let harness = Harness::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
+
+    (
+        harness,
+        (bitwise_chip.air, bitwise_chip), // (AIR, chip) pair, passed to `load_periphery`
+        (range_tuple_chip.air, range_tuple_chip), // (AIR, chip) pair, passed to `load_periphery`
+    )
+}
 
 #[allow(clippy::too_many_arguments)]
-fn run_rv32_mulh_rand_write_execute<E: InstructionExecutor<F>>(
-    opcode: MulHOpcode,
+fn set_and_execute(
     tester: &mut VmChipTestBuilder<F>,
-    chip: &mut E,
-    b: [u32; RV32_REGISTER_NUM_LIMBS],
-    c: [u32; RV32_REGISTER_NUM_LIMBS],
+    harness: &mut Harness,
     rng: &mut StdRng,
+    opcode: MulHOpcode,
+    b: Option<[u32; RV32_REGISTER_NUM_LIMBS]>, // `None` => random operand
+    c: Option<[u32; RV32_REGISTER_NUM_LIMBS]>, // `None` => random operand
 ) {
+    // Draw a random operand only when the caller did not pin one; the lazy
+    // closure leaves `rng` untouched for operands passed as `Some(..)`.
+    let b = b.unwrap_or_else(|| {
+        generate_long_number::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(rng)
+    });
+    let c = c.unwrap_or_else(|| {
+        generate_long_number::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(rng)
+    });
+
     let rs1 = gen_pointer(rng, 4);
     let rs2 = gen_pointer(rng, 4);
     let rd = gen_pointer(rng, 4);
@@ -61,160 +118,103 @@ fn run_rv32_mulh_rand_write_execute<E: InstructionExecutor<F>>(
     tester.write::<RV32_REGISTER_NUM_LIMBS>(1, rs1, b.map(F::from_canonical_u32));
     tester.write::<RV32_REGISTER_NUM_LIMBS>(1, rs2, c.map(F::from_canonical_u32));
 
-    let (a, _, _, _, _) = run_mulh::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(opcode, &b, &c);
     tester.execute(
-        chip,
+        harness,
         &Instruction::from_usize(opcode.global_opcode(), [rd, rs1, rs2, 1, 0]),
     );
 
+    let (a, _, _, _, _) = run_mulh::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(opcode, &b, &c);
     assert_eq!(
         a.map(F::from_canonical_u32),
         tester.read::<RV32_REGISTER_NUM_LIMBS>(1, rd)
     );
 }
 
+//////////////////////////////////////////////////////////////////////////////////////
+// POSITIVE TESTS
+//
+// Randomly generate computations and execute, ensuring that the generated trace
+// passes all constraints.
+//////////////////////////////////////////////////////////////////////////////////////
+
+#[test_case(MULH, 100)] // 100 random operand pairs per opcode
+#[test_case(MULHSU, 100)]
+#[test_case(MULHU, 100)]
 fn run_rv32_mulh_rand_test(opcode: MulHOpcode, num_ops: usize) {
-    // the max number of limbs we currently support MUL for is 32 (i.e. for U256s)
-    const MAX_NUM_LIMBS: u32 = 32;
     let mut rng = create_seeded_rng();
-
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let range_tuple_bus = RangeTupleCheckerBus::new(
-        RANGE_TUPLE_CHECKER_BUS,
-        [1 << RV32_CELL_BITS, MAX_NUM_LIMBS * (1 << RV32_CELL_BITS)],
-    );
-
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-    let range_tuple_checker = SharedRangeTupleCheckerChip::new(range_tuple_bus);
-
     let mut tester = VmChipTestBuilder::default();
-    let mut chip = Rv32MulHChip::<F>::new(
-        Rv32MultAdapterChip::new(
-            tester.execution_bus(),
-            tester.program_bus(),
-            tester.memory_bridge(),
-        ),
-        MulHCoreChip::new(bitwise_chip.clone(), range_tuple_checker.clone()),
-        tester.offline_memory_mutex_arc(),
-    );
+    let (mut harness, bitwise, range_tuple) = create_test_chip(&mut tester);
 
     for _ in 0..num_ops {
-        let b = generate_long_number::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(&mut rng);
-        let c = generate_long_number::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(&mut rng);
-        run_rv32_mulh_rand_write_execute(opcode, &mut tester, &mut chip, b, c, &mut rng);
+        set_and_execute(&mut tester, &mut harness, &mut rng, opcode, None, None); // random b, c
     }
 
     let tester = tester
         .build()
-        .load(chip)
-        .load(bitwise_chip)
-        .load(range_tuple_checker)
+        .load(harness) // chip under test
+        .load_periphery(bitwise) // (AIR, chip) pair from `create_test_chip`
+        .load_periphery(range_tuple) // (AIR, chip) pair from `create_test_chip`
         .finalize();
     tester.simple_test().expect("Verification failed");
 }
 
-#[test]
-fn rv32_mulh_rand_test() {
-    run_rv32_mulh_rand_test(MulHOpcode::MULH, 100);
-}
-
-#[test]
-fn rv32_mulhsu_rand_test() {
-    run_rv32_mulh_rand_test(MulHOpcode::MULHSU, 100);
-}
-
-#[test]
-fn rv32_mulhu_rand_test() {
-    run_rv32_mulh_rand_test(MulHOpcode::MULHU, 100);
-}
-
 //////////////////////////////////////////////////////////////////////////////////////
 // NEGATIVE TESTS
 //
 // Given a fake trace of a single operation, setup a chip and run the test. We replace
-// the write part of the trace and check that the core chip throws the expected error.
-// A dummy adapter is used so memory interactions don't indirectly cause false passes.
+// part of the trace and check that the chip throws the expected error.
 //////////////////////////////////////////////////////////////////////////////////////
 
-type Rv32MulHTestChip<F> =
-    VmChipWrapper<F, TestAdapterChip<F>, MulHCoreChip<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>>;
-
 #[allow(clippy::too_many_arguments)]
-fn run_rv32_mulh_negative_test(
+fn run_negative_mulh_test(
     opcode: MulHOpcode,
-    a: [u32; RV32_REGISTER_NUM_LIMBS],
+    prank_a: [u32; RV32_REGISTER_NUM_LIMBS], // written over `cols.a` in the trace
     b: [u32; RV32_REGISTER_NUM_LIMBS],
     c: [u32; RV32_REGISTER_NUM_LIMBS],
-    a_mul: [u32; RV32_REGISTER_NUM_LIMBS],
-    b_ext: u32,
-    c_ext: u32,
+    prank_a_mul: [u32; RV32_REGISTER_NUM_LIMBS], // written over `cols.a_mul`
+    prank_b_ext: u32, // written over `cols.b_ext`
+    prank_c_ext: u32, // written over `cols.c_ext`
     interaction_error: bool,
 ) {
-    const MAX_NUM_LIMBS: u32 = 32;
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let range_tuple_bus = RangeTupleCheckerBus::new(
-        RANGE_TUPLE_CHECKER_BUS,
-        [1 << RV32_CELL_BITS, MAX_NUM_LIMBS * (1 << RV32_CELL_BITS)],
-    );
-
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-    let range_tuple_chip = SharedRangeTupleCheckerChip::new(range_tuple_bus);
-
+    let mut rng = create_seeded_rng();
     let mut tester = VmChipTestBuilder::default();
-    let mut chip = Rv32MulHTestChip::<F>::new(
-        TestAdapterChip::new(
-            vec![[b.map(F::from_canonical_u32), c.map(F::from_canonical_u32)].concat()],
-            vec![None],
-            ExecutionBridge::new(tester.execution_bus(), tester.program_bus()),
-        ),
-        MulHCoreChip::new(bitwise_chip.clone(), range_tuple_chip.clone()),
-        tester.offline_memory_mutex_arc(),
-    );
-
-    tester.execute(
-        &mut chip,
-        &Instruction::from_usize(opcode.global_opcode(), [0, 0, 0, 1, 0]),
+    let (mut harness, bitwise, range_tuple) = create_test_chip(&mut tester);
+
+    set_and_execute(
+        &mut tester,
+        &mut harness,
+        &mut rng,
+        opcode,
+        Some(b), // fixed operands, not random
+        Some(c),
     );
 
-    let trace_width = chip.trace_width();
-    let adapter_width = BaseAir::<F>::width(chip.adapter.air());
-    let (_, _, carry, _, _) = run_mulh::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(opcode, &b, &c);
-
-    range_tuple_chip.clear();
-    for i in 0..RV32_REGISTER_NUM_LIMBS {
-        range_tuple_chip.add_count(&[a_mul[i], carry[i]]);
-        range_tuple_chip.add_count(&[a[i], carry[RV32_REGISTER_NUM_LIMBS + i]]);
-    }
-
+    let adapter_width = BaseAir::<F>::width(&harness.air.adapter); // core columns start here
     let modify_trace = |trace: &mut DenseMatrix<BabyBear>| {
         let mut values = trace.row_slice(0).to_vec();
         let cols: &mut MulHCoreCols<F, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS> =
             values.split_at_mut(adapter_width).1.borrow_mut();
-        cols.a = a.map(F::from_canonical_u32);
-        cols.a_mul = a_mul.map(F::from_canonical_u32);
-        cols.b_ext = F::from_canonical_u32(b_ext);
-        cols.c_ext = F::from_canonical_u32(c_ext);
-        *trace = RowMajorMatrix::new(values, trace_width);
+        cols.a = prank_a.map(F::from_canonical_u32); // inject the pranked values
+        cols.a_mul = prank_a_mul.map(F::from_canonical_u32);
+        cols.b_ext = F::from_canonical_u32(prank_b_ext);
+        cols.c_ext = F::from_canonical_u32(prank_c_ext);
+        *trace = RowMajorMatrix::new(values, trace.width()); // keeps only the edited row 0
     };
 
     disable_debug_builder();
     let tester = tester
         .build()
-        .load_and_prank_trace(chip, modify_trace)
-        .load(bitwise_chip)
-        .load(range_tuple_chip)
+        .load_and_prank_trace(harness, modify_trace)
+        .load_periphery(bitwise)
+        .load_periphery(range_tuple)
         .finalize();
-    tester.simple_test_with_expected_error(if interaction_error {
-        VerificationError::ChallengePhaseError
-    } else {
-        VerificationError::OodEvaluationMismatch
-    });
+    tester.simple_test_with_expected_error(get_verification_error(interaction_error));
 }
 
 #[test]
 fn rv32_mulh_wrong_a_mul_negative_test() {
-    run_rv32_mulh_negative_test(
-        MulHOpcode::MULH,
+    run_negative_mulh_test(
+        MULH,
         [130, 9, 135, 241],
         [197, 85, 150, 32],
         [51, 109, 78, 142],
@@ -227,8 +227,8 @@ fn rv32_mulh_wrong_a_mul_negative_test() {
 
 #[test]
 fn rv32_mulh_wrong_a_negative_test() {
-    run_rv32_mulh_negative_test(
-        MulHOpcode::MULH,
+    run_negative_mulh_test(
+        MULH,
         [130, 9, 135, 242],
         [197, 85, 150, 32],
         [51, 109, 78, 142],
@@ -241,8 +241,8 @@ fn rv32_mulh_wrong_a_negative_test() {
 
 #[test]
 fn rv32_mulh_wrong_ext_negative_test() {
-    run_rv32_mulh_negative_test(
-        MulHOpcode::MULH,
+    run_negative_mulh_test(
+        MULH,
         [1, 0, 0, 0],
         [0, 0, 0, 128],
         [2, 0, 0, 0],
@@ -255,8 +255,8 @@ fn rv32_mulh_wrong_ext_negative_test() {
 
 #[test]
 fn rv32_mulh_invalid_ext_negative_test() {
-    run_rv32_mulh_negative_test(
-        MulHOpcode::MULH,
+    run_negative_mulh_test(
+        MULH,
         [3, 2, 2, 2],
         [0, 0, 0, 128],
         [2, 0, 0, 0],
@@ -269,8 +269,8 @@ fn rv32_mulh_invalid_ext_negative_test() {
 
 #[test]
 fn rv32_mulhsu_wrong_a_mul_negative_test() {
-    run_rv32_mulh_negative_test(
-        MulHOpcode::MULHSU,
+    run_negative_mulh_test(
+        MULHSU,
         [174, 40, 246, 202],
         [197, 85, 150, 160],
         [51, 109, 78, 142],
@@ -283,8 +283,8 @@ fn rv32_mulhsu_wrong_a_mul_negative_test() {
 
 #[test]
 fn rv32_mulhsu_wrong_a_negative_test() {
-    run_rv32_mulh_negative_test(
-        MulHOpcode::MULHSU,
+    run_negative_mulh_test(
+        MULHSU,
         [174, 40, 246, 201],
         [197, 85, 150, 160],
         [51, 109, 78, 142],
@@ -297,8 +297,8 @@ fn rv32_mulhsu_wrong_a_negative_test() {
 
 #[test]
 fn rv32_mulhsu_wrong_b_ext_negative_test() {
-    run_rv32_mulh_negative_test(
-        MulHOpcode::MULHSU,
+    run_negative_mulh_test(
+        MULHSU,
         [1, 0, 0, 0],
         [0, 0, 0, 128],
         [2, 0, 0, 0],
@@ -311,8 +311,8 @@ fn rv32_mulhsu_wrong_b_ext_negative_test() {
 
 #[test]
 fn rv32_mulhsu_wrong_c_ext_negative_test() {
-    run_rv32_mulh_negative_test(
-        MulHOpcode::MULHSU,
+    run_negative_mulh_test(
+        MULHSU,
         [0, 0, 0, 64],
         [0, 0, 0, 128],
         [0, 0, 0, 128],
@@ -325,8 +325,8 @@ fn rv32_mulhsu_wrong_c_ext_negative_test() {
 
 #[test]
 fn rv32_mulhu_wrong_a_mul_negative_test() {
-    run_rv32_mulh_negative_test(
-        MulHOpcode::MULHU,
+    run_negative_mulh_test(
+        MULHU,
         [130, 9, 135, 241],
         [197, 85, 150, 32],
         [51, 109, 78, 142],
@@ -339,8 +339,8 @@ fn rv32_mulhu_wrong_a_mul_negative_test() {
 
 #[test]
 fn rv32_mulhu_wrong_a_negative_test() {
-    run_rv32_mulh_negative_test(
-        MulHOpcode::MULHU,
+    run_negative_mulh_test(
+        MULHU,
         [130, 9, 135, 240],
         [197, 85, 150, 32],
         [51, 109, 78, 142],
@@ -353,8 +353,8 @@ fn rv32_mulhu_wrong_a_negative_test() {
 
 #[test]
 fn rv32_mulhu_wrong_ext_negative_test() {
-    run_rv32_mulh_negative_test(
-        MulHOpcode::MULHU,
+    run_negative_mulh_test(
+        MULHU,
         [255, 255, 255, 255],
         [0, 0, 0, 128],
         [2, 0, 0, 0],
@@ -380,7 +380,7 @@ fn run_mulh_sanity_test() {
     let c: [u32; RV32_REGISTER_NUM_LIMBS] = [303, 375, 449, 463];
     let c_mul: [u32; RV32_REGISTER_NUM_LIMBS] = [39, 100, 126, 205];
     let (res, res_mul, carry, x_ext, y_ext) =
-        run_mulh::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(MulHOpcode::MULH, &x, &y);
+        run_mulh::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(MULH, &x, &y);
     for i in 0..RV32_REGISTER_NUM_LIMBS {
         assert_eq!(z[i], res[i]);
         assert_eq!(z_mul[i], res_mul[i]);
@@ -400,7 +400,7 @@ fn run_mulhu_sanity_test() {
     let c: [u32; RV32_REGISTER_NUM_LIMBS] = [107, 93, 18, 0];
     let c_mul: [u32; RV32_REGISTER_NUM_LIMBS] = [39, 100, 126, 205];
     let (res, res_mul, carry, x_ext, y_ext) =
-        run_mulh::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(MulHOpcode::MULHU, &x, &y);
+        run_mulh::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(MULHU, &x, &y);
     for i in 0..RV32_REGISTER_NUM_LIMBS {
         assert_eq!(z[i], res[i]);
         assert_eq!(z_mul[i], res_mul[i]);
@@ -420,7 +420,7 @@ fn run_mulhsu_pos_sanity_test() {
     let c: [u32; RV32_REGISTER_NUM_LIMBS] = [107, 93, 18, 0];
     let c_mul: [u32; RV32_REGISTER_NUM_LIMBS] = [39, 100, 126, 205];
     let (res, res_mul, carry, x_ext, y_ext) =
-        run_mulh::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(MulHOpcode::MULHSU, &x, &y);
+        run_mulh::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(MULHSU, &x, &y);
     for i in 0..RV32_REGISTER_NUM_LIMBS {
         assert_eq!(z[i], res[i]);
         assert_eq!(z_mul[i], res_mul[i]);
@@ -440,7 +440,7 @@ fn run_mulhsu_neg_sanity_test() {
     let c: [u32; RV32_REGISTER_NUM_LIMBS] = [212, 292, 326, 379];
     let c_mul: [u32; RV32_REGISTER_NUM_LIMBS] = [39, 100, 126, 231];
     let (res, res_mul, carry, x_ext, y_ext) =
-        run_mulh::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(MulHOpcode::MULHSU, &x, &y);
+        run_mulh::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(MULHSU, &x, &y);
     for i in 0..RV32_REGISTER_NUM_LIMBS {
         assert_eq!(z[i], res[i]);
         assert_eq!(z_mul[i], res_mul[i]);
diff --git a/extensions/rv32im/circuit/src/shift/core.rs b/extensions/rv32im/circuit/src/shift/core.rs
index cada97685e..c3d79a7207 100644
--- a/extensions/rv32im/circuit/src/shift/core.rs
+++ b/extensions/rv32im/circuit/src/shift/core.rs
@@ -3,17 +3,18 @@ use std::{
     borrow::{Borrow, BorrowMut},
 };
 
-use openvm_circuit::arch::{
-    AdapterAirContext, AdapterRuntimeContext, MinimalInstruction, Result, VmAdapterInterface,
-    VmCoreAir, VmCoreChip,
+use openvm_circuit::{
+    arch::*,
+    system::memory::{online::TracingMemory, MemoryAuxColsFactory},
 };
 use openvm_circuit_primitives::{
     bitwise_op_lookup::{BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip},
     utils::not,
     var_range::{SharedVariableRangeCheckerChip, VariableRangeCheckerBus},
+    AlignedBytesBorrow,
 };
 use openvm_circuit_primitives_derive::AlignedBorrow;
-use openvm_instructions::{instruction::Instruction, LocalOpcode};
+use openvm_instructions::{instruction::Instruction, program::DEFAULT_PC_STEP, LocalOpcode};
 use openvm_rv32im_transpiler::ShiftOpcode;
 use openvm_stark_backend::{
     interaction::InteractionBuilder,
@@ -21,8 +22,6 @@ use openvm_stark_backend::{
     p3_field::{Field, FieldAlgebra, PrimeField32},
     rap::BaseAirWithPublicValues,
 };
-use serde::{de::DeserializeOwned, Deserialize, Serialize};
-use serde_big_array::BigArray;
 use strum::IntoEnumIterator;
 
 #[repr(C)]
@@ -51,7 +50,10 @@ pub struct ShiftCoreCols<T, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
     pub bit_shift_carry: [T; NUM_LIMBS],
 }
 
-#[derive(Copy, Clone, Debug)]
+/// RV32 shift AIR.
+/// Note: when the operand's shift amount is at least the number of bits, only
+/// `shift_amount % num_bits` bits are shifted. This matches the RV32 spec for SLL/SRL/SRA.
+#[derive(Copy, Clone, Debug, derive_new::new)]
 pub struct ShiftCoreAir<const NUM_LIMBS: usize, const LIMB_BITS: usize> {
     pub bitwise_lookup_bus: BitwiseOperationLookupBus,
     pub range_bus: VariableRangeCheckerBus,
@@ -238,154 +240,194 @@ where
 }
 
 #[repr(C)]
-#[derive(Clone, Debug, Serialize, Deserialize)]
-#[serde(bound = "T: Serialize + DeserializeOwned")]
-pub struct ShiftCoreRecord<T, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
-    #[serde(with = "BigArray")]
-    pub a: [T; NUM_LIMBS],
-    #[serde(with = "BigArray")]
-    pub b: [T; NUM_LIMBS],
-    #[serde(with = "BigArray")]
-    pub c: [T; NUM_LIMBS],
-    pub b_sign: T,
-    #[serde(with = "BigArray")]
-    pub bit_shift_carry: [u32; NUM_LIMBS],
-    pub bit_shift: usize,
-    pub limb_shift: usize,
-    pub opcode: ShiftOpcode,
+#[derive(AlignedBytesBorrow, Debug)]
+pub struct ShiftCoreRecord<const NUM_LIMBS: usize, const LIMB_BITS: usize> {
+    pub b: [u8; NUM_LIMBS], // first operand read: the value being shifted
+    pub c: [u8; NUM_LIMBS], // second operand read: the shift amount
+    pub local_opcode: u8, // `ShiftOpcode` stored as its `u8` discriminant
+}
+
+#[derive(Clone, Copy)]
+pub struct ShiftExecutor<A, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
+    adapter: A, // performs the operand reads and the result write
+    pub offset: usize, // subtracted from the global opcode to get the local `ShiftOpcode`
 }
 
-pub struct ShiftCoreChip<const NUM_LIMBS: usize, const LIMB_BITS: usize> {
-    pub air: ShiftCoreAir<NUM_LIMBS, LIMB_BITS>,
+#[derive(Clone)]
+pub struct ShiftFiller<A, const NUM_LIMBS: usize, const LIMB_BITS: usize> {
+    adapter: A, // fills the adapter part of each trace row
+    pub offset: usize,
     pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<LIMB_BITS>,
     pub range_checker_chip: SharedVariableRangeCheckerChip,
 }
 
-impl<const NUM_LIMBS: usize, const LIMB_BITS: usize> ShiftCoreChip<NUM_LIMBS, LIMB_BITS> {
+impl<A, const NUM_LIMBS: usize, const LIMB_BITS: usize> ShiftExecutor<A, NUM_LIMBS, LIMB_BITS> {
+    pub fn new(adapter: A, offset: usize) -> Self {
+        assert_eq!(NUM_LIMBS % 2, 0, "Number of limbs must be divisible by 2"); // panics on odd
+        Self { adapter, offset }
+    }
+}
+
+impl<A, const NUM_LIMBS: usize, const LIMB_BITS: usize> ShiftFiller<A, NUM_LIMBS, LIMB_BITS> {
     pub fn new(
+        adapter: A, // adapter-side trace filler
         bitwise_lookup_chip: SharedBitwiseOperationLookupChip<LIMB_BITS>,
         range_checker_chip: SharedVariableRangeCheckerChip,
         offset: usize,
     ) -> Self {
         assert_eq!(NUM_LIMBS % 2, 0, "Number of limbs must be divisible by 2");
         Self {
-            air: ShiftCoreAir {
-                bitwise_lookup_bus: bitwise_lookup_chip.bus(),
-                range_bus: range_checker_chip.bus(),
-                offset,
-            },
+            adapter,
+            offset,
             bitwise_lookup_chip,
             range_checker_chip,
         }
     }
 }
 
-impl<F: PrimeField32, I: VmAdapterInterface<F>, const NUM_LIMBS: usize, const LIMB_BITS: usize>
-    VmCoreChip<F, I> for ShiftCoreChip<NUM_LIMBS, LIMB_BITS>
+impl<F, A, RA, const NUM_LIMBS: usize, const LIMB_BITS: usize> PreflightExecutor<F, RA>
+    for ShiftExecutor<A, NUM_LIMBS, LIMB_BITS>
 where
-    I::Reads: Into<[[F; NUM_LIMBS]; 2]>,
-    I::Writes: From<[[F; NUM_LIMBS]; 1]>,
+    F: PrimeField32,
+    A: 'static
+        + AdapterTraceExecutor<
+            F,
+            ReadData: Into<[[u8; NUM_LIMBS]; 2]>, // two operand reads
+            WriteData: From<[[u8; NUM_LIMBS]; 1]>, // one result write
+        >,
+    for<'buf> RA: RecordArena<
+        'buf,
+        EmptyAdapterCoreLayout<F, A>,
+        (
+            A::RecordMut<'buf>,
+            &'buf mut ShiftCoreRecord<NUM_LIMBS, LIMB_BITS>,
+        ),
+    >,
 {
-    type Record = ShiftCoreRecord<F, NUM_LIMBS, LIMB_BITS>;
-    type Air = ShiftCoreAir<NUM_LIMBS, LIMB_BITS>;
+    fn get_opcode_name(&self, opcode: usize) -> String {
+        format!("{:?}", ShiftOpcode::from_usize(opcode - self.offset)) // Debug name of local op
+    }
 
-    #[allow(clippy::type_complexity)]
-    fn execute_instruction(
+    fn execute(
         &self,
+        state: VmStateMut<F, TracingMemory, RA>,
         instruction: &Instruction<F>,
-        _from_pc: u32,
-        reads: I::Reads,
-    ) -> Result<(AdapterRuntimeContext<F, I>, Self::Record)> {
+    ) -> Result<(), ExecutionError> {
         let Instruction { opcode, .. } = instruction;
-        let shift_opcode = ShiftOpcode::from_usize(opcode.local_opcode_idx(self.air.offset));
 
-        let data: [[F; NUM_LIMBS]; 2] = reads.into();
-        let b = data[0].map(|x| x.as_canonical_u32());
-        let c = data[1].map(|y| y.as_canonical_u32());
-        let (a, limb_shift, bit_shift) = run_shift::<NUM_LIMBS, LIMB_BITS>(shift_opcode, &b, &c);
+        let local_opcode = ShiftOpcode::from_usize(opcode.local_opcode_idx(self.offset));
 
-        let bit_shift_carry = array::from_fn(|i| match shift_opcode {
-            ShiftOpcode::SLL => b[i] >> (LIMB_BITS - bit_shift),
-            _ => b[i] % (1 << bit_shift),
-        });
+        let (mut adapter_record, core_record) = state.ctx.alloc(EmptyAdapterCoreLayout::new());
 
-        let mut b_sign = 0;
-        if shift_opcode == ShiftOpcode::SRA {
-            b_sign = b[NUM_LIMBS - 1] >> (LIMB_BITS - 1);
-            self.bitwise_lookup_chip
-                .request_xor(b[NUM_LIMBS - 1], 1 << (LIMB_BITS - 1));
-        }
+        A::start(*state.pc, state.memory, &mut adapter_record);
 
-        for i in 0..(NUM_LIMBS / 2) {
-            self.bitwise_lookup_chip
-                .request_range(a[i * 2], a[i * 2 + 1]);
-        }
+        let [rs1, rs2] = self // rs1: value to shift, rs2: shift amount
+            .adapter
+            .read(state.memory, instruction, &mut adapter_record)
+            .into();
 
-        let output = AdapterRuntimeContext::without_pc([a.map(F::from_canonical_u32)]);
-        let record = ShiftCoreRecord {
-            opcode: shift_opcode,
-            a: a.map(F::from_canonical_u32),
-            b: data[0],
-            c: data[1],
-            bit_shift_carry,
-            bit_shift,
-            limb_shift,
-            b_sign: F::from_canonical_u32(b_sign),
-        };
+        let (output, _, _) = run_shift::<NUM_LIMBS, LIMB_BITS>(local_opcode, &rs1, &rs2);
 
-        Ok((output, record))
-    }
+        core_record.b = rs1; // only the inputs and the opcode are recorded
+        core_record.c = rs2;
+        core_record.local_opcode = local_opcode as u8;
 
-    fn get_opcode_name(&self, opcode: usize) -> String {
-        format!("{:?}", ShiftOpcode::from_usize(opcode - self.air.offset))
+        self.adapter.write(
+            state.memory,
+            instruction,
+            [output].into(),
+            &mut adapter_record,
+        );
+        *state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP); // always step to the next instruction
+
+        Ok(())
     }
+}
+
+impl<F, A, const NUM_LIMBS: usize, const LIMB_BITS: usize> TraceFiller<F>
+    for ShiftFiller<A, NUM_LIMBS, LIMB_BITS>
+where
+    F: PrimeField32,
+    A: 'static + AdapterTraceFiller<F>,
+{
+    fn fill_trace_row(&self, mem_helper: &MemoryAuxColsFactory<F>, row_slice: &mut [F]) {
+        let (adapter_row, mut core_row) = unsafe { row_slice.split_at_mut_unchecked(A::WIDTH) };
+        self.adapter.fill_trace_row(mem_helper, adapter_row);
+
+        let record: &ShiftCoreRecord<NUM_LIMBS, LIMB_BITS> =
+            unsafe { get_record_from_slice(&mut core_row, ()) };
 
-    fn generate_trace_row(&self, row_slice: &mut [F], record: Self::Record) {
-        for carry_val in record.bit_shift_carry {
-            self.range_checker_chip
-                .add_count(carry_val, record.bit_shift);
+        let core_row: &mut ShiftCoreCols<F, NUM_LIMBS, LIMB_BITS> = core_row.borrow_mut();
+
+        let opcode = ShiftOpcode::from_usize(record.local_opcode as usize);
+        let (a, limb_shift, bit_shift) =
+            run_shift::<NUM_LIMBS, LIMB_BITS>(opcode, &record.b, &record.c);
+
+        for pair in a.chunks_exact(2) { // range-check the result limbs two at a time
+            self.bitwise_lookup_chip
+                .request_range(pair[0] as u32, pair[1] as u32);
         }
 
         let num_bits_log = (NUM_LIMBS * LIMB_BITS).ilog2();
         self.range_checker_chip.add_count(
-            (((record.c[0].as_canonical_u32() as usize)
-                - record.bit_shift
-                - record.limb_shift * LIMB_BITS)
-                >> num_bits_log) as u32,
+            ((record.c[0] as usize - bit_shift - limb_shift * LIMB_BITS) >> num_bits_log) as u32,
             LIMB_BITS - num_bits_log as usize,
         );
 
-        let row_slice: &mut ShiftCoreCols<_, NUM_LIMBS, LIMB_BITS> = row_slice.borrow_mut();
-        row_slice.a = record.a;
-        row_slice.b = record.b;
-        row_slice.c = record.c;
-        row_slice.bit_multiplier_left = match record.opcode {
-            ShiftOpcode::SLL => F::from_canonical_usize(1 << record.bit_shift),
-            _ => F::ZERO,
+        core_row.bit_shift_carry = if bit_shift == 0 { // avoids `>> LIMB_BITS` on a u8 below
+            for _ in 0..NUM_LIMBS {
+                self.range_checker_chip.add_count(0, 0); // still one lookup per limb
+            }
+            [F::ZERO; NUM_LIMBS]
+        } else {
+            array::from_fn(|i| {
+                let carry = match opcode {
+                    ShiftOpcode::SLL => record.b[i] >> (LIMB_BITS - bit_shift),
+                    _ => record.b[i] % (1 << bit_shift),
+                };
+                self.range_checker_chip.add_count(carry as u32, bit_shift);
+                F::from_canonical_u8(carry)
+            })
         };
-        row_slice.bit_multiplier_right = match record.opcode {
+
+        core_row.limb_shift_marker = [F::ZERO; NUM_LIMBS];
+        core_row.limb_shift_marker[limb_shift] = F::ONE; // one-hot at `limb_shift`
+        core_row.bit_shift_marker = [F::ZERO; LIMB_BITS];
+        core_row.bit_shift_marker[bit_shift] = F::ONE; // one-hot at `bit_shift`
+
+        core_row.b_sign = F::ZERO;
+        if opcode == ShiftOpcode::SRA {
+            core_row.b_sign = F::from_canonical_u8(record.b[NUM_LIMBS - 1] >> (LIMB_BITS - 1));
+            self.bitwise_lookup_chip
+                .request_xor(record.b[NUM_LIMBS - 1] as u32, 1 << (LIMB_BITS - 1));
+        }
+
+        core_row.bit_multiplier_right = match opcode {
             ShiftOpcode::SLL => F::ZERO,
-            _ => F::from_canonical_usize(1 << record.bit_shift),
+            _ => F::from_canonical_usize(1 << bit_shift),
         };
-        row_slice.b_sign = record.b_sign;
-        row_slice.bit_shift_marker = array::from_fn(|i| F::from_bool(i == record.bit_shift));
-        row_slice.limb_shift_marker = array::from_fn(|i| F::from_bool(i == record.limb_shift));
-        row_slice.bit_shift_carry = record.bit_shift_carry.map(F::from_canonical_u32);
-        row_slice.opcode_sll_flag = F::from_bool(record.opcode == ShiftOpcode::SLL);
-        row_slice.opcode_srl_flag = F::from_bool(record.opcode == ShiftOpcode::SRL);
-        row_slice.opcode_sra_flag = F::from_bool(record.opcode == ShiftOpcode::SRA);
-    }
+        core_row.bit_multiplier_left = match opcode {
+            ShiftOpcode::SLL => F::from_canonical_usize(1 << bit_shift),
+            _ => F::ZERO,
+        };
+
+        core_row.opcode_sra_flag = F::from_bool(opcode == ShiftOpcode::SRA);
+        core_row.opcode_srl_flag = F::from_bool(opcode == ShiftOpcode::SRL);
+        core_row.opcode_sll_flag = F::from_bool(opcode == ShiftOpcode::SLL);
 
-    fn air(&self) -> &Self::Air {
-        &self.air
+        core_row.c = record.c.map(F::from_canonical_u8);
+        core_row.b = record.b.map(F::from_canonical_u8);
+        core_row.a = a.map(F::from_canonical_u8); // NOTE(review): kept last; record may alias row
     }
 }
 
+// Returns (result, limb_shift, bit_shift)
+#[inline(always)]
 pub(super) fn run_shift<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
     opcode: ShiftOpcode,
-    x: &[u32; NUM_LIMBS],
-    y: &[u32; NUM_LIMBS],
-) -> ([u32; NUM_LIMBS], usize, usize) {
+    x: &[u8; NUM_LIMBS],
+    y: &[u8; NUM_LIMBS],
+) -> ([u8; NUM_LIMBS], usize, usize) {
     match opcode {
         ShiftOpcode::SLL => run_shift_left::<NUM_LIMBS, LIMB_BITS>(x, y),
         ShiftOpcode::SRL => run_shift_right::<NUM_LIMBS, LIMB_BITS>(x, y, true),
@@ -393,53 +435,60 @@ pub(super) fn run_shift<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
     }
 }
 
+#[inline(always)]
 fn run_shift_left<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
-    x: &[u32; NUM_LIMBS],
-    y: &[u32; NUM_LIMBS],
-) -> ([u32; NUM_LIMBS], usize, usize) {
-    let mut result = [0u32; NUM_LIMBS];
+    x: &[u8; NUM_LIMBS],
+    y: &[u8; NUM_LIMBS],
+) -> ([u8; NUM_LIMBS], usize, usize) {
+    let mut result = [0u8; NUM_LIMBS];
 
     let (limb_shift, bit_shift) = get_shift::<NUM_LIMBS, LIMB_BITS>(y);
 
     for i in limb_shift..NUM_LIMBS {
         result[i] = if i > limb_shift {
-            ((x[i - limb_shift] << bit_shift) + (x[i - limb_shift - 1] >> (LIMB_BITS - bit_shift)))
-                % (1 << LIMB_BITS)
+            (((x[i - limb_shift] as u16) << bit_shift)
+                | ((x[i - limb_shift - 1] as u16) >> (LIMB_BITS - bit_shift)))
+                % (1u16 << LIMB_BITS)
         } else {
-            (x[i - limb_shift] << bit_shift) % (1 << LIMB_BITS)
-        };
+            ((x[i - limb_shift] as u16) << bit_shift) % (1u16 << LIMB_BITS)
+        } as u8;
     }
     (result, limb_shift, bit_shift)
 }
 
+#[inline(always)]
 fn run_shift_right<const NUM_LIMBS: usize, const LIMB_BITS: usize>(
-    x: &[u32; NUM_LIMBS],
-    y: &[u32; NUM_LIMBS],
+    x: &[u8; NUM_LIMBS],
+    y: &[u8; NUM_LIMBS],
     logical: bool,
-) -> ([u32; NUM_LIMBS], usize, usize) {
+) -> ([u8; NUM_LIMBS], usize, usize) {
     let fill = if logical {
         0
     } else {
-        ((1 << LIMB_BITS) - 1) * (x[NUM_LIMBS - 1] >> (LIMB_BITS - 1))
+        (((1u16 << LIMB_BITS) - 1) as u8) * (x[NUM_LIMBS - 1] >> (LIMB_BITS - 1))
     };
     let mut result = [fill; NUM_LIMBS];
 
     let (limb_shift, bit_shift) = get_shift::<NUM_LIMBS, LIMB_BITS>(y);
 
     for i in 0..(NUM_LIMBS - limb_shift) {
-        result[i] = if i + limb_shift + 1 < NUM_LIMBS {
-            ((x[i + limb_shift] >> bit_shift) + (x[i + limb_shift + 1] << (LIMB_BITS - bit_shift)))
-                % (1 << LIMB_BITS)
+        let res = if i + limb_shift + 1 < NUM_LIMBS {
+            (((x[i + limb_shift] >> bit_shift) as u16)
+                | ((x[i + limb_shift + 1] as u16) << (LIMB_BITS - bit_shift)))
+                % (1u16 << LIMB_BITS)
         } else {
-            ((x[i + limb_shift] >> bit_shift) + (fill << (LIMB_BITS - bit_shift)))
-                % (1 << LIMB_BITS)
-        }
+            (((x[i + limb_shift] >> bit_shift) as u16) | ((fill as u16) << (LIMB_BITS - bit_shift)))
+                % (1u16 << LIMB_BITS)
+        };
+        result[i] = res as u8;
     }
     (result, limb_shift, bit_shift)
 }
 
-fn get_shift<const NUM_LIMBS: usize, const LIMB_BITS: usize>(y: &[u32]) -> (usize, usize) {
-    // We assume `NUM_LIMBS * LIMB_BITS <= 2^LIMB_BITS` so so the shift is defined
+#[inline(always)]
+fn get_shift<const NUM_LIMBS: usize, const LIMB_BITS: usize>(y: &[u8]) -> (usize, usize) {
+    debug_assert!(NUM_LIMBS * LIMB_BITS <= (1 << LIMB_BITS));
+    // We assume `NUM_LIMBS * LIMB_BITS <= 2^LIMB_BITS` so the shift is defined
     // entirely in y[0].
     let shift = (y[0] as usize) % (NUM_LIMBS * LIMB_BITS);
     (shift / LIMB_BITS, shift % LIMB_BITS)
diff --git a/extensions/rv32im/circuit/src/shift/execution.rs b/extensions/rv32im/circuit/src/shift/execution.rs
new file mode 100644
index 0000000000..b756f8b768
--- /dev/null
+++ b/extensions/rv32im/circuit/src/shift/execution.rs
@@ -0,0 +1,211 @@
+use std::{
+    borrow::{Borrow, BorrowMut},
+    mem::size_of,
+};
+
+use openvm_circuit::{
+    arch::{
+        E2PreCompute, ExecuteFunc, ExecutionCtxTrait, Executor, MeteredExecutionCtxTrait,
+        MeteredExecutor, StaticProgramError, VmExecState,
+    },
+    system::memory::online::GuestMemory,
+};
+use openvm_circuit_primitives_derive::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_IMM_AS, RV32_REGISTER_AS},
+    LocalOpcode,
+};
+use openvm_rv32im_transpiler::ShiftOpcode;
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::ShiftExecutor;
+use crate::adapters::imm_to_bytes;
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct ShiftPreCompute {
+    c: u32, // second operand: the expanded immediate, or the pointer of the register to read
+    a: u8, // pointer of the register the result is written to
+    b: u8, // pointer of the register the first operand is read from
+}
+
+impl<A, const NUM_LIMBS: usize, const LIMB_BITS: usize> ShiftExecutor<A, NUM_LIMBS, LIMB_BITS> {
+    /// Decodes `inst` into `data` and reports `(is_imm, opcode)` for handler selection.
+    /// Fails with `InvalidInstruction(pc)` unless `d` is the register address space and
+    /// `e` is either the immediate or the register address space.
+    #[inline(always)]
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut ShiftPreCompute,
+    ) -> Result<(bool, ShiftOpcode), StaticProgramError> {
+        let shift_opcode = ShiftOpcode::from_usize(inst.opcode.local_opcode_idx(self.offset));
+        let operand_as = inst.e.as_canonical_u32();
+        let is_imm = operand_as == RV32_IMM_AS;
+        let dest_ok = inst.d.as_canonical_u32() == RV32_REGISTER_AS;
+        if !(dest_ok && (is_imm || operand_as == RV32_REGISTER_AS)) {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+        let raw_c = inst.c.as_canonical_u32();
+        // An immediate is expanded to its 32-bit operand once, here, not on every execution.
+        let operand = if is_imm {
+            u32::from_le_bytes(imm_to_bytes(raw_c))
+        } else {
+            raw_c
+        };
+        *data = ShiftPreCompute {
+            c: operand,
+            a: inst.a.as_canonical_u32() as u8,
+            b: inst.b.as_canonical_u32() as u8,
+        };
+        Ok((is_imm, shift_opcode))
+    }
+}
+
+impl<F, A, const NUM_LIMBS: usize, const LIMB_BITS: usize> Executor<F>
+    for ShiftExecutor<A, NUM_LIMBS, LIMB_BITS>
+where
+    F: PrimeField32,
+{
+    fn pre_compute_size(&self) -> usize {
+        size_of::<ShiftPreCompute>()
+    }
+
+    /// Fills `data` from `inst` and selects the handler specialized for `(opcode, is_imm)`.
+    #[inline(always)]
+    fn pre_compute<Ctx: ExecutionCtxTrait>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let slot: &mut ShiftPreCompute = data.borrow_mut();
+        let (is_imm, shift_opcode) = self.pre_compute_impl(pc, inst, slot)?;
+        let handler: ExecuteFunc<F, Ctx> = match (shift_opcode, is_imm) {
+            (ShiftOpcode::SLL, true) => execute_e1_impl::<_, _, true, SllOp>,
+            (ShiftOpcode::SLL, false) => execute_e1_impl::<_, _, false, SllOp>,
+            (ShiftOpcode::SRL, true) => execute_e1_impl::<_, _, true, SrlOp>,
+            (ShiftOpcode::SRL, false) => execute_e1_impl::<_, _, false, SrlOp>,
+            (ShiftOpcode::SRA, true) => execute_e1_impl::<_, _, true, SraOp>,
+            (ShiftOpcode::SRA, false) => execute_e1_impl::<_, _, false, SraOp>,
+        };
+        Ok(handler)
+    }
+}
+
+impl<F, A, const NUM_LIMBS: usize, const LIMB_BITS: usize> MeteredExecutor<F>
+    for ShiftExecutor<A, NUM_LIMBS, LIMB_BITS>
+where
+    F: PrimeField32,
+{
+    fn metered_pre_compute_size(&self) -> usize {
+        size_of::<E2PreCompute<ShiftPreCompute>>()
+    }
+
+    /// Like `pre_compute`, but also stores `chip_idx` and selects the metered (E2) handler.
+    #[inline(always)]
+    fn metered_pre_compute<Ctx: MeteredExecutionCtxTrait>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError> {
+        let slot: &mut E2PreCompute<ShiftPreCompute> = data.borrow_mut();
+        slot.chip_idx = chip_idx as u32;
+        let (is_imm, shift_opcode) = self.pre_compute_impl(pc, inst, &mut slot.data)?;
+        let handler: ExecuteFunc<F, Ctx> = match (shift_opcode, is_imm) {
+            (ShiftOpcode::SLL, true) => execute_e2_impl::<_, _, true, SllOp>,
+            (ShiftOpcode::SLL, false) => execute_e2_impl::<_, _, false, SllOp>,
+            (ShiftOpcode::SRL, true) => execute_e2_impl::<_, _, true, SrlOp>,
+            (ShiftOpcode::SRL, false) => execute_e2_impl::<_, _, false, SrlOp>,
+            (ShiftOpcode::SRA, true) => execute_e2_impl::<_, _, true, SraOp>,
+            (ShiftOpcode::SRA, false) => execute_e2_impl::<_, _, false, SraOp>,
+        };
+        Ok(handler)
+    }
+}
+
+/// Shared E1/E2 body: reads `rs1` and the shift operand, applies `OP`, and writes `rd`.
+/// Afterwards bumps `instret` by one and advances `pc` by `DEFAULT_PC_STEP`.
+unsafe fn execute_e12_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const IS_IMM: bool,
+    OP: ShiftOp,
+>(
+    pre_compute: &ShiftPreCompute,
+    state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let lhs = state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.b as u32);
+    // With `IS_IMM`, `c` already holds the operand; otherwise it is a register pointer.
+    let shift_operand = if IS_IMM {
+        pre_compute.c
+    } else {
+        u32::from_le_bytes(state.vm_read::<u8, 4>(RV32_REGISTER_AS, pre_compute.c))
+    };
+
+    let result = OP::compute(lhs, shift_operand);
+    state.vm_write(RV32_REGISTER_AS, pre_compute.a as u32, &result);
+
+    state.instret += 1;
+    state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP);
+}
+
+/// E1 entry point: views the raw bytes as `ShiftPreCompute` and runs the shared body.
+unsafe fn execute_e1_impl<
+    F: PrimeField32,
+    CTX: ExecutionCtxTrait,
+    const IS_IMM: bool,
+    OP: ShiftOp,
+>(
+    pre_compute: &[u8],
+    state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    execute_e12_impl::<F, CTX, IS_IMM, OP>(Borrow::<ShiftPreCompute>::borrow(pre_compute), state);
+}
+
+unsafe fn execute_e2_impl<
+    F: PrimeField32,
+    CTX: MeteredExecutionCtxTrait,
+    const IS_IMM: bool,
+    OP: ShiftOp,
+>(
+    pre_compute: &[u8],
+    state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let metered = Borrow::<E2PreCompute<ShiftPreCompute>>::borrow(pre_compute);
+    state.ctx.on_height_change(metered.chip_idx as usize, 1); // metering: +1 for this chip
+    execute_e12_impl::<F, CTX, IS_IMM, OP>(&metered.data, state);
+}
+
+/// A 32-bit shift on a little-endian word; only the low five bits of `rs2` are used.
+trait ShiftOp {
+    fn compute(rs1: [u8; 4], rs2: u32) -> [u8; 4];
+}
+struct SllOp;
+struct SrlOp;
+struct SraOp;
+impl ShiftOp for SllOp {
+    /// Logical left shift; vacated low bits are zero.
+    fn compute(rs1: [u8; 4], rs2: u32) -> [u8; 4] {
+        // `wrapping_shl` masks the shift amount to `rs2 & 0x1F`.
+        u32::from_le_bytes(rs1).wrapping_shl(rs2).to_le_bytes()
+    }
+}
+impl ShiftOp for SrlOp {
+    /// Logical right shift; vacated high bits are zero.
+    fn compute(rs1: [u8; 4], rs2: u32) -> [u8; 4] {
+        u32::from_le_bytes(rs1).wrapping_shr(rs2).to_le_bytes()
+    }
+}
+impl ShiftOp for SraOp {
+    /// Arithmetic right shift; vacated high bits copy the sign bit.
+    fn compute(rs1: [u8; 4], rs2: u32) -> [u8; 4] {
+        // Shifting as `i32` is what makes the fill sign-extending.
+        i32::from_le_bytes(rs1).wrapping_shr(rs2).to_le_bytes()
+    }
+}
diff --git a/extensions/rv32im/circuit/src/shift/mod.rs b/extensions/rv32im/circuit/src/shift/mod.rs
index 58d5ad022b..fd063b3907 100644
--- a/extensions/rv32im/circuit/src/shift/mod.rs
+++ b/extensions/rv32im/circuit/src/shift/mod.rs
@@ -1,15 +1,25 @@
-use openvm_circuit::arch::VmChipWrapper;
+use openvm_circuit::arch::{VmAirWrapper, VmChipWrapper};
 
-use super::adapters::{Rv32BaseAluAdapterChip, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS};
+use super::adapters::{
+    Rv32BaseAluAdapterAir, Rv32BaseAluAdapterExecutor, Rv32BaseAluAdapterFiller, RV32_CELL_BITS,
+    RV32_REGISTER_NUM_LIMBS,
+};
 
 mod core;
+mod execution;
 pub use core::*;
 
 #[cfg(test)]
 mod tests;
 
+pub type Rv32ShiftAir =
+    VmAirWrapper<Rv32BaseAluAdapterAir, ShiftCoreAir<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>>;
+pub type Rv32ShiftExecutor = ShiftExecutor<
+    Rv32BaseAluAdapterExecutor<RV32_CELL_BITS>,
+    RV32_REGISTER_NUM_LIMBS,
+    RV32_CELL_BITS,
+>;
 pub type Rv32ShiftChip<F> = VmChipWrapper<
     F,
-    Rv32BaseAluAdapterChip<F>,
-    ShiftCoreChip<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>,
+    ShiftFiller<Rv32BaseAluAdapterFiller<RV32_CELL_BITS>, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>,
 >;
diff --git a/extensions/rv32im/circuit/src/shift/tests.rs b/extensions/rv32im/circuit/src/shift/tests.rs
index 7a3ef6e72c..e1051a164b 100644
--- a/extensions/rv32im/circuit/src/shift/tests.rs
+++ b/extensions/rv32im/circuit/src/shift/tests.rs
@@ -1,17 +1,12 @@
-use std::{array, borrow::BorrowMut};
+use std::{array, borrow::BorrowMut, sync::Arc};
 
-use openvm_circuit::{
-    arch::{
-        testing::{TestAdapterChip, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS},
-        ExecutionBridge, VmAdapterChip, VmChipWrapper,
-    },
-    utils::generate_long_number,
-};
+use openvm_circuit::arch::testing::{TestChipHarness, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS};
 use openvm_circuit_primitives::bitwise_op_lookup::{
-    BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
+    BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
+    SharedBitwiseOperationLookupChip,
 };
-use openvm_instructions::{instruction::Instruction, LocalOpcode};
-use openvm_rv32im_transpiler::ShiftOpcode;
+use openvm_instructions::LocalOpcode;
+use openvm_rv32im_transpiler::ShiftOpcode::{self, *};
 use openvm_stark_backend::{
     p3_air::BaseAir,
     p3_field::FieldAlgebra,
@@ -20,108 +15,147 @@ use openvm_stark_backend::{
         Matrix,
     },
     utils::disable_debug_builder,
-    verifier::VerificationError,
-    ChipUsageGetter,
 };
 use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
-use rand::Rng;
+use rand::{rngs::StdRng, Rng};
+use test_case::test_case;
 
-use super::{core::run_shift, Rv32ShiftChip, ShiftCoreChip};
+use super::{core::run_shift, Rv32ShiftChip, ShiftCoreAir, ShiftCoreCols};
 use crate::{
-    adapters::{Rv32BaseAluAdapterChip, RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS},
-    shift::ShiftCoreCols,
-    test_utils::{generate_rv32_is_type_immediate, rv32_rand_write_register_or_imm},
+    adapters::{
+        Rv32BaseAluAdapterAir, Rv32BaseAluAdapterExecutor, Rv32BaseAluAdapterFiller,
+        RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS,
+    },
+    test_utils::{
+        generate_rv32_is_type_immediate, get_verification_error, rv32_rand_write_register_or_imm,
+    },
+    Rv32ShiftAir, Rv32ShiftExecutor, ShiftFiller,
 };
 
 type F = BabyBear;
-
-//////////////////////////////////////////////////////////////////////////////////////
-// POSITIVE TESTS
-//
-// Randomly generate computations and execute, ensuring that the generated trace
-// passes all constraints.
-//////////////////////////////////////////////////////////////////////////////////////
-
-fn run_rv32_shift_rand_test(opcode: ShiftOpcode, num_ops: usize) {
-    let mut rng = create_seeded_rng();
+const MAX_INS_CAPACITY: usize = 128;
+type Harness = TestChipHarness<F, Rv32ShiftExecutor, Rv32ShiftAir, Rv32ShiftChip<F>>;
+
+fn create_test_chip(
+    tester: &VmChipTestBuilder<F>,
+) -> (
+    Harness,
+    (
+        BitwiseOperationLookupAir<RV32_CELL_BITS>,
+        SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+    ),
+) {
+    let range_checker = tester.range_checker().clone();
     let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
+    let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+        bitwise_bus,
+    ));
 
-    let mut tester = VmChipTestBuilder::default();
-    let mut chip = Rv32ShiftChip::<F>::new(
-        Rv32BaseAluAdapterChip::new(
-            tester.execution_bus(),
-            tester.program_bus(),
+    let air = Rv32ShiftAir::new(
+        Rv32BaseAluAdapterAir::new(
+            tester.execution_bridge(),
             tester.memory_bridge(),
-            bitwise_chip.clone(),
+            bitwise_bus,
         ),
-        ShiftCoreChip::new(
+        ShiftCoreAir::new(bitwise_bus, range_checker.bus(), ShiftOpcode::CLASS_OFFSET),
+    );
+    let executor = Rv32ShiftExecutor::new(Rv32BaseAluAdapterExecutor, ShiftOpcode::CLASS_OFFSET);
+    let chip = Rv32ShiftChip::<F>::new(
+        ShiftFiller::new(
+            Rv32BaseAluAdapterFiller::new(bitwise_chip.clone()),
             bitwise_chip.clone(),
-            tester.memory_controller().borrow().range_checker.clone(),
+            range_checker.clone(),
             ShiftOpcode::CLASS_OFFSET,
         ),
-        tester.offline_memory_mutex_arc(),
+        tester.memory_helper(),
     );
+    let harness = Harness::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
 
-    for _ in 0..num_ops {
-        let b = generate_long_number::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(&mut rng);
-        let (c_imm, c) = if rng.gen_bool(0.5) {
-            (
-                None,
-                generate_long_number::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(&mut rng),
-            )
+    (harness, (bitwise_chip.air, bitwise_chip))
+}
+
+#[allow(clippy::too_many_arguments)]
+fn set_and_execute(
+    tester: &mut VmChipTestBuilder<F>,
+    harness: &mut Harness,
+    rng: &mut StdRng,
+    opcode: ShiftOpcode,
+    b: Option<[u8; RV32_REGISTER_NUM_LIMBS]>,
+    is_imm: Option<bool>,
+    c: Option<[u8; RV32_REGISTER_NUM_LIMBS]>,
+) {
+    let b = b.unwrap_or(array::from_fn(|_| rng.gen_range(0..=u8::MAX)));
+    let (c_imm, c) = if is_imm.unwrap_or(rng.gen_bool(0.5)) {
+        let (imm, c) = if let Some(c) = c {
+            ((u32::from_le_bytes(c) & 0xFFFFFF) as usize, c)
         } else {
-            let (imm, c) = generate_rv32_is_type_immediate(&mut rng);
-            (Some(imm), c)
+            generate_rv32_is_type_immediate(rng)
         };
+        (Some(imm), c)
+    } else {
+        (
+            None,
+            c.unwrap_or(array::from_fn(|_| rng.gen_range(0..=u8::MAX))),
+        )
+    };
+    let (instruction, rd) = rv32_rand_write_register_or_imm(
+        tester,
+        b,
+        c,
+        c_imm,
+        opcode.global_opcode().as_usize(),
+        rng,
+    );
+    tester.execute(harness, &instruction);
+
+    let (a, _, _) = run_shift::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(opcode, &b, &c);
+    assert_eq!(
+        a.map(F::from_canonical_u8),
+        tester.read::<RV32_REGISTER_NUM_LIMBS>(1, rd)
+    )
+}
+
+//////////////////////////////////////////////////////////////////////////////////////
+// POSITIVE TESTS
+//
+// Randomly generate computations and execute, ensuring that the generated trace
+// passes all constraints.
+//////////////////////////////////////////////////////////////////////////////////////
+#[test_case(SLL, 100)]
+#[test_case(SRL, 100)]
+#[test_case(SRA, 100)]
+fn run_rv32_shift_rand_test(opcode: ShiftOpcode, num_ops: usize) {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default();
+    let (mut harness, bitwise_chip) = create_test_chip(&tester);
 
-        let (instruction, rd) = rv32_rand_write_register_or_imm(
+    for _ in 0..num_ops {
+        set_and_execute(
             &mut tester,
-            b,
-            c,
-            c_imm,
-            opcode.global_opcode().as_usize(),
+            &mut harness,
             &mut rng,
+            opcode,
+            None,
+            None,
+            None,
         );
-        tester.execute(&mut chip, &instruction);
-
-        let (a, _, _) = run_shift::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(opcode, &b, &c);
-        assert_eq!(
-            a.map(F::from_canonical_u32),
-            tester.read::<RV32_REGISTER_NUM_LIMBS>(1, rd)
-        )
     }
 
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
+    let tester = tester
+        .build()
+        .load(harness)
+        .load_periphery(bitwise_chip)
+        .finalize();
     tester.simple_test().expect("Verification failed");
 }
 
-#[test]
-fn rv32_shift_sll_rand_test() {
-    run_rv32_shift_rand_test(ShiftOpcode::SLL, 100);
-}
-
-#[test]
-fn rv32_shift_srl_rand_test() {
-    run_rv32_shift_rand_test(ShiftOpcode::SRL, 100);
-}
-
-#[test]
-fn rv32_shift_sra_rand_test() {
-    run_rv32_shift_rand_test(ShiftOpcode::SRA, 100);
-}
-
 //////////////////////////////////////////////////////////////////////////////////////
 // NEGATIVE TESTS
 //
 // Given a fake trace of a single operation, setup a chip and run the test. We replace
-// the write part of the trace and check that the core chip throws the expected error.
-// A dummy adapter is used so memory interactions don't indirectly cause false passes.
+// part of the trace and check that the chip throws the expected error.
 //////////////////////////////////////////////////////////////////////////////////////
 
-type Rv32ShiftTestChip<F> =
-    VmChipWrapper<F, TestAdapterChip<F>, ShiftCoreChip<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>>;
-
 #[derive(Clone, Copy, Default, PartialEq)]
 struct ShiftPrankValues<const NUM_LIMBS: usize, const LIMB_BITS: usize> {
     pub bit_shift: Option<u32>,
@@ -134,63 +168,35 @@ struct ShiftPrankValues<const NUM_LIMBS: usize, const LIMB_BITS: usize> {
 }
 
 #[allow(clippy::too_many_arguments)]
-fn run_rv32_shift_negative_test(
+fn run_negative_shift_test(
     opcode: ShiftOpcode,
-    a: [u32; RV32_REGISTER_NUM_LIMBS],
-    b: [u32; RV32_REGISTER_NUM_LIMBS],
-    c: [u32; RV32_REGISTER_NUM_LIMBS],
+    prank_a: [u32; RV32_REGISTER_NUM_LIMBS],
+    b: [u8; RV32_REGISTER_NUM_LIMBS],
+    c: [u8; RV32_REGISTER_NUM_LIMBS],
     prank_vals: ShiftPrankValues<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>,
     interaction_error: bool,
 ) {
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
+    let mut rng = create_seeded_rng();
     let mut tester: VmChipTestBuilder<BabyBear> = VmChipTestBuilder::default();
-    let range_checker_chip = tester.memory_controller().borrow().range_checker.clone();
-    let mut chip = Rv32ShiftTestChip::<F>::new(
-        TestAdapterChip::new(
-            vec![[b.map(F::from_canonical_u32), c.map(F::from_canonical_u32)].concat()],
-            vec![None],
-            ExecutionBridge::new(tester.execution_bus(), tester.program_bus()),
-        ),
-        ShiftCoreChip::new(
-            bitwise_chip.clone(),
-            range_checker_chip.clone(),
-            ShiftOpcode::CLASS_OFFSET,
-        ),
-        tester.offline_memory_mutex_arc(),
-    );
-
-    tester.execute(
-        &mut chip,
-        &Instruction::from_usize(opcode.global_opcode(), [0, 0, 0, 1, 1]),
+    let (mut harness, bitwise) = create_test_chip(&tester);
+
+    set_and_execute(
+        &mut tester,
+        &mut harness,
+        &mut rng,
+        opcode,
+        Some(b),
+        Some(false),
+        Some(c),
     );
 
-    let bit_shift = prank_vals
-        .bit_shift
-        .unwrap_or(c[0] % (RV32_CELL_BITS as u32));
-    let bit_shift_carry = prank_vals
-        .bit_shift_carry
-        .unwrap_or(array::from_fn(|i| match opcode {
-            ShiftOpcode::SLL => b[i] >> ((RV32_CELL_BITS as u32) - bit_shift),
-            _ => b[i] % (1 << bit_shift),
-        }));
-
-    range_checker_chip.clear();
-    range_checker_chip.add_count(bit_shift, RV32_CELL_BITS.ilog2() as usize);
-    for (a_val, carry_val) in a.iter().zip(bit_shift_carry.iter()) {
-        range_checker_chip.add_count(*a_val, RV32_CELL_BITS);
-        range_checker_chip.add_count(*carry_val, bit_shift as usize);
-    }
-
-    let trace_width = chip.trace_width();
-    let adapter_width = BaseAir::<F>::width(chip.adapter.air());
-
+    let adapter_width = BaseAir::<F>::width(&harness.air.adapter);
     let modify_trace = |trace: &mut DenseMatrix<BabyBear>| {
         let mut values = trace.row_slice(0).to_vec();
         let cols: &mut ShiftCoreCols<F, RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS> =
             values.split_at_mut(adapter_width).1.borrow_mut();
 
-        cols.a = a.map(F::from_canonical_u32);
+        cols.a = prank_a.map(F::from_canonical_u32);
         if let Some(bit_multiplier_left) = prank_vals.bit_multiplier_left {
             cols.bit_multiplier_left = F::from_canonical_u32(bit_multiplier_left);
         }
@@ -210,21 +216,16 @@ fn run_rv32_shift_negative_test(
             cols.bit_shift_carry = bit_shift_carry.map(F::from_canonical_u32);
         }
 
-        *trace = RowMajorMatrix::new(values, trace_width);
+        *trace = RowMajorMatrix::new(values, trace.width());
     };
 
-    drop(range_checker_chip);
     disable_debug_builder();
     let tester = tester
         .build()
-        .load_and_prank_trace(chip, modify_trace)
-        .load(bitwise_chip)
+        .load_and_prank_trace(harness, modify_trace)
+        .load_periphery(bitwise)
         .finalize();
-    tester.simple_test_with_expected_error(if interaction_error {
-        VerificationError::ChallengePhaseError
-    } else {
-        VerificationError::OodEvaluationMismatch
-    });
+    tester.simple_test_with_expected_error(get_verification_error(interaction_error));
 }
 
 #[test]
@@ -233,9 +234,9 @@ fn rv32_shift_wrong_negative_test() {
     let b = [1, 0, 0, 0];
     let c = [1, 0, 0, 0];
     let prank_vals = Default::default();
-    run_rv32_shift_negative_test(ShiftOpcode::SLL, a, b, c, prank_vals, false);
-    run_rv32_shift_negative_test(ShiftOpcode::SRL, a, b, c, prank_vals, false);
-    run_rv32_shift_negative_test(ShiftOpcode::SRA, a, b, c, prank_vals, false);
+    run_negative_shift_test(SLL, a, b, c, prank_vals, false);
+    run_negative_shift_test(SRL, a, b, c, prank_vals, false);
+    run_negative_shift_test(SRA, a, b, c, prank_vals, false);
 }
 
 #[test]
@@ -249,7 +250,7 @@ fn rv32_sll_wrong_bit_shift_negative_test() {
         bit_shift_marker: Some([0, 0, 1, 0, 0, 0, 0, 0]),
         ..Default::default()
     };
-    run_rv32_shift_negative_test(ShiftOpcode::SLL, a, b, c, prank_vals, true);
+    run_negative_shift_test(SLL, a, b, c, prank_vals, true);
 }
 
 #[test]
@@ -261,7 +262,7 @@ fn rv32_sll_wrong_limb_shift_negative_test() {
         limb_shift_marker: Some([0, 0, 1, 0]),
         ..Default::default()
     };
-    run_rv32_shift_negative_test(ShiftOpcode::SLL, a, b, c, prank_vals, true);
+    run_negative_shift_test(SLL, a, b, c, prank_vals, true);
 }
 
 #[test]
@@ -273,7 +274,7 @@ fn rv32_sll_wrong_bit_carry_negative_test() {
         bit_shift_carry: Some([0, 0, 0, 0]),
         ..Default::default()
     };
-    run_rv32_shift_negative_test(ShiftOpcode::SLL, a, b, c, prank_vals, true);
+    run_negative_shift_test(SLL, a, b, c, prank_vals, true);
 }
 
 #[test]
@@ -286,7 +287,7 @@ fn rv32_sll_wrong_bit_mult_side_negative_test() {
         bit_multiplier_right: Some(1),
         ..Default::default()
     };
-    run_rv32_shift_negative_test(ShiftOpcode::SLL, a, b, c, prank_vals, false);
+    run_negative_shift_test(SLL, a, b, c, prank_vals, false);
 }
 
 #[test]
@@ -300,7 +301,7 @@ fn rv32_srl_wrong_bit_shift_negative_test() {
         bit_shift_marker: Some([0, 0, 1, 0, 0, 0, 0, 0]),
         ..Default::default()
     };
-    run_rv32_shift_negative_test(ShiftOpcode::SRL, a, b, c, prank_vals, false);
+    run_negative_shift_test(SRL, a, b, c, prank_vals, false);
 }
 
 #[test]
@@ -312,7 +313,7 @@ fn rv32_srl_wrong_limb_shift_negative_test() {
         limb_shift_marker: Some([0, 1, 0, 0]),
         ..Default::default()
     };
-    run_rv32_shift_negative_test(ShiftOpcode::SRL, a, b, c, prank_vals, false);
+    run_negative_shift_test(SRL, a, b, c, prank_vals, false);
 }
 
 #[test]
@@ -325,8 +326,8 @@ fn rv32_srx_wrong_bit_mult_side_negative_test() {
         bit_multiplier_right: Some(0),
         ..Default::default()
     };
-    run_rv32_shift_negative_test(ShiftOpcode::SRL, a, b, c, prank_vals, false);
-    run_rv32_shift_negative_test(ShiftOpcode::SRA, a, b, c, prank_vals, false);
+    run_negative_shift_test(SRL, a, b, c, prank_vals, false);
+    run_negative_shift_test(SRA, a, b, c, prank_vals, false);
 }
 
 #[test]
@@ -340,7 +341,7 @@ fn rv32_sra_wrong_bit_shift_negative_test() {
         bit_shift_marker: Some([0, 0, 1, 0, 0, 0, 0, 0]),
         ..Default::default()
     };
-    run_rv32_shift_negative_test(ShiftOpcode::SRA, a, b, c, prank_vals, false);
+    run_negative_shift_test(SRA, a, b, c, prank_vals, false);
 }
 
 #[test]
@@ -352,7 +353,7 @@ fn rv32_sra_wrong_limb_shift_negative_test() {
         limb_shift_marker: Some([0, 1, 0, 0]),
         ..Default::default()
     };
-    run_rv32_shift_negative_test(ShiftOpcode::SRA, a, b, c, prank_vals, false);
+    run_negative_shift_test(SRA, a, b, c, prank_vals, false);
 }
 
 #[test]
@@ -364,7 +365,7 @@ fn rv32_sra_wrong_sign_negative_test() {
         b_sign: Some(0),
         ..Default::default()
     };
-    run_rv32_shift_negative_test(ShiftOpcode::SRA, a, b, c, prank_vals, true);
+    run_negative_shift_test(SRA, a, b, c, prank_vals, true);
 }
 
 ///////////////////////////////////////////////////////////////////////////////////////
@@ -375,11 +376,11 @@ fn rv32_sra_wrong_sign_negative_test() {
 
 #[test]
 fn run_sll_sanity_test() {
-    let x: [u32; RV32_REGISTER_NUM_LIMBS] = [45, 7, 61, 186];
-    let y: [u32; RV32_REGISTER_NUM_LIMBS] = [91, 0, 100, 0];
-    let z: [u32; RV32_REGISTER_NUM_LIMBS] = [0, 0, 0, 104];
+    let x: [u8; RV32_REGISTER_NUM_LIMBS] = [45, 7, 61, 186];
+    let y: [u8; RV32_REGISTER_NUM_LIMBS] = [91, 0, 100, 0];
+    let z: [u8; RV32_REGISTER_NUM_LIMBS] = [0, 0, 0, 104];
     let (result, limb_shift, bit_shift) =
-        run_shift::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(ShiftOpcode::SLL, &x, &y);
+        run_shift::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(SLL, &x, &y);
     for i in 0..RV32_REGISTER_NUM_LIMBS {
         assert_eq!(z[i], result[i])
     }
@@ -390,11 +391,11 @@ fn run_sll_sanity_test() {
 
 #[test]
 fn run_srl_sanity_test() {
-    let x: [u32; RV32_REGISTER_NUM_LIMBS] = [31, 190, 221, 200];
-    let y: [u32; RV32_REGISTER_NUM_LIMBS] = [49, 190, 190, 190];
-    let z: [u32; RV32_REGISTER_NUM_LIMBS] = [110, 100, 0, 0];
+    let x: [u8; RV32_REGISTER_NUM_LIMBS] = [31, 190, 221, 200];
+    let y: [u8; RV32_REGISTER_NUM_LIMBS] = [49, 190, 190, 190];
+    let z: [u8; RV32_REGISTER_NUM_LIMBS] = [110, 100, 0, 0];
     let (result, limb_shift, bit_shift) =
-        run_shift::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(ShiftOpcode::SRL, &x, &y);
+        run_shift::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(SRL, &x, &y);
     for i in 0..RV32_REGISTER_NUM_LIMBS {
         assert_eq!(z[i], result[i])
     }
@@ -405,11 +406,11 @@ fn run_srl_sanity_test() {
 
 #[test]
 fn run_sra_sanity_test() {
-    let x: [u32; RV32_REGISTER_NUM_LIMBS] = [31, 190, 221, 200];
-    let y: [u32; RV32_REGISTER_NUM_LIMBS] = [113, 20, 50, 80];
-    let z: [u32; RV32_REGISTER_NUM_LIMBS] = [110, 228, 255, 255];
+    let x: [u8; RV32_REGISTER_NUM_LIMBS] = [31, 190, 221, 200];
+    let y: [u8; RV32_REGISTER_NUM_LIMBS] = [113, 20, 50, 80];
+    let z: [u8; RV32_REGISTER_NUM_LIMBS] = [110, 228, 255, 255];
     let (result, limb_shift, bit_shift) =
-        run_shift::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(ShiftOpcode::SRA, &x, &y);
+        run_shift::<RV32_REGISTER_NUM_LIMBS, RV32_CELL_BITS>(SRA, &x, &y);
     for i in 0..RV32_REGISTER_NUM_LIMBS {
         assert_eq!(z[i], result[i])
     }
diff --git a/extensions/rv32im/circuit/src/test_utils.rs b/extensions/rv32im/circuit/src/test_utils.rs
index 8a105ff990..f018b0d845 100644
--- a/extensions/rv32im/circuit/src/test_utils.rs
+++ b/extensions/rv32im/circuit/src/test_utils.rs
@@ -1,6 +1,6 @@
 use openvm_circuit::arch::testing::{memory::gen_pointer, VmChipTestBuilder};
 use openvm_instructions::{instruction::Instruction, VmOpcode};
-use openvm_stark_backend::p3_field::FieldAlgebra;
+use openvm_stark_backend::{p3_field::FieldAlgebra, verifier::VerificationError};
 use openvm_stark_sdk::p3_baby_bear::BabyBear;
 use rand::{rngs::StdRng, Rng};
 
@@ -10,8 +10,8 @@ use super::adapters::{RV32_REGISTER_NUM_LIMBS, RV_IS_TYPE_IMM_BITS};
 #[cfg_attr(all(feature = "test-utils", not(test)), allow(dead_code))]
 pub fn rv32_rand_write_register_or_imm<const NUM_LIMBS: usize>(
     tester: &mut VmChipTestBuilder<BabyBear>,
-    rs1_writes: [u32; NUM_LIMBS],
-    rs2_writes: [u32; NUM_LIMBS],
+    rs1_writes: [u8; NUM_LIMBS],
+    rs2_writes: [u8; NUM_LIMBS],
     imm: Option<usize>,
     opcode_with_offset: usize,
     rng: &mut StdRng,
@@ -22,9 +22,9 @@ pub fn rv32_rand_write_register_or_imm<const NUM_LIMBS: usize>(
     let rs2 = imm.unwrap_or_else(|| gen_pointer(rng, NUM_LIMBS));
     let rd = gen_pointer(rng, NUM_LIMBS);
 
-    tester.write::<NUM_LIMBS>(1, rs1, rs1_writes.map(BabyBear::from_canonical_u32));
+    tester.write::<NUM_LIMBS>(1, rs1, rs1_writes.map(BabyBear::from_canonical_u8));
     if !rs2_is_imm {
-        tester.write::<NUM_LIMBS>(1, rs2, rs2_writes.map(BabyBear::from_canonical_u32));
+        tester.write::<NUM_LIMBS>(1, rs2, rs2_writes.map(BabyBear::from_canonical_u8));
     }
 
     (
@@ -37,9 +37,7 @@ pub fn rv32_rand_write_register_or_imm<const NUM_LIMBS: usize>(
 }
 
 #[cfg_attr(all(feature = "test-utils", not(test)), allow(dead_code))]
-pub fn generate_rv32_is_type_immediate(
-    rng: &mut StdRng,
-) -> (usize, [u32; RV32_REGISTER_NUM_LIMBS]) {
+pub fn generate_rv32_is_type_immediate(rng: &mut StdRng) -> (usize, [u8; RV32_REGISTER_NUM_LIMBS]) {
     let mut imm: u32 = rng.gen_range(0..(1 << RV_IS_TYPE_IMM_BITS));
     if (imm & 0x800) != 0 {
         imm |= !0xFFF
@@ -51,7 +49,17 @@ pub fn generate_rv32_is_type_immediate(
             (imm >> 8) as u8,
             (imm >> 16) as u8,
             (imm >> 16) as u8,
-        ]
-        .map(|x| x as u32),
+        ],
     )
 }
+
+/// Selects the `VerificationError` a test should expect: `ChallengePhaseError` when an
+/// interaction error is expected (`is_interaction_error == true`), and
+/// `OodEvaluationMismatch` when a constraint error is expected.
+#[cfg_attr(all(feature = "test-utils", not(test)), allow(dead_code))]
+pub fn get_verification_error(is_interaction_error: bool) -> VerificationError {
+    match is_interaction_error {
+        true => VerificationError::ChallengePhaseError,
+        false => VerificationError::OodEvaluationMismatch,
+    }
+}
diff --git a/extensions/rv32im/tests/Cargo.toml b/extensions/rv32im/tests/Cargo.toml
index 2e68359532..45eb4c1654 100644
--- a/extensions/rv32im/tests/Cargo.toml
+++ b/extensions/rv32im/tests/Cargo.toml
@@ -20,6 +20,7 @@ openvm-toolchain-tests = { path = "../../../crates/toolchain/tests" }
 eyre.workspace = true
 test-case.workspace = true
 serde = { workspace = true, features = ["alloc"] }
+strum.workspace = true
 
 [features]
 default = ["parallel"]
diff --git a/extensions/rv32im/tests/src/lib.rs b/extensions/rv32im/tests/src/lib.rs
index a4de516462..41ae207474 100644
--- a/extensions/rv32im/tests/src/lib.rs
+++ b/extensions/rv32im/tests/src/lib.rs
@@ -5,14 +5,17 @@ mod tests {
     use eyre::Result;
     use openvm_circuit::{
         arch::{hasher::poseidon2::vm_poseidon2_hasher, ExecutionError, Streams, VmExecutor},
-        system::memory::tree::public_values::UserPublicValuesProof,
-        utils::{air_test, air_test_with_min_segments},
+        system::memory::merkle::public_values::UserPublicValuesProof,
+        utils::{air_test, air_test_with_min_segments, test_system_config},
     };
-    use openvm_instructions::exe::VmExe;
-    use openvm_rv32im_circuit::{Rv32IConfig, Rv32ImConfig};
+    use openvm_instructions::{exe::VmExe, instruction::Instruction, LocalOpcode, SystemOpcode};
+    #[cfg(test)]
+    use openvm_rv32im_circuit::Rv32ImCpuBuilder;
+    use openvm_rv32im_circuit::{Rv32IConfig, Rv32ICpuBuilder, Rv32ImConfig};
     use openvm_rv32im_guest::hint_load_by_key_encode;
     use openvm_rv32im_transpiler::{
-        Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
+        DivRemOpcode, MulHOpcode, MulOpcode, Rv32ITranspilerExtension, Rv32IoTranspilerExtension,
+        Rv32MTranspilerExtension,
     };
     use openvm_stark_sdk::{openvm_stark_backend::p3_field::FieldAlgebra, p3_baby_bear::BabyBear};
     use openvm_toolchain_tests::{
@@ -20,28 +23,42 @@ mod tests {
         get_programs_dir,
     };
     use openvm_transpiler::{transpiler::Transpiler, FromElf};
+    use strum::IntoEnumIterator;
     use test_case::test_case;
 
     type F = BabyBear;
 
+    /// `Rv32ImConfig::default()` with its `rv32i.system` replaced by `test_system_config()`.
+    #[cfg(test)]
+    fn test_rv32im_config() -> Rv32ImConfig {
+        let system = test_system_config();
+        let mut config = Rv32ImConfig::default();
+        // Only the system config differs from the defaults; the nested assignment leaves
+        // every other field of both `Rv32ImConfig` and `Rv32IConfig` at its default value.
+        config.rv32i.system = system;
+        config
+    }
+
     #[test_case("fibonacci", 1)]
     fn test_rv32i(example_name: &str, min_segments: usize) -> Result<()> {
         let config = Rv32IConfig::default();
         let elf = build_example_program_at_path(get_programs_dir!(), example_name, &config)?;
-        let exe = VmExe::from_elf(
+        let mut exe = VmExe::from_elf(
             elf,
             Transpiler::<F>::default()
                 .with_extension(Rv32ITranspilerExtension)
                 .with_extension(Rv32MTranspilerExtension)
                 .with_extension(Rv32IoTranspilerExtension),
         )?;
-        air_test_with_min_segments(config, exe, vec![], min_segments);
+        change_rv32m_insn_to_nop(&mut exe);
+        air_test_with_min_segments(Rv32ICpuBuilder, config, exe, vec![], min_segments);
         Ok(())
     }
 
+    #[test_case("fibonacci", 1)]
     #[test_case("collatz", 1)]
     fn test_rv32im(example_name: &str, min_segments: usize) -> Result<()> {
-        let config = Rv32ImConfig::default();
+        let config = test_rv32im_config();
         let elf = build_example_program_at_path(get_programs_dir!(), example_name, &config)?;
         let exe = VmExe::from_elf(
             elf,
@@ -50,14 +67,14 @@ mod tests {
                 .with_extension(Rv32IoTranspilerExtension)
                 .with_extension(Rv32MTranspilerExtension),
         )?;
-        air_test_with_min_segments(config, exe, vec![], min_segments);
+        air_test_with_min_segments(Rv32ImCpuBuilder, config, exe, vec![], min_segments);
         Ok(())
     }
 
-    // #[test_case("fibonacci", 1)]
+    #[test_case("fibonacci", 1)]
     #[test_case("collatz", 1)]
     fn test_rv32im_std(example_name: &str, min_segments: usize) -> Result<()> {
-        let config = Rv32ImConfig::default();
+        let config = test_rv32im_config();
         let elf = build_example_program_at_path_with_features(
             get_programs_dir!(),
             example_name,
@@ -71,13 +88,13 @@ mod tests {
                 .with_extension(Rv32IoTranspilerExtension)
                 .with_extension(Rv32MTranspilerExtension),
         )?;
-        air_test_with_min_segments(config, exe, vec![], min_segments);
+        air_test_with_min_segments(Rv32ImCpuBuilder, config, exe, vec![], min_segments);
         Ok(())
     }
 
     #[test]
     fn test_read_vec() -> Result<()> {
-        let config = Rv32IConfig::default();
+        let config = test_rv32im_config();
         let elf = build_example_program_at_path(get_programs_dir!(), "hint", &config)?;
         let exe = VmExe::from_elf(
             elf,
@@ -87,13 +104,13 @@ mod tests {
                 .with_extension(Rv32IoTranspilerExtension),
         )?;
         let input = vec![[0, 1, 2, 3].map(F::from_canonical_u8).to_vec()];
-        air_test_with_min_segments(config, exe, input, 1);
+        air_test_with_min_segments(Rv32ImCpuBuilder, config, exe, input, 1);
         Ok(())
     }
 
     #[test]
     fn test_hint_load_by_key() -> Result<()> {
-        let config = Rv32IConfig::default();
+        let config = test_rv32im_config();
         let elf = build_example_program_at_path(get_programs_dir!(), "hint_load_by_key", &config)?;
         let exe = VmExe::from_elf(
             elf,
@@ -110,13 +127,13 @@ mod tests {
             "key".as_bytes().to_vec(),
             hint_load_by_key_encode(&input),
         )]));
-        air_test_with_min_segments(config, exe, streams, 1);
+        air_test_with_min_segments(Rv32ImCpuBuilder, config, exe, streams, 1);
         Ok(())
     }
 
     #[test]
     fn test_read() -> Result<()> {
-        let config = Rv32IConfig::default();
+        let config = test_rv32im_config();
         let elf = build_example_program_at_path(get_programs_dir!(), "read", &config)?;
         let exe = VmExe::from_elf(
             elf,
@@ -141,13 +158,13 @@ mod tests {
             .flat_map(|w| w.to_le_bytes())
             .map(F::from_canonical_u8)
             .collect();
-        air_test_with_min_segments(config, exe, vec![input], 1);
+        air_test_with_min_segments(Rv32ImCpuBuilder, config, exe, vec![input], 1);
         Ok(())
     }
 
     #[test]
     fn test_reveal() -> Result<()> {
-        let config = Rv32IConfig::default();
+        let config = test_rv32im_config();
         let elf = build_example_program_at_path(get_programs_dir!(), "reveal", &config)?;
         let exe = VmExe::from_elf(
             elf,
@@ -156,11 +173,14 @@ mod tests {
                 .with_extension(Rv32MTranspilerExtension)
                 .with_extension(Rv32IoTranspilerExtension),
         )?;
-        let executor = VmExecutor::<F, _>::new(config.clone());
-        let final_memory = executor.execute(exe, vec![])?.unwrap();
-        let hasher = vm_poseidon2_hasher();
+
+        let executor = VmExecutor::new(config.clone())?;
+        let instance = executor.instance(&exe)?;
+        let state = instance.execute(vec![], None)?;
+        let final_memory = state.memory.memory;
+        let hasher = vm_poseidon2_hasher::<F>();
         let pv_proof = UserPublicValuesProof::compute(
-            config.system.memory_config.memory_dimensions(),
+            config.as_ref().memory_config.memory_dimensions(),
             64,
             &hasher,
             &final_memory,
@@ -186,7 +206,7 @@ mod tests {
 
     #[test]
     fn test_print() -> Result<()> {
-        let config = Rv32IConfig::default();
+        let config = test_rv32im_config();
         let elf = build_example_program_at_path(get_programs_dir!(), "print", &config)?;
         let exe = VmExe::from_elf(
             elf,
@@ -195,13 +215,13 @@ mod tests {
                 .with_extension(Rv32MTranspilerExtension)
                 .with_extension(Rv32IoTranspilerExtension),
         )?;
-        air_test(config, exe);
+        air_test(Rv32ImCpuBuilder, config, exe);
         Ok(())
     }
 
     #[test]
     fn test_heap_overflow() -> Result<()> {
-        let config = Rv32ImConfig::default();
+        let config = test_rv32im_config();
         let elf = build_example_program_at_path(get_programs_dir!(), "heap_overflow", &config)?;
         let exe = VmExe::from_elf(
             elf,
@@ -211,8 +231,10 @@ mod tests {
                 .with_extension(Rv32IoTranspilerExtension),
         )?;
 
-        let executor = VmExecutor::<F, _>::new(config.clone());
-        match executor.execute(exe, vec![[0, 0, 0, 1].map(F::from_canonical_u8).to_vec()]) {
+        let executor = VmExecutor::new(config)?;
+        let instance = executor.instance(&exe)?;
+        let input = vec![[0, 0, 0, 1].map(F::from_canonical_u8).to_vec()];
+        match instance.execute(input.clone(), None) {
             Err(ExecutionError::FailedWithExitCode(_)) => Ok(()),
             Err(_) => panic!("should fail with `FailedWithExitCode`"),
             Ok(_) => panic!("should fail"),
@@ -221,7 +243,7 @@ mod tests {
 
     #[test]
     fn test_hashmap() -> Result<()> {
-        let config = Rv32ImConfig::default();
+        let config = test_rv32im_config();
         let elf = build_example_program_at_path_with_features(
             get_programs_dir!(),
             "hashmap",
@@ -235,13 +257,13 @@ mod tests {
                 .with_extension(Rv32MTranspilerExtension)
                 .with_extension(Rv32IoTranspilerExtension),
         )?;
-        air_test(config, exe);
+        air_test(Rv32ImCpuBuilder, config, exe);
         Ok(())
     }
 
     #[test]
     fn test_tiny_mem_test() -> Result<()> {
-        let config = Rv32ImConfig::default();
+        let config = test_rv32im_config();
         let elf = build_example_program_at_path_with_features(
             get_programs_dir!(),
             "tiny-mem-test",
@@ -255,14 +277,14 @@ mod tests {
                 .with_extension(Rv32MTranspilerExtension)
                 .with_extension(Rv32IoTranspilerExtension),
         )?;
-        air_test(config, exe);
+        air_test(Rv32ImCpuBuilder, config, exe);
         Ok(())
     }
 
     #[test]
     #[should_panic]
     fn test_load_x0() {
-        let config = Rv32ImConfig::default();
+        let config = test_rv32im_config();
         let elf = build_example_program_at_path(get_programs_dir!(), "load_x0", &config).unwrap();
         let exe = VmExe::from_elf(
             elf,
@@ -272,8 +294,9 @@ mod tests {
                 .with_extension(Rv32IoTranspilerExtension),
         )
         .unwrap();
-        let executor = VmExecutor::<F, _>::new(config.clone());
-        executor.execute(exe, vec![]).unwrap();
+        let executor = VmExecutor::new(config).unwrap();
+        let instance = executor.instance(&exe).unwrap();
+        instance.execute(vec![], None).unwrap();
     }
 
     #[test_case("getrandom", vec!["getrandom", "getrandom-unsupported"])]
@@ -281,7 +304,7 @@ mod tests {
     #[test_case("getrandom_v02", vec!["getrandom-v02", "getrandom-unsupported"])]
     #[test_case("getrandom_v02", vec!["getrandom-v02/custom"])]
     fn test_getrandom_unsupported(program: &str, features: Vec<&str>) {
-        let config = Rv32ImConfig::default();
+        let config = test_rv32im_config();
         let elf = build_example_program_at_path_with_features(
             get_programs_dir!(),
             program,
@@ -297,6 +320,26 @@ mod tests {
                 .with_extension(Rv32IoTranspilerExtension),
         )
         .unwrap();
-        air_test(config, exe);
+        air_test(Rv32ImCpuBuilder, config, exe);
+    }
+
+    /// For tests that should only execute RV32I: the ELF might still contain RV32M
+    /// instructions (`MulOpcode`, `MulHOpcode`, `DivRemOpcode`) that the program never uses.
+    /// Each of them is replaced by a default instruction with the `PHANTOM` opcode.
+    fn change_rv32m_insn_to_nop(exe: &mut VmExe<F>) {
+        let rv32m_opcodes: Vec<_> = MulOpcode::iter()
+            .map(|op| op.global_opcode())
+            .chain(MulHOpcode::iter().map(|op| op.global_opcode()))
+            .chain(DivRemOpcode::iter().map(|op| op.global_opcode()))
+            .collect();
+        let slots = exe.program.instructions_and_debug_infos.iter_mut();
+        for (insn, _) in slots.flatten() {
+            if rv32m_opcodes.contains(&insn.opcode) {
+                *insn = Instruction {
+                    opcode: SystemOpcode::PHANTOM.global_opcode(),
+                    ..Default::default()
+                };
+            }
+        }
     }
 }
diff --git a/extensions/sha256/circuit/Cargo.toml b/extensions/sha256/circuit/Cargo.toml
index 95c87b0871..413265b622 100644
--- a/extensions/sha256/circuit/Cargo.toml
+++ b/extensions/sha256/circuit/Cargo.toml
@@ -9,7 +9,6 @@ description = "OpenVM circuit extension for sha256"
 openvm-stark-backend = { workspace = true }
 openvm-stark-sdk = { workspace = true }
 openvm-circuit-primitives = { workspace = true }
-openvm-circuit-primitives-derive = { workspace = true }
 openvm-circuit-derive = { workspace = true }
 openvm-circuit = { workspace = true }
 openvm-instructions = { workspace = true }
@@ -25,6 +24,7 @@ sha2 = { version = "0.10", default-features = false }
 strum = { workspace = true }
 
 [dev-dependencies]
+hex = { workspace = true }
 openvm-stark-sdk = { workspace = true }
 openvm-circuit = { workspace = true, features = ["test-utils"] }
 
diff --git a/extensions/sha256/circuit/src/extension.rs b/extensions/sha256/circuit/src/extension.rs
index 783bc54f63..e58dc3c29b 100644
--- a/extensions/sha256/circuit/src/extension.rs
+++ b/extensions/sha256/circuit/src/extension.rs
@@ -1,105 +1,128 @@
+use std::{result::Result, sync::Arc};
+
 use derive_more::derive::From;
 use openvm_circuit::{
     arch::{
-        InitFileGenerator, SystemConfig, VmExtension, VmInventory, VmInventoryBuilder,
-        VmInventoryError,
+        AirInventory, AirInventoryError, ChipInventory, ChipInventoryError,
+        ExecutorInventoryBuilder, ExecutorInventoryError, RowMajorMatrixArena, VmCircuitExtension,
+        VmExecutionExtension, VmProverExtension,
     },
-    system::phantom::PhantomChip,
+    system::memory::SharedMemoryHelper,
 };
-use openvm_circuit_derive::{AnyEnum, InstructionExecutor, VmConfig};
+use openvm_circuit_derive::{AnyEnum, Executor, MeteredExecutor, PreflightExecutor};
 use openvm_circuit_primitives::bitwise_op_lookup::{
-    BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
+    BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
+    SharedBitwiseOperationLookupChip,
 };
-use openvm_circuit_primitives_derive::{Chip, ChipUsageGetter};
 use openvm_instructions::*;
-use openvm_rv32im_circuit::{
-    Rv32I, Rv32IExecutor, Rv32IPeriphery, Rv32Io, Rv32IoExecutor, Rv32IoPeriphery, Rv32M,
-    Rv32MExecutor, Rv32MPeriphery,
-};
 use openvm_sha256_transpiler::Rv32Sha256Opcode;
-use openvm_stark_backend::p3_field::PrimeField32;
+use openvm_stark_backend::{
+    config::{StarkGenericConfig, Val},
+    p3_field::PrimeField32,
+    prover::cpu::{CpuBackend, CpuDevice},
+};
+use openvm_stark_sdk::engine::StarkEngine;
 use serde::{Deserialize, Serialize};
 use strum::IntoEnumIterator;
 
 use crate::*;
 
-#[derive(Clone, Debug, VmConfig, derive_new::new, Serialize, Deserialize)]
-pub struct Sha256Rv32Config {
-    #[system]
-    pub system: SystemConfig,
-    #[extension]
-    pub rv32i: Rv32I,
-    #[extension]
-    pub rv32m: Rv32M,
-    #[extension]
-    pub io: Rv32Io,
-    #[extension]
-    pub sha256: Sha256,
+// =================================== VM Extension Implementation =================================
+#[derive(Clone, Copy, Debug, Default, Serialize, Deserialize)]
+pub struct Sha256;
+
+#[derive(Clone, From, AnyEnum, Executor, MeteredExecutor, PreflightExecutor)]
+pub enum Sha256Executor {
+    Sha256(Sha256VmExecutor),
 }
 
-impl Default for Sha256Rv32Config {
-    fn default() -> Self {
-        Self {
-            system: SystemConfig::default().with_continuations(),
-            rv32i: Rv32I,
-            rv32m: Rv32M::default(),
-            io: Rv32Io,
-            sha256: Sha256,
-        }
+impl<F> VmExecutionExtension<F> for Sha256 {
+    type Executor = Sha256Executor;
+
+    fn extend_execution(
+        &self,
+        inventory: &mut ExecutorInventoryBuilder<F, Sha256Executor>,
+    ) -> Result<(), ExecutorInventoryError> {
+        let pointer_max_bits = inventory.pointer_max_bits();
+        let sha256_step = Sha256VmExecutor::new(Rv32Sha256Opcode::CLASS_OFFSET, pointer_max_bits);
+        inventory.add_executor(
+            sha256_step,
+            Rv32Sha256Opcode::iter().map(|x| x.global_opcode()),
+        )?;
+
+        Ok(())
     }
 }
 
-// Default implementation uses no init file
-impl InitFileGenerator for Sha256Rv32Config {}
+impl<SC: StarkGenericConfig> VmCircuitExtension<SC> for Sha256 {
+    fn extend_circuit(&self, inventory: &mut AirInventory<SC>) -> Result<(), AirInventoryError> {
+        let pointer_max_bits = inventory.pointer_max_bits();
 
-#[derive(Clone, Copy, Debug, Default, Serialize, Deserialize)]
-pub struct Sha256;
+        let bitwise_lu = {
+            let existing_air = inventory.find_air::<BitwiseOperationLookupAir<8>>().next();
+            if let Some(air) = existing_air {
+                air.bus
+            } else {
+                let bus = BitwiseOperationLookupBus::new(inventory.new_bus_idx());
+                let air = BitwiseOperationLookupAir::<8>::new(bus);
+                inventory.add_air(air);
+                air.bus
+            }
+        };
 
-#[derive(ChipUsageGetter, Chip, InstructionExecutor, From, AnyEnum)]
-pub enum Sha256Executor<F: PrimeField32> {
-    Sha256(Sha256VmChip<F>),
-}
+        let sha256 = Sha256VmAir::new(
+            inventory.system().port(),
+            bitwise_lu,
+            pointer_max_bits,
+            inventory.new_bus_idx(),
+        );
+        inventory.add_air(sha256);
 
-#[derive(From, ChipUsageGetter, Chip, AnyEnum)]
-pub enum Sha256Periphery<F: PrimeField32> {
-    BitwiseOperationLookup(SharedBitwiseOperationLookupChip<8>),
-    Phantom(PhantomChip<F>),
+        Ok(())
+    }
 }
 
-impl<F: PrimeField32> VmExtension<F> for Sha256 {
-    type Executor = Sha256Executor<F>;
-    type Periphery = Sha256Periphery<F>;
-
-    fn build(
+pub struct Sha2CpuProverExt;
+// This implementation is specific to CpuBackend because the lookup chips (VariableRangeChecker,
+// BitwiseOperationLookupChip) are specific to CpuBackend.
+impl<E, SC, RA> VmProverExtension<E, RA, Sha256> for Sha2CpuProverExt
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    RA: RowMajorMatrixArena<Val<SC>>,
+    Val<SC>: PrimeField32,
+{
+    fn extend_prover(
         &self,
-        builder: &mut VmInventoryBuilder<F>,
-    ) -> Result<VmInventory<Self::Executor, Self::Periphery>, VmInventoryError> {
-        let mut inventory = VmInventory::new();
-        let bitwise_lu_chip = if let Some(&chip) = builder
-            .find_chip::<SharedBitwiseOperationLookupChip<8>>()
-            .first()
-        {
-            chip.clone()
-        } else {
-            let bitwise_lu_bus = BitwiseOperationLookupBus::new(builder.new_bus_idx());
-            let chip = SharedBitwiseOperationLookupChip::new(bitwise_lu_bus);
-            inventory.add_periphery_chip(chip.clone());
-            chip
+        _: &Sha256,
+        inventory: &mut ChipInventory<SC, RA, CpuBackend<SC>>,
+    ) -> Result<(), ChipInventoryError> {
+        let range_checker = inventory.range_checker()?.clone();
+        let timestamp_max_bits = inventory.timestamp_max_bits();
+        let mem_helper = SharedMemoryHelper::new(range_checker.clone(), timestamp_max_bits);
+        let pointer_max_bits = inventory.airs().pointer_max_bits();
+
+        let bitwise_lu = {
+            let existing_chip = inventory
+                .find_chip::<SharedBitwiseOperationLookupChip<8>>()
+                .next();
+            if let Some(chip) = existing_chip {
+                chip.clone()
+            } else {
+                let air: &BitwiseOperationLookupAir<8> = inventory.next_air()?;
+                let chip = Arc::new(BitwiseOperationLookupChip::new(air.bus));
+                inventory.add_periphery_chip(chip.clone());
+                chip
+            }
         };
 
-        let sha256_chip = Sha256VmChip::new(
-            builder.system_port(),
-            builder.system_config().memory_config.pointer_max_bits,
-            bitwise_lu_chip,
-            builder.new_bus_idx(),
-            Rv32Sha256Opcode::CLASS_OFFSET,
-            builder.system_base().offline_memory(),
+        inventory.next_air::<Sha256VmAir>()?;
+        let sha256 = Sha256VmChip::new(
+            Sha256VmFiller::new(bitwise_lu, pointer_max_bits),
+            mem_helper,
         );
-        inventory.add_executor(
-            sha256_chip,
-            Rv32Sha256Opcode::iter().map(|x| x.global_opcode()),
-        )?;
+        inventory.add_executor_chip(sha256);
 
-        Ok(inventory)
+        Ok(())
     }
 }
diff --git a/extensions/sha256/circuit/src/lib.rs b/extensions/sha256/circuit/src/lib.rs
index fe0844f902..741cf3ec9d 100644
--- a/extensions/sha256/circuit/src/lib.rs
+++ b/extensions/sha256/circuit/src/lib.rs
@@ -1,5 +1,87 @@
+use std::result::Result;
+
+use openvm_circuit::{
+    arch::{
+        AirInventory, ChipInventoryError, InitFileGenerator, MatrixRecordArena, SystemConfig,
+        VmBuilder, VmChipComplex, VmProverExtension,
+    },
+    system::{SystemChipInventory, SystemCpuBuilder, SystemExecutor},
+};
+use openvm_circuit_derive::VmConfig;
+use openvm_rv32im_circuit::{
+    Rv32I, Rv32IExecutor, Rv32ImCpuProverExt, Rv32Io, Rv32IoExecutor, Rv32M, Rv32MExecutor,
+};
+use openvm_stark_backend::{
+    config::{StarkGenericConfig, Val},
+    p3_field::PrimeField32,
+    prover::cpu::{CpuBackend, CpuDevice},
+};
+use openvm_stark_sdk::engine::StarkEngine;
+use serde::{Deserialize, Serialize};
+
 mod sha256_chip;
 pub use sha256_chip::*;
 
 mod extension;
 pub use extension::*;
+
+#[derive(Clone, Debug, VmConfig, derive_new::new, Serialize, Deserialize)]
+pub struct Sha256Rv32Config {
+    #[config(executor = "SystemExecutor<F>")]
+    pub system: SystemConfig,
+    #[extension]
+    pub rv32i: Rv32I,
+    #[extension]
+    pub rv32m: Rv32M,
+    #[extension]
+    pub io: Rv32Io,
+    #[extension]
+    pub sha256: Sha256,
+}
+
+impl Default for Sha256Rv32Config {
+    fn default() -> Self {
+        Self {
+            system: SystemConfig::default(),
+            rv32i: Rv32I,
+            rv32m: Rv32M::default(),
+            io: Rv32Io,
+            sha256: Sha256,
+        }
+    }
+}
+
+// Default implementation uses no init file
+impl InitFileGenerator for Sha256Rv32Config {}
+
+#[derive(Clone)]
+pub struct Sha256Rv32CpuBuilder;
+
+impl<E, SC> VmBuilder<E> for Sha256Rv32CpuBuilder
+where
+    SC: StarkGenericConfig,
+    E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>,
+    Val<SC>: PrimeField32,
+{
+    type VmConfig = Sha256Rv32Config;
+    type SystemChipInventory = SystemChipInventory<SC>;
+    type RecordArena = MatrixRecordArena<Val<SC>>;
+
+    fn create_chip_complex(
+        &self,
+        config: &Sha256Rv32Config,
+        circuit: AirInventory<SC>,
+    ) -> Result<
+        VmChipComplex<SC, Self::RecordArena, E::PB, Self::SystemChipInventory>,
+        ChipInventoryError,
+    > {
+        let mut chip_complex =
+            VmBuilder::<E>::create_chip_complex(&SystemCpuBuilder, &config.system, circuit)?;
+        let inventory = &mut chip_complex.inventory;
+        VmProverExtension::<E, _, _>::extend_prover(&Rv32ImCpuProverExt, &config.rv32i, inventory)?;
+        VmProverExtension::<E, _, _>::extend_prover(&Rv32ImCpuProverExt, &config.rv32m, inventory)?;
+        VmProverExtension::<E, _, _>::extend_prover(&Rv32ImCpuProverExt, &config.io, inventory)?;
+        VmProverExtension::<E, _, _>::extend_prover(&Sha2CpuProverExt, &config.sha256, inventory)?;
+        Ok(chip_complex)
+    }
+}
diff --git a/extensions/sha256/circuit/src/sha256_chip/air.rs b/extensions/sha256/circuit/src/sha256_chip/air.rs
index f4f1df34eb..2fe1cb26c0 100644
--- a/extensions/sha256/circuit/src/sha256_chip/air.rs
+++ b/extensions/sha256/circuit/src/sha256_chip/air.rs
@@ -2,7 +2,10 @@ use std::{array, borrow::Borrow, cmp::min};
 
 use openvm_circuit::{
     arch::ExecutionBridge,
-    system::memory::{offline_checker::MemoryBridge, MemoryAddress},
+    system::{
+        memory::{offline_checker::MemoryBridge, MemoryAddress},
+        SystemPort,
+    },
 };
 use openvm_circuit_primitives::{
     bitwise_op_lookup::BitwiseOperationLookupBus, encoder::Encoder, utils::not, SubAir,
@@ -17,7 +20,7 @@ use openvm_sha256_air::{
 };
 use openvm_sha256_transpiler::Rv32Sha256Opcode;
 use openvm_stark_backend::{
-    interaction::InteractionBuilder,
+    interaction::{BusIndex, InteractionBuilder},
     p3_air::{Air, AirBuilder, BaseAir},
     p3_field::{Field, FieldAlgebra},
     p3_matrix::Matrix,
@@ -31,7 +34,7 @@ use super::{
 
 /// Sha256VmAir does all constraints related to message padding and
 /// the Sha256Air subair constrains the actual hash
-#[derive(Clone, Debug, derive_new::new)]
+#[derive(Clone, Debug)]
 pub struct Sha256VmAir {
     pub execution_bridge: ExecutionBridge,
     pub memory_bridge: MemoryBridge,
@@ -44,6 +47,28 @@ pub struct Sha256VmAir {
     pub(super) padding_encoder: Encoder,
 }
 
+impl Sha256VmAir {
+    /// Builds the AIR from the system buses in `port`, the bitwise lookup bus, the pointer
+    /// bit width, and `self_bus_idx`, the bus index handed to the `Sha256Air` subair.
+    pub fn new(
+        port: SystemPort,
+        bitwise_lookup_bus: BitwiseOperationLookupBus,
+        ptr_max_bits: usize,
+        self_bus_idx: BusIndex,
+    ) -> Self {
+        let execution_bridge = ExecutionBridge::new(port.execution_bus, port.program_bus);
+        let sha256_subair = Sha256Air::new(bitwise_lookup_bus, self_bus_idx);
+        Self {
+            execution_bridge,
+            memory_bridge: port.memory_bridge,
+            bitwise_lookup_bus,
+            ptr_max_bits,
+            sha256_subair,
+            padding_encoder: Encoder::new(PaddingFlags::COUNT, 2, false),
+        }
+    }
+}
+
 impl<F: Field> BaseAirWithPublicValues<F> for Sha256VmAir {}
 impl<F: Field> PartitionedBaseAir<F> for Sha256VmAir {}
 impl<F: Field> BaseAir<F> for Sha256VmAir {
diff --git a/extensions/sha256/circuit/src/sha256_chip/execution.rs b/extensions/sha256/circuit/src/sha256_chip/execution.rs
new file mode 100644
index 0000000000..befbb25f41
--- /dev/null
+++ b/extensions/sha256/circuit/src/sha256_chip/execution.rs
@@ -0,0 +1,154 @@
+use std::borrow::{Borrow, BorrowMut};
+
+use openvm_circuit::{arch::*, system::memory::online::GuestMemory};
+use openvm_circuit_primitives::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_MEMORY_AS, RV32_REGISTER_AS},
+    LocalOpcode,
+};
+use openvm_sha256_air::{get_sha256_num_blocks, SHA256_ROWS_PER_BLOCK};
+use openvm_sha256_transpiler::Rv32Sha256Opcode;
+use openvm_stark_backend::p3_field::PrimeField32;
+
+use super::{sha256_solve, Sha256VmExecutor, SHA256_NUM_READ_ROWS, SHA256_READ_SIZE};
+
+#[derive(AlignedBytesBorrow, Clone)]
+#[repr(C)]
+struct ShaPreCompute {
+    a: u8, // register pointer the handler reads the destination address from
+    b: u8, // register pointer the handler reads the source address from
+    c: u8, // register pointer the handler reads the message length from
+}
+
+impl<F: PrimeField32> Executor<F> for Sha256VmExecutor {
+    fn pre_compute_size(&self) -> usize {
+        size_of::<ShaPreCompute>()
+    }
+
+    fn pre_compute<Ctx>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: ExecutionCtxTrait,
+    {
+        let data: &mut ShaPreCompute = data.borrow_mut();
+        self.pre_compute_impl(pc, inst, data)?;
+        Ok(execute_e1_impl::<_, _>)
+    }
+}
+impl<F: PrimeField32> MeteredExecutor<F> for Sha256VmExecutor {
+    fn metered_pre_compute_size(&self) -> usize {
+        size_of::<E2PreCompute<ShaPreCompute>>()
+    }
+
+    fn metered_pre_compute<Ctx>(
+        &self,
+        chip_idx: usize,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut [u8],
+    ) -> Result<ExecuteFunc<F, Ctx>, StaticProgramError>
+    where
+        Ctx: MeteredExecutionCtxTrait,
+    {
+        let data: &mut E2PreCompute<ShaPreCompute> = data.borrow_mut();
+        data.chip_idx = chip_idx as u32;
+        self.pre_compute_impl(pc, inst, &mut data.data)?;
+        Ok(execute_e2_impl::<_, _>)
+    }
+}
+
+/// Shared E1/E2 handler: hashes `len` bytes at `src` and writes the digest to `dst` (all three
+/// come from registers). Returns `num_blocks * SHA256_ROWS_PER_BLOCK`, or 0 when `IS_E1`.
+unsafe fn execute_e12_impl<F: PrimeField32, CTX: ExecutionCtxTrait, const IS_E1: bool>(
+    pre_compute: &ShaPreCompute,
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) -> u32 {
+    let dst_u32 = u32::from_le_bytes(vm_state.vm_read(RV32_REGISTER_AS, pre_compute.a as u32));
+    let src_u32 = u32::from_le_bytes(vm_state.vm_read(RV32_REGISTER_AS, pre_compute.b as u32));
+    let len_u32 = u32::from_le_bytes(vm_state.vm_read(RV32_REGISTER_AS, pre_compute.c as u32));
+
+    let (output, height) = if IS_E1 {
+        // SAFETY: RV32_MEMORY_AS is memory address space of type u8
+        let message = vm_state.vm_read_slice(RV32_MEMORY_AS, src_u32, len_u32 as usize);
+        (sha256_solve(message), 0)
+    } else {
+        let num_blocks = get_sha256_num_blocks(len_u32);
+        // Whole blocks are read below, so reserve the padded size instead of `len_u32`.
+        let padded_len = num_blocks as usize * SHA256_NUM_READ_ROWS * SHA256_READ_SIZE;
+        let mut message = Vec::with_capacity(padded_len);
+        for block_idx in 0..num_blocks as usize {
+            // Reads happen on the first 4 rows of each block
+            for row in 0..SHA256_NUM_READ_ROWS {
+                let read_idx = block_idx * SHA256_NUM_READ_ROWS + row;
+                let row_input: [u8; SHA256_READ_SIZE] = vm_state.vm_read(
+                    RV32_MEMORY_AS,
+                    src_u32 + (read_idx * SHA256_READ_SIZE) as u32,
+                );
+                message.extend_from_slice(&row_input);
+            }
+        }
+        let output = sha256_solve(&message[..len_u32 as usize]);
+        let height = num_blocks * SHA256_ROWS_PER_BLOCK as u32;
+        (output, height)
+    };
+    vm_state.vm_write(RV32_MEMORY_AS, dst_u32, &output);
+
+    vm_state.pc = vm_state.pc.wrapping_add(DEFAULT_PC_STEP);
+    vm_state.instret += 1;
+
+    height
+}
+
+unsafe fn execute_e1_impl<F: PrimeField32, CTX: ExecutionCtxTrait>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let pre_compute: &ShaPreCompute = pre_compute.borrow();
+    execute_e12_impl::<F, CTX, true>(pre_compute, vm_state);
+}
+/// Metered (E2) entry point: runs the shared handler, then reports its height for `chip_idx`.
+unsafe fn execute_e2_impl<F: PrimeField32, CTX: MeteredExecutionCtxTrait>(
+    pre_compute: &[u8],
+    vm_state: &mut VmExecState<F, GuestMemory, CTX>,
+) {
+    let metered: &E2PreCompute<ShaPreCompute> = pre_compute.borrow();
+    let height = execute_e12_impl::<F, CTX, false>(&metered.data, vm_state);
+    let chip_idx = metered.chip_idx as usize;
+    vm_state.ctx.on_height_change(chip_idx, height);
+}
+
+impl Sha256VmExecutor {
+    /// Validates `inst` and records its `a`, `b`, `c` operands in `data`.
+    ///
+    /// Returns `StaticProgramError::InvalidInstruction(pc)` unless `d` is
+    /// `RV32_REGISTER_AS` and `e` is `RV32_MEMORY_AS`. Panics if the opcode is not
+    /// `Rv32Sha256Opcode::SHA256`; that check runs after `data` has been written.
+    fn pre_compute_impl<F: PrimeField32>(
+        &self,
+        pc: u32,
+        inst: &Instruction<F>,
+        data: &mut ShaPreCompute,
+    ) -> Result<(), StaticProgramError> {
+        // The handlers hard-code the register address space for `a`/`b`/`c` and the memory
+        // address space for the message and digest, so any other `d`/`e` is rejected.
+        let operands_in_registers = inst.d.as_canonical_u32() == RV32_REGISTER_AS;
+        let data_in_memory = inst.e.as_canonical_u32() == RV32_MEMORY_AS;
+        if !(operands_in_registers && data_in_memory) {
+            return Err(StaticProgramError::InvalidInstruction(pc));
+        }
+
+        // Each operand is narrowed to `u8` with a plain `as` cast.
+        let [a, b, c] = [inst.a, inst.b, inst.c].map(|x| x.as_canonical_u32() as u8);
+        *data = ShaPreCompute { a, b, c };
+
+        // A mismatching opcode panics instead of returning an error.
+        assert_eq!(&Rv32Sha256Opcode::SHA256.global_opcode(), &inst.opcode);
+        Ok(())
+    }
+}
diff --git a/extensions/sha256/circuit/src/sha256_chip/mod.rs b/extensions/sha256/circuit/src/sha256_chip/mod.rs
index 4c40eca5d8..861bfe2f6d 100644
--- a/extensions/sha256/circuit/src/sha256_chip/mod.rs
+++ b/extensions/sha256/circuit/src/sha256_chip/mod.rs
@@ -1,37 +1,22 @@
 //! Sha256 hasher. Handles full sha256 hashing with padding.
 //! variable length inputs read from VM memory.
-use std::{
-    array,
-    cmp::{max, min},
-    sync::{Arc, Mutex},
-};
 
-use openvm_circuit::arch::{
-    ExecutionBridge, ExecutionError, ExecutionState, InstructionExecutor, SystemPort,
-};
+use openvm_circuit::arch::*;
 use openvm_circuit_primitives::{
     bitwise_op_lookup::SharedBitwiseOperationLookupChip, encoder::Encoder,
 };
-use openvm_instructions::{
-    instruction::Instruction,
-    program::DEFAULT_PC_STEP,
-    riscv::{RV32_CELL_BITS, RV32_MEMORY_AS, RV32_REGISTER_AS},
-    LocalOpcode,
-};
-use openvm_rv32im_circuit::adapters::read_rv32_register;
-use openvm_sha256_air::{Sha256Air, SHA256_BLOCK_BITS};
-use openvm_sha256_transpiler::Rv32Sha256Opcode;
-use openvm_stark_backend::{interaction::BusIndex, p3_field::PrimeField32};
-use serde::{Deserialize, Serialize};
+use openvm_instructions::riscv::RV32_CELL_BITS;
+use openvm_sha256_air::{Sha256FillerHelper, SHA256_BLOCK_BITS};
 use sha2::{Digest, Sha256};
 
 mod air;
 mod columns;
+mod execution;
 mod trace;
 
 pub use air::*;
 pub use columns::*;
-use openvm_circuit::system::memory::{MemoryController, OfflineMemory, RecordId};
+pub use trace::*;
 
 #[cfg(test)]
 mod tests;
@@ -47,156 +32,38 @@ const SHA256_WRITE_SIZE: usize = 32;
 pub const SHA256_BLOCK_CELLS: usize = SHA256_BLOCK_BITS / RV32_CELL_BITS;
 /// Number of rows we will do a read on for each SHA256 block
 pub const SHA256_NUM_READ_ROWS: usize = SHA256_BLOCK_CELLS / SHA256_READ_SIZE;
-pub struct Sha256VmChip<F: PrimeField32> {
-    pub air: Sha256VmAir,
-    /// IO and memory data necessary for each opcode call
-    pub records: Vec<Sha256Record<F>>,
-    pub offline_memory: Arc<Mutex<OfflineMemory<F>>>,
-    pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<8>,
+/// Maximum message length that this chip supports in bytes
+pub const SHA256_MAX_MESSAGE_LEN: usize = 1 << 29;
 
-    offset: usize,
+pub type Sha256VmChip<F> = VmChipWrapper<F, Sha256VmFiller>;
+
+#[derive(derive_new::new, Clone)]
+pub struct Sha256VmExecutor {
+    pub offset: usize, // tests pass `Rv32Sha256Opcode::CLASS_OFFSET` here
+    pub pointer_max_bits: usize, // pointers are debug-checked against `1 << pointer_max_bits`
 }
 
-#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
-pub struct Sha256Record<F> {
-    pub from_state: ExecutionState<F>,
-    pub dst_read: RecordId,
-    pub src_read: RecordId,
-    pub len_read: RecordId,
-    pub input_records: Vec<[RecordId; SHA256_NUM_READ_ROWS]>,
-    pub input_message: Vec<[[u8; SHA256_READ_SIZE]; SHA256_NUM_READ_ROWS]>,
-    pub digest_write: RecordId,
+pub struct Sha256VmFiller {
+    pub inner: Sha256FillerHelper,
+    pub padding_encoder: Encoder, // built from `PaddingFlags::COUNT` in `new`
+    pub bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+    pub pointer_max_bits: usize, // tests pass the same value as for the executor
 }
 
-impl<F: PrimeField32> Sha256VmChip<F> {
+impl Sha256VmFiller { // constructor only
     pub fn new(
-        SystemPort {
-            execution_bus,
-            program_bus,
-            memory_bridge,
-        }: SystemPort,
-        address_bits: usize,
-        bitwise_lookup_chip: SharedBitwiseOperationLookupChip<8>,
-        self_bus_idx: BusIndex,
-        offset: usize,
-        offline_memory: Arc<Mutex<OfflineMemory<F>>>,
+        bitwise_lookup_chip: SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+        pointer_max_bits: usize, // stored as-is
     ) -> Self {
         Self {
-            air: Sha256VmAir::new(
-                ExecutionBridge::new(execution_bus, program_bus),
-                memory_bridge,
-                bitwise_lookup_chip.bus(),
-                address_bits,
-                Sha256Air::new(bitwise_lookup_chip.bus(), self_bus_idx),
-                Encoder::new(PaddingFlags::COUNT, 2, false),
-            ),
+            inner: Sha256FillerHelper::new(), // fresh helper; `new` takes no arguments
+            padding_encoder: Encoder::new(PaddingFlags::COUNT, 2, false),
             bitwise_lookup_chip,
-            records: Vec::new(),
-            offset,
-            offline_memory,
+            pointer_max_bits,
         }
     }
 }
 
-impl<F: PrimeField32> InstructionExecutor<F> for Sha256VmChip<F> {
-    fn execute(
-        &mut self,
-        memory: &mut MemoryController<F>,
-        instruction: &Instruction<F>,
-        from_state: ExecutionState<u32>,
-    ) -> Result<ExecutionState<u32>, ExecutionError> {
-        let &Instruction {
-            opcode,
-            a,
-            b,
-            c,
-            d,
-            e,
-            ..
-        } = instruction;
-        let local_opcode = opcode.local_opcode_idx(self.offset);
-        debug_assert_eq!(local_opcode, Rv32Sha256Opcode::SHA256.local_usize());
-        debug_assert_eq!(d, F::from_canonical_u32(RV32_REGISTER_AS));
-        debug_assert_eq!(e, F::from_canonical_u32(RV32_MEMORY_AS));
-
-        debug_assert_eq!(from_state.timestamp, memory.timestamp());
-
-        let (dst_read, dst) = read_rv32_register(memory, d, a);
-        let (src_read, src) = read_rv32_register(memory, d, b);
-        let (len_read, len) = read_rv32_register(memory, d, c);
-
-        #[cfg(debug_assertions)]
-        {
-            assert!(dst < (1 << self.air.ptr_max_bits));
-            assert!(src < (1 << self.air.ptr_max_bits));
-            assert!(len < (1 << self.air.ptr_max_bits));
-        }
-
-        // need to pad with one 1 bit, 64 bits for the message length and then pad until the length
-        // is divisible by [SHA256_BLOCK_BITS]
-        let num_blocks = ((len << 3) as usize + 1 + 64).div_ceil(SHA256_BLOCK_BITS);
-
-        // we will read [num_blocks] * [SHA256_BLOCK_CELLS] cells but only [len] cells will be used
-        debug_assert!(
-            src as usize + num_blocks * SHA256_BLOCK_CELLS <= (1 << self.air.ptr_max_bits)
-        );
-        let mut hasher = Sha256::new();
-        let mut input_records = Vec::with_capacity(num_blocks * SHA256_NUM_READ_ROWS);
-        let mut input_message = Vec::with_capacity(num_blocks * SHA256_NUM_READ_ROWS);
-        let mut read_ptr = src;
-        for _ in 0..num_blocks {
-            let block_reads_records = array::from_fn(|i| {
-                memory.read(
-                    e,
-                    F::from_canonical_u32(read_ptr + (i * SHA256_READ_SIZE) as u32),
-                )
-            });
-            let block_reads_bytes = array::from_fn(|i| {
-                // we add to the hasher only the bytes that are part of the message
-                let num_reads = min(
-                    SHA256_READ_SIZE,
-                    (max(read_ptr, src + len) - read_ptr) as usize,
-                );
-                let row_input = block_reads_records[i]
-                    .1
-                    .map(|x| x.as_canonical_u32().try_into().unwrap());
-                hasher.update(&row_input[..num_reads]);
-                read_ptr += SHA256_READ_SIZE as u32;
-                row_input
-            });
-            input_records.push(block_reads_records.map(|x| x.0));
-            input_message.push(block_reads_bytes);
-        }
-
-        let mut digest = [0u8; SHA256_WRITE_SIZE];
-        digest.copy_from_slice(hasher.finalize().as_ref());
-        let (digest_write, _) = memory.write(
-            e,
-            F::from_canonical_u32(dst),
-            digest.map(|b| F::from_canonical_u8(b)),
-        );
-
-        self.records.push(Sha256Record {
-            from_state: from_state.map(F::from_canonical_u32),
-            dst_read,
-            src_read,
-            len_read,
-            input_records,
-            input_message,
-            digest_write,
-        });
-
-        Ok(ExecutionState {
-            pc: from_state.pc + DEFAULT_PC_STEP,
-            timestamp: memory.timestamp(),
-        })
-    }
-
-    fn get_opcode_name(&self, _: usize) -> String {
-        "SHA256".to_string()
-    }
-}
-
 pub fn sha256_solve(input_message: &[u8]) -> [u8; SHA256_WRITE_SIZE] {
     let mut hasher = Sha256::new();
     hasher.update(input_message);
diff --git a/extensions/sha256/circuit/src/sha256_chip/tests.rs b/extensions/sha256/circuit/src/sha256_chip/tests.rs
index 55bc076e2c..52d2f1153b 100644
--- a/extensions/sha256/circuit/src/sha256_chip/tests.rs
+++ b/extensions/sha256/circuit/src/sha256_chip/tests.rs
@@ -1,31 +1,80 @@
-use openvm_circuit::arch::{
-    testing::{memory::gen_pointer, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS},
-    SystemPort,
+use std::{array, sync::Arc};
+
+use hex::FromHex;
+use openvm_circuit::{
+    arch::{
+        testing::{memory::gen_pointer, TestChipHarness, VmChipTestBuilder, BITWISE_OP_LOOKUP_BUS},
+        Arena, DenseRecordArena, MatrixRecordArena, PreflightExecutor,
+    },
+    utils::get_random_message,
 };
 use openvm_circuit_primitives::bitwise_op_lookup::{
-    BitwiseOperationLookupBus, SharedBitwiseOperationLookupChip,
+    BitwiseOperationLookupAir, BitwiseOperationLookupBus, BitwiseOperationLookupChip,
+    SharedBitwiseOperationLookupChip,
+};
+use openvm_instructions::{
+    instruction::Instruction,
+    riscv::{RV32_CELL_BITS, RV32_MEMORY_AS},
+    LocalOpcode,
 };
-use openvm_instructions::{instruction::Instruction, riscv::RV32_CELL_BITS, LocalOpcode};
-use openvm_sha256_air::get_random_message;
+use openvm_sha256_air::{get_sha256_num_blocks, SHA256_BLOCK_U8S};
 use openvm_sha256_transpiler::Rv32Sha256Opcode::{self, *};
 use openvm_stark_backend::{interaction::BusIndex, p3_field::FieldAlgebra};
-use openvm_stark_sdk::{config::setup_tracing, p3_baby_bear::BabyBear, utils::create_seeded_rng};
+use openvm_stark_sdk::{p3_baby_bear::BabyBear, utils::create_seeded_rng};
 use rand::{rngs::StdRng, Rng};
 
-use super::Sha256VmChip;
-use crate::{sha256_solve, Sha256VmDigestCols, Sha256VmRoundCols};
+use super::{Sha256VmAir, Sha256VmChip, Sha256VmExecutor};
+use crate::{
+    sha256_chip::trace::Sha256VmRecordLayout, sha256_solve, Sha256VmDigestCols, Sha256VmFiller,
+    Sha256VmRoundCols,
+};
 
 type F = BabyBear;
-const BUS_IDX: BusIndex = 28;
-fn set_and_execute(
+const SELF_BUS_IDX: BusIndex = 28;
+const MAX_INS_CAPACITY: usize = 4096;
+type Harness<RA> = TestChipHarness<F, Sha256VmExecutor, Sha256VmAir, Sha256VmChip<F>, RA>;
+
+fn create_test_chips<RA: Arena>(
+    tester: &mut VmChipTestBuilder<F>,
+) -> (
+    Harness<RA>, // SHA-256 executor, AIR and chip, with capacity `MAX_INS_CAPACITY`
+    (
+        BitwiseOperationLookupAir<RV32_CELL_BITS>,
+        SharedBitwiseOperationLookupChip<RV32_CELL_BITS>,
+    ),
+) {
+    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
+    let bitwise_chip = Arc::new(BitwiseOperationLookupChip::<RV32_CELL_BITS>::new(
+        bitwise_bus,
+    ));
+
+    let air = Sha256VmAir::new(
+        tester.system_port(),
+        bitwise_chip.bus(),
+        tester.address_bits(), // same pointer bound is given to the executor and filler below
+        SELF_BUS_IDX,
+    );
+    let executor = Sha256VmExecutor::new(Rv32Sha256Opcode::CLASS_OFFSET, tester.address_bits());
+    let chip = Sha256VmChip::new(
+        Sha256VmFiller::new(bitwise_chip.clone(), tester.address_bits()),
+        tester.memory_helper(),
+    );
+
+    let harness = Harness::<RA>::with_capacity(executor, air, chip, MAX_INS_CAPACITY);
+    (harness, (bitwise_chip.air, bitwise_chip)) // bitwise (air, chip) pair is returned too
+}
+
+fn set_and_execute<RA: Arena>(
     tester: &mut VmChipTestBuilder<F>,
-    chip: &mut Sha256VmChip<F>,
+    harness: &mut Harness<RA>, // harness whose executor runs the instruction
     rng: &mut StdRng,
     opcode: Rv32Sha256Opcode,
     message: Option<&[u8]>,
     len: Option<usize>,
-) {
-    let len = len.unwrap_or(rng.gen_range(1..100000));
+) where
+    Sha256VmExecutor: PreflightExecutor<F, RA>,
+{
+    let len = len.unwrap_or_else(|| rng.gen_range(1..3000)); // lazy: draw only if `len` is None
     let tmp = get_random_message(rng, len);
     let message: &[u8] = message.unwrap_or(&tmp);
     let len = message.len();
@@ -34,33 +83,35 @@ fn set_and_execute(
     let rs1 = gen_pointer(rng, 4);
     let rs2 = gen_pointer(rng, 4);
 
-    let max_mem_ptr: u32 = 1
-        << tester
-            .memory_controller()
-            .borrow()
-            .mem_config()
-            .pointer_max_bits;
-    let dst_ptr = rng.gen_range(0..max_mem_ptr);
-    let dst_ptr = dst_ptr ^ (dst_ptr & 3);
+    let dst_ptr = gen_pointer(rng, 4); // where the digest is expected to be written
+    let src_ptr = gen_pointer(rng, 4); // where the message is placed
     tester.write(1, rd, dst_ptr.to_le_bytes().map(F::from_canonical_u8));
-    let src_ptr = rng.gen_range(0..(max_mem_ptr - len as u32));
-    let src_ptr = src_ptr ^ (src_ptr & 3);
     tester.write(1, rs1, src_ptr.to_le_bytes().map(F::from_canonical_u8));
     tester.write(1, rs2, len.to_le_bytes().map(F::from_canonical_u8));
 
-    for (i, &byte) in message.iter().enumerate() {
-        tester.write(2, src_ptr as usize + i, [F::from_canonical_u8(byte)]);
+    // Fill all `num_blocks` blocks in 4-byte writes: message bytes first, then random bytes
+    let num_blocks = get_sha256_num_blocks(len as u32) as usize;
+    for offset in (0..num_blocks * SHA256_BLOCK_U8S).step_by(4) {
+        let chunk: [F; 4] = array::from_fn(|i| {
+            if offset + i < message.len() {
+                F::from_canonical_u8(message[offset + i])
+            } else {
+                F::from_canonical_u8(rng.gen())
+            }
+        });
+
+        tester.write(RV32_MEMORY_AS as usize, src_ptr + offset, chunk);
     }
 
     tester.execute(
-        chip,
+        harness,
         &Instruction::from_usize(opcode.global_opcode(), [rd, rs1, rs2, 1, 2]),
     );
 
     let output = sha256_solve(message);
     assert_eq!(
         output.map(F::from_canonical_u8),
-        tester.read::<32>(2, dst_ptr as usize)
+        tester.read::<32>(RV32_MEMORY_AS as usize, dst_ptr)
     );
 }
 
@@ -72,30 +123,62 @@ fn set_and_execute(
 ///////////////////////////////////////////////////////////////////////////////////////
 #[test]
 fn rand_sha256_test() {
-    setup_tracing();
     let mut rng = create_seeded_rng();
     let mut tester = VmChipTestBuilder::default();
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-    let mut chip = Sha256VmChip::new(
-        SystemPort {
-            execution_bus: tester.execution_bus(),
-            program_bus: tester.program_bus(),
-            memory_bridge: tester.memory_bridge(),
-        },
-        tester.address_bits(),
-        bitwise_chip.clone(),
-        BUS_IDX,
-        Rv32Sha256Opcode::CLASS_OFFSET,
-        tester.offline_memory_mutex_arc(),
-    );
+    let (mut harness, bitwise) = create_test_chips(&mut tester);
 
-    let num_tests: usize = 3;
-    for _ in 0..num_tests {
-        set_and_execute(&mut tester, &mut chip, &mut rng, SHA256, None, None);
+    let num_ops: usize = 10; // each op uses a random message of random length
+    for _ in 0..num_ops {
+        set_and_execute(&mut tester, &mut harness, &mut rng, SHA256, None, None);
+    }
+
+    let tester = tester
+        .build()
+        .load(harness)
+        .load_periphery(bitwise) // bitwise lookup (air, chip) pair from `create_test_chips`
+        .finalize();
+    tester.simple_test().expect("Verification failed");
+}
+
+#[test]
+fn sha256_edge_test_lengths() { // known-answer vectors, then one length per residue mod 64
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default();
+    let (mut harness, bitwise) = create_test_chips(&mut tester);
+
+    let test_vectors = [
+        ("", "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"),
+        (
+            "98c1c0bdb7d5fea9a88859f06c6c439f",
+            "b6b2c9c9b6f30e5c66c977f1bd7ad97071bee739524aecf793384890619f2b05",
+        ),
+        ("5b58f4163e248467cc1cd3eecafe749e8e2baaf82c0f63af06df0526347d7a11327463c115210a46b6740244eddf370be89c", "ac0e25049870b91d78ef6807bb87fce4603c81abd3c097fba2403fd18b6ce0b7"),
+        ("9ad198539e3160194f38ac076a782bd5210a007560d1fce9ef78f8a4a5e4d78c6b96c250cff3520009036e9c6087d5dab587394edda862862013de49a12072485a6c01165ec0f28ffddf1873fbd53e47fcd02fb6a5ccc9622d5588a92429c663ce298cb71b50022fc2ec4ba9f5bbd250974e1a607b165fee16e8f3f2be20d7348b91a2f518ce928491900d56d9f86970611580350cee08daea7717fe28a73b8dcfdea22a65ed9f5a09198de38e4e4f2cc05b0ba3dd787a5363ab6c9f39dcb66c1a29209b1d6b1152769395df8150b4316658ea6ab19af94903d643fcb0ae4d598035ebe73c8b1b687df1ab16504f633c929569c6d0e5fae6eea43838fbc8ce2c2b43161d0addc8ccf945a9c4e06294e56a67df0000f561f61b630b1983ba403e775aaeefa8d339f669d1e09ead7eae979383eda983321e1743e5404b4b328da656de79ff52d179833a6bd5129f49432d74d001996c37c68d9ab49fcff8061d193576f396c20e1f0d9ee83a51290ba60efa9c3cb2e15b756321a7ca668cdbf63f95ec33b1c450aa100101be059dc00077245b25a6a66698dee81953ed4a606944076e2858b1420de0095a7f60b08194d6d9a997009d345c71f63a7034b976e409af8a9a040ac7113664609a7adedb76b2fadf04b0348392a1650526eb2a4d6ed5e4bbcda8aabc8488b38f4f5d9a398103536bb8250ed82a9b9825f7703c263f9e", "080ad71239852124fc26758982090611b9b19abf22d22db3a57f67a06e984a23")
+    ];
+
+    for (input, expected) in test_vectors.iter() {
+        let input = Vec::from_hex(input).unwrap();
+
+        // Known-answer check: compare the reference hasher against the digest listed in
+        // the vector, so the expected values above are no longer dead data.
+        let expected = <[u8; 32]>::from_hex(expected).unwrap();
+        assert_eq!(sha256_solve(&input), expected);
+
+        // Run the same message through the chip; `set_and_execute` asserts that the digest
+        // written to memory equals `sha256_solve(message)`.
+        set_and_execute(&mut tester, &mut harness, &mut rng, SHA256, Some(&input), None);
     }
 
-    let tester = tester.build().load(chip).load(bitwise_chip).finalize();
+    // check every possible input length modulo 64 (65..=128 hits each residue exactly once)
+    for i in 65..=128 {
+        set_and_execute(&mut tester, &mut harness, &mut rng, SHA256, None, Some(i));
+    }
+
+    let tester = tester
+        .build()
+        .load(harness)
+        .load_periphery(bitwise)
+        .finalize();
     tester.simple_test().expect("Verification failed");
 }
 
@@ -108,20 +191,7 @@ fn rand_sha256_test() {
 fn execute_roundtrip_sanity_test() {
     let mut rng = create_seeded_rng();
     let mut tester = VmChipTestBuilder::default();
-    let bitwise_bus = BitwiseOperationLookupBus::new(BITWISE_OP_LOOKUP_BUS);
-    let bitwise_chip = SharedBitwiseOperationLookupChip::<RV32_CELL_BITS>::new(bitwise_bus);
-    let mut chip = Sha256VmChip::new(
-        SystemPort {
-            execution_bus: tester.execution_bus(),
-            program_bus: tester.program_bus(),
-            memory_bridge: tester.memory_bridge(),
-        },
-        tester.address_bits(),
-        bitwise_chip.clone(),
-        BUS_IDX,
-        Rv32Sha256Opcode::CLASS_OFFSET,
-        tester.offline_memory_mutex_arc(),
-    );
+    let (mut harness, _) = create_test_chips::<MatrixRecordArena<F>>(&mut tester);
 
     println!(
         "Sha256VmDigestCols::width(): {}",
@@ -133,7 +203,7 @@ fn execute_roundtrip_sanity_test() {
     );
     let num_tests: usize = 1;
     for _ in 0..num_tests {
-        set_and_execute(&mut tester, &mut chip, &mut rng, SHA256, None, None);
+        set_and_execute(&mut tester, &mut harness, &mut rng, SHA256, None, None);
     }
 }
 
@@ -147,3 +217,47 @@ fn sha256_solve_sanity_check() {
     ];
     assert_eq!(output, expected);
 }
+
+///////////////////////////////////////////////////////////////////////////////////////
+/// DENSE TESTS
+///
+/// Ensure that the chip works as expected with dense records.
+/// We first execute some instructions with a [DenseRecordArena] and transfer the records
+/// to a [MatrixRecordArena]. After transferring we generate the trace and make sure that
+/// all the constraints pass.
+///////////////////////////////////////////////////////////////////////////////////////
+
+#[test]
+fn dense_record_arena_test() {
+    let mut rng = create_seeded_rng();
+    let mut tester = VmChipTestBuilder::default();
+    let (mut sparse_harness, bitwise) = create_test_chips(&mut tester); // receives the records
+
+    {
+        let mut dense_harness = create_test_chips::<DenseRecordArena>(&mut tester).0;
+
+        let num_ops: usize = 10;
+        for _ in 0..num_ops {
+            set_and_execute(
+                &mut tester,
+                &mut dense_harness, // instructions execute against the dense arena
+                &mut rng,
+                SHA256,
+                None,
+                None,
+            );
+        }
+
+        let mut record_interpreter = dense_harness
+            .arena
+            .get_record_seeker::<_, Sha256VmRecordLayout>();
+        record_interpreter.transfer_to_matrix_arena(&mut sparse_harness.arena);
+    }
+
+    let tester = tester
+        .build()
+        .load(sparse_harness) // only the sparse harness is loaded into the final tester
+        .load_periphery(bitwise)
+        .finalize();
+    tester.simple_test().expect("Verification failed");
+}
diff --git a/extensions/sha256/circuit/src/sha256_chip/trace.rs b/extensions/sha256/circuit/src/sha256_chip/trace.rs
index c02cd00dd8..c257b5a47e 100644
--- a/extensions/sha256/circuit/src/sha256_chip/trace.rs
+++ b/extensions/sha256/circuit/src/sha256_chip/trace.rs
@@ -1,351 +1,594 @@
-use std::{array, borrow::BorrowMut, sync::Arc};
+use std::{
+    array,
+    borrow::{Borrow, BorrowMut},
+    cmp::min,
+};
 
-use openvm_circuit_primitives::utils::next_power_of_two_or_zero;
-use openvm_instructions::riscv::{RV32_CELL_BITS, RV32_REGISTER_NUM_LIMBS};
-use openvm_rv32im_circuit::adapters::compose;
+use openvm_circuit::{
+    arch::*,
+    system::memory::{
+        offline_checker::{MemoryReadAuxRecord, MemoryWriteBytesAuxRecord},
+        online::TracingMemory,
+        MemoryAuxColsFactory,
+    },
+};
+use openvm_circuit_primitives::AlignedBytesBorrow;
+use openvm_instructions::{
+    instruction::Instruction,
+    program::DEFAULT_PC_STEP,
+    riscv::{RV32_CELL_BITS, RV32_MEMORY_AS, RV32_REGISTER_AS, RV32_REGISTER_NUM_LIMBS},
+    LocalOpcode,
+};
+use openvm_rv32im_circuit::adapters::{read_rv32_register, tracing_read, tracing_write};
 use openvm_sha256_air::{
-    get_flag_pt_array, limbs_into_u32, Sha256Air, SHA256_BLOCK_WORDS, SHA256_BUFFER_SIZE, SHA256_H,
-    SHA256_HASH_WORDS, SHA256_ROWS_PER_BLOCK, SHA256_WORD_U8S,
+    get_flag_pt_array, get_sha256_num_blocks, Sha256FillerHelper, SHA256_BLOCK_BITS, SHA256_H,
+    SHA256_ROWS_PER_BLOCK,
 };
+use openvm_sha256_transpiler::Rv32Sha256Opcode;
 use openvm_stark_backend::{
-    config::{StarkGenericConfig, Val},
-    p3_air::BaseAir,
-    p3_field::{FieldAlgebra, PrimeField32},
-    p3_matrix::dense::RowMajorMatrix,
+    p3_field::PrimeField32,
+    p3_matrix::{dense::RowMajorMatrix, Matrix},
     p3_maybe_rayon::prelude::*,
-    prover::types::AirProofInput,
-    rap::get_air_name,
-    AirRef, Chip, ChipUsageGetter,
 };
 
 use super::{
-    Sha256VmChip, Sha256VmDigestCols, Sha256VmRoundCols, SHA256VM_CONTROL_WIDTH,
-    SHA256VM_DIGEST_WIDTH, SHA256VM_ROUND_WIDTH,
+    Sha256VmDigestCols, Sha256VmExecutor, Sha256VmRoundCols, SHA256VM_CONTROL_WIDTH,
+    SHA256VM_DIGEST_WIDTH,
 };
 use crate::{
-    sha256_chip::{PaddingFlags, SHA256_READ_SIZE},
-    SHA256_BLOCK_CELLS,
+    sha256_chip::{PaddingFlags, SHA256_READ_SIZE, SHA256_REGISTER_READS, SHA256_WRITE_SIZE},
+    sha256_solve, Sha256VmControlCols, Sha256VmFiller, SHA256VM_ROUND_WIDTH, SHA256VM_WIDTH,
+    SHA256_BLOCK_CELLS, SHA256_MAX_MESSAGE_LEN, SHA256_NUM_READ_ROWS,
 };
 
-impl<SC: StarkGenericConfig> Chip<SC> for Sha256VmChip<Val<SC>>
+#[derive(Clone, Copy)]
+pub struct Sha256VmMetadata {
+    pub num_blocks: u32, // number of SHA-256 blocks covered by one record
+}
+
+impl MultiRowMetadata for Sha256VmMetadata {
+    #[inline(always)]
+    fn get_num_rows(&self) -> usize {
+        SHA256_ROWS_PER_BLOCK * (self.num_blocks as usize) // rows per block x block count
+    }
+}
+
+pub(crate) type Sha256VmRecordLayout = MultiRowLayout<Sha256VmMetadata>;
+
+#[repr(C)]
+#[derive(AlignedBytesBorrow, Debug, Clone)]
+pub struct Sha256VmRecordHeader {
+    pub from_pc: u32, // pc when `execute` starts
+    pub timestamp: u32, // memory timestamp when `execute` starts
+    pub rd_ptr: u32, // instruction operand `a`: register holding `dst_ptr`
+    pub rs1_ptr: u32, // instruction operand `b`: register holding `src_ptr`
+    pub rs2_ptr: u32, // instruction operand `c`: register holding `len`
+    pub dst_ptr: u32, // memory address the digest is written to
+    pub src_ptr: u32, // memory address the input is read from
+    pub len: u32, // message length in bytes
+
+    pub register_reads_aux: [MemoryReadAuxRecord; SHA256_REGISTER_READS], // order: rd, rs1, rs2
+    pub write_aux: MemoryWriteBytesAuxRecord<SHA256_WRITE_SIZE>, // aux data of the digest write
+}
+
+pub struct Sha256VmRecordMut<'a> {
+    pub inner: &'a mut Sha256VmRecordHeader,
+    // Having a contiguous slice of the input is useful for fast hashing in `execute`
+    pub input: &'a mut [u8], // `num_blocks * SHA256_BLOCK_CELLS` bytes
+    pub read_aux: &'a mut [MemoryReadAuxRecord], // `num_blocks * SHA256_NUM_READ_ROWS` entries
+}
+
+/// Custom borrowing that splits the buffer into a fixed `Sha256VmRecordHeader` header
+/// followed by a slice of `u8`'s of length `SHA256_BLOCK_CELLS * num_blocks` where `num_blocks` is
+/// provided at runtime, followed by a slice of `MemoryReadAuxRecord`'s of length
+/// `SHA256_NUM_READ_ROWS * num_blocks`. Uses `align_to_mut()` to make sure the slice is properly
+/// aligned to `MemoryReadAuxRecord`. NOTE(review): no explicit debug assertions are visible in
+/// this impl; any size/alignment checks presumably come from `borrow_mut` — confirm.
+impl<'a> CustomBorrow<'a, Sha256VmRecordMut<'a>, Sha256VmRecordLayout> for [u8] {
+    fn custom_borrow(&'a mut self, layout: Sha256VmRecordLayout) -> Sha256VmRecordMut<'a> {
+        let (header_buf, rest) =
+            unsafe { self.split_at_mut_unchecked(size_of::<Sha256VmRecordHeader>()) };
+
+        // Using `split_at_mut_unchecked` for perf reasons: the buffer length is not checked here.
+        // input is a slice of `u8`'s of length `SHA256_BLOCK_CELLS * num_blocks`, so the alignment
+        // is always satisfied
+        let (input, rest) = unsafe {
+            rest.split_at_mut_unchecked((layout.metadata.num_blocks as usize) * SHA256_BLOCK_CELLS)
+        };
+
+        // Using `align_to_mut` to make sure the returned slice is properly aligned to
+        // `MemoryReadAuxRecord`. Additionally, Rust's subslice operation (a few lines below)
+        // will verify that the buffer has enough capacity
+        let (_, read_aux_buf, _) = unsafe { rest.align_to_mut::<MemoryReadAuxRecord>() };
+        Sha256VmRecordMut {
+            inner: header_buf.borrow_mut(),
+            input,
+            read_aux: &mut read_aux_buf
+                [..(layout.metadata.num_blocks as usize) * SHA256_NUM_READ_ROWS],
+        }
+    }
+
+    unsafe fn extract_layout(&self) -> Sha256VmRecordLayout {
+        let header: &Sha256VmRecordHeader = self.borrow();
+        Sha256VmRecordLayout {
+            metadata: Sha256VmMetadata {
+                num_blocks: get_sha256_num_blocks(header.len), // recomputed from the stored length
+            },
+        }
+    }
+}
+
+impl SizedRecord<Sha256VmRecordLayout> for Sha256VmRecordMut<'_> {
+    fn size(layout: &Sha256VmRecordLayout) -> usize {
+        let blocks = layout.metadata.num_blocks as usize;
+        // Fixed header followed by the raw input bytes of every block.
+        let input_end = size_of::<Sha256VmRecordHeader>() + blocks * SHA256_BLOCK_CELLS;
+        // Pad so that the aux-record slice starts on a `MemoryReadAuxRecord` boundary.
+        let aux_start = input_end.next_multiple_of(align_of::<MemoryReadAuxRecord>());
+        // One read aux record per read row of every block.
+        let aux_len = blocks * SHA256_NUM_READ_ROWS * size_of::<MemoryReadAuxRecord>();
+        aux_start + aux_len
+    }
+
+    fn alignment(_layout: &Sha256VmRecordLayout) -> usize {
+        align_of::<Sha256VmRecordHeader>() // independent of the layout
+    }
+}
+
+impl<F, RA> PreflightExecutor<F, RA> for Sha256VmExecutor
 where
-    Val<SC>: PrimeField32,
+    F: PrimeField32,
+    for<'buf> RA: RecordArena<'buf, Sha256VmRecordLayout, Sha256VmRecordMut<'buf>>,
 {
-    fn air(&self) -> AirRef<SC> {
-        Arc::new(self.air.clone())
+    fn get_opcode_name(&self, _: usize) -> String { // opcode index ignored: always SHA256
+        format!("{:?}", Rv32Sha256Opcode::SHA256)
     }
 
-    fn generate_air_proof_input(self) -> AirProofInput<SC> {
-        let non_padded_height = self.current_trace_height();
-        let height = next_power_of_two_or_zero(non_padded_height);
-        let width = self.trace_width();
-        let mut values = Val::<SC>::zero_vec(height * width);
-        if height == 0 {
-            return AirProofInput::simple_no_pis(RowMajorMatrix::new(values, width));
+    fn execute(
+        &self,
+        state: VmStateMut<F, TracingMemory, RA>,
+        instruction: &Instruction<F>,
+    ) -> Result<(), ExecutionError> {
+        let Instruction {
+            opcode,
+            a,
+            b,
+            c,
+            d,
+            e,
+            ..
+        } = instruction;
+        debug_assert_eq!(*opcode, Rv32Sha256Opcode::SHA256.global_opcode());
+        debug_assert_eq!(d.as_canonical_u32(), RV32_REGISTER_AS);
+        debug_assert_eq!(e.as_canonical_u32(), RV32_MEMORY_AS);
+
+        // Peek at the length via `memory.data()` first: the record size depends on it
+        let len = read_rv32_register(state.memory.data(), c.as_canonical_u32());
+
+        let num_blocks = get_sha256_num_blocks(len);
+        let record = state.ctx.alloc(MultiRowLayout {
+            metadata: Sha256VmMetadata { num_blocks },
+        });
+
+        record.inner.from_pc = *state.pc;
+        record.inner.timestamp = state.memory.timestamp(); // taken before any traced access
+        record.inner.rd_ptr = a.as_canonical_u32();
+        record.inner.rs1_ptr = b.as_canonical_u32();
+        record.inner.rs2_ptr = c.as_canonical_u32();
+
+        record.inner.dst_ptr = u32::from_le_bytes(tracing_read(
+            state.memory,
+            RV32_REGISTER_AS,
+            record.inner.rd_ptr,
+            &mut record.inner.register_reads_aux[0].prev_timestamp,
+        ));
+        record.inner.src_ptr = u32::from_le_bytes(tracing_read(
+            state.memory,
+            RV32_REGISTER_AS,
+            record.inner.rs1_ptr,
+            &mut record.inner.register_reads_aux[1].prev_timestamp,
+        ));
+        record.inner.len = u32::from_le_bytes(tracing_read(
+            state.memory,
+            RV32_REGISTER_AS,
+            record.inner.rs2_ptr,
+            &mut record.inner.register_reads_aux[2].prev_timestamp,
+        ));
+
+        // we will read [num_blocks] * [SHA256_BLOCK_CELLS] cells but only [len] cells will be used
+        debug_assert!(
+            record.inner.src_ptr as usize + num_blocks as usize * SHA256_BLOCK_CELLS
+                <= (1 << self.pointer_max_bits)
+        );
+        debug_assert!(
+            record.inner.dst_ptr as usize + SHA256_WRITE_SIZE <= (1 << self.pointer_max_bits)
+        );
+        // We don't support messages of 2^29 bytes or longer (checked in debug builds only)
+        debug_assert!(record.inner.len < SHA256_MAX_MESSAGE_LEN as u32);
+
+        for block_idx in 0..num_blocks as usize {
+            // Each block is read in `SHA256_NUM_READ_ROWS` chunks of `SHA256_READ_SIZE` bytes
+            for row in 0..SHA256_NUM_READ_ROWS {
+                let read_idx = block_idx * SHA256_NUM_READ_ROWS + row;
+                let row_input: [u8; SHA256_READ_SIZE] = tracing_read(
+                    state.memory,
+                    RV32_MEMORY_AS,
+                    record.inner.src_ptr + (read_idx * SHA256_READ_SIZE) as u32,
+                    &mut record.read_aux[read_idx].prev_timestamp,
+                );
+                record.input[read_idx * SHA256_READ_SIZE..(read_idx + 1) * SHA256_READ_SIZE]
+                    .copy_from_slice(&row_input);
+            }
+        }
+
+        let output = sha256_solve(&record.input[..len as usize]); // message bytes only
+        tracing_write(
+            state.memory,
+            RV32_MEMORY_AS,
+            record.inner.dst_ptr,
+            output,
+            &mut record.inner.write_aux.prev_timestamp,
+            &mut record.inner.write_aux.prev_data,
+        );
+
+        *state.pc = state.pc.wrapping_add(DEFAULT_PC_STEP);
+
+        Ok(())
+    }
+}
+
+impl<F: PrimeField32> TraceFiller<F> for Sha256VmFiller {
+    fn fill_trace(
+        &self,
+        mem_helper: &MemoryAuxColsFactory<F>,
+        trace_matrix: &mut RowMajorMatrix<F>,
+        rows_used: usize,
+    ) {
+        if rows_used == 0 {
+            return;
         }
-        let records = self.records;
-        let offline_memory = self.offline_memory.lock().unwrap();
-        let memory_aux_cols_factory = offline_memory.aux_cols_factory();
-
-        let mem_ptr_shift: u32 =
-            1 << (RV32_REGISTER_NUM_LIMBS * RV32_CELL_BITS - self.air.ptr_max_bits);
-
-        let mut states = Vec::with_capacity(height.div_ceil(SHA256_ROWS_PER_BLOCK));
-        let mut global_block_idx = 0;
-        for (record_idx, record) in records.iter().enumerate() {
-            let dst_read = offline_memory.record_by_id(record.dst_read);
-            let src_read = offline_memory.record_by_id(record.src_read);
-            let len_read = offline_memory.record_by_id(record.len_read);
-
-            self.bitwise_lookup_chip.request_range(
-                dst_read
-                    .data_at(RV32_REGISTER_NUM_LIMBS - 1)
-                    .as_canonical_u32()
-                    * mem_ptr_shift,
-                src_read
-                    .data_at(RV32_REGISTER_NUM_LIMBS - 1)
-                    .as_canonical_u32()
-                    * mem_ptr_shift,
-            );
-            let len = compose(len_read.data_slice().try_into().unwrap());
-            let mut state = &None;
-            for (i, input_message) in record.input_message.iter().enumerate() {
-                let input_message = input_message
-                    .iter()
-                    .flatten()
-                    .copied()
-                    .collect::<Vec<_>>()
-                    .try_into()
-                    .unwrap();
-                states.push(Some(Self::generate_state(
-                    state,
-                    input_message,
-                    record_idx,
-                    len,
-                    i == record.input_records.len() - 1,
-                )));
-                state = &states[global_block_idx];
-                global_block_idx += 1;
+
+        let mut chunks = Vec::with_capacity(trace_matrix.height() / SHA256_ROWS_PER_BLOCK);
+        let mut sizes = Vec::with_capacity(trace_matrix.height() / SHA256_ROWS_PER_BLOCK);
+        let mut trace = &mut trace_matrix.values[..];
+        let mut num_blocks_so_far = 0;
+
+        // First pass over the trace to get the number of blocks for each instruction
+        // and divide the matrix into chunks of needed sizes
+        loop {
+            if num_blocks_so_far * SHA256_ROWS_PER_BLOCK >= rows_used {
+                // Push all the padding rows as a single chunk and break
+                chunks.push(trace);
+                sizes.push((0, num_blocks_so_far));
+                break;
+            } else {
+                let record: &Sha256VmRecordHeader =
+                    unsafe { get_record_from_slice(&mut trace, ()) };
+                let num_blocks = ((record.len << 3) as usize + 1 + 64).div_ceil(SHA256_BLOCK_BITS);
+                let (chunk, rest) =
+                    trace.split_at_mut(SHA256VM_WIDTH * SHA256_ROWS_PER_BLOCK * num_blocks);
+                chunks.push(chunk);
+                sizes.push((num_blocks, num_blocks_so_far));
+                num_blocks_so_far += num_blocks;
+                trace = rest;
             }
         }
-        states.extend(std::iter::repeat_n(
-            None,
-            (height - non_padded_height).div_ceil(SHA256_ROWS_PER_BLOCK),
-        ));
 
         // During the first pass we will fill out most of the matrix
         // But there are some cells that can't be generated by the first pass so we will do a second
-        // pass over the matrix
-        values
-            .par_chunks_mut(width * SHA256_ROWS_PER_BLOCK)
-            .zip(states.into_par_iter().enumerate())
-            .for_each(|(block, (global_block_idx, state))| {
-                // Fill in a valid block
-                if let Some(state) = state {
-                    let mut has_padding_occurred =
-                        state.local_block_idx * SHA256_BLOCK_CELLS > state.message_len as usize;
-                    let message_left = if has_padding_occurred {
-                        0
-                    } else {
-                        state.message_len as usize - state.local_block_idx * SHA256_BLOCK_CELLS
-                    };
-                    let is_last_block = state.is_last_block;
-                    let buffer: [[Val<SC>; SHA256_BUFFER_SIZE]; 4] = array::from_fn(|j| {
-                        array::from_fn(|k| {
-                            Val::<SC>::from_canonical_u8(
-                                state.block_input_message[j * SHA256_BUFFER_SIZE + k],
-                            )
-                        })
+        // pass over the matrix later
+        chunks.par_iter_mut().zip(sizes.par_iter()).for_each(
+            |(slice, (num_blocks, global_block_offset))| {
+                if global_block_offset * SHA256_ROWS_PER_BLOCK >= rows_used {
+                    // Fill in the invalid rows
+                    slice.par_chunks_mut(SHA256VM_WIDTH).for_each(|row| {
+                        // Zero the row first: accidental garbage data left in it might exceed the
+                        // modulus of F. Unfortunately, there is no good way around this.
+                        unsafe {
+                            std::ptr::write_bytes(
+                                row.as_mut_ptr() as *mut u8,
+                                0,
+                                SHA256VM_WIDTH * size_of::<F>(),
+                            );
+                        }
+                        let cols: &mut Sha256VmRoundCols<F> =
+                            row[..SHA256VM_ROUND_WIDTH].borrow_mut();
+                        self.inner.generate_default_row(&mut cols.inner);
                     });
+                    return;
+                }
 
-                    let padded_message: [u32; SHA256_BLOCK_WORDS] = array::from_fn(|j| {
-                        limbs_into_u32::<RV32_REGISTER_NUM_LIMBS>(array::from_fn(|k| {
-                            state.block_padded_message[(j + 1) * SHA256_WORD_U8S - k - 1] as u32
-                        }))
-                    });
+                let record: Sha256VmRecordMut = unsafe {
+                    get_record_from_slice(
+                        slice,
+                        Sha256VmRecordLayout {
+                            metadata: Sha256VmMetadata {
+                                num_blocks: *num_blocks as u32,
+                            },
+                        },
+                    )
+                };
 
-                    self.air.sha256_subair.generate_block_trace::<Val<SC>>(
-                        block,
-                        width,
-                        SHA256VM_CONTROL_WIDTH,
-                        &padded_message,
-                        self.bitwise_lookup_chip.clone(),
-                        &state.hash,
-                        is_last_block,
-                        global_block_idx as u32 + 1,
-                        state.local_block_idx as u32,
-                        &buffer,
-                    );
-
-                    let block_reads = records[state.message_idx].input_records
-                        [state.local_block_idx]
-                        .map(|record_id| offline_memory.record_by_id(record_id));
-
-                    let mut read_ptr = block_reads[0].pointer;
-                    let mut cur_timestamp = Val::<SC>::from_canonical_u32(block_reads[0].timestamp);
-
-                    let read_size = Val::<SC>::from_canonical_usize(SHA256_READ_SIZE);
-                    for row in 0..SHA256_ROWS_PER_BLOCK {
-                        let row_slice = &mut block[row * width..(row + 1) * width];
-                        if row < 16 {
-                            let cols: &mut Sha256VmRoundCols<Val<SC>> =
-                                row_slice[..SHA256VM_ROUND_WIDTH].borrow_mut();
-                            cols.control.len = Val::<SC>::from_canonical_u32(state.message_len);
-                            cols.control.read_ptr = read_ptr;
-                            cols.control.cur_timestamp = cur_timestamp;
-                            if row < 4 {
-                                read_ptr += read_size;
-                                cur_timestamp += Val::<SC>::ONE;
-                                memory_aux_cols_factory
-                                    .generate_read_aux(block_reads[row], &mut cols.read_aux);
-
-                                if (row + 1) * SHA256_READ_SIZE <= message_left {
-                                    cols.control.pad_flags = get_flag_pt_array(
-                                        &self.air.padding_encoder,
-                                        PaddingFlags::NotPadding as usize,
-                                    )
-                                    .map(Val::<SC>::from_canonical_u32);
-                                } else if !has_padding_occurred {
-                                    has_padding_occurred = true;
-                                    let len = message_left - row * SHA256_READ_SIZE;
-                                    cols.control.pad_flags = get_flag_pt_array(
-                                        &self.air.padding_encoder,
-                                        if row == 3 && is_last_block {
-                                            PaddingFlags::FirstPadding0_LastRow
-                                        } else {
-                                            PaddingFlags::FirstPadding0
-                                        } as usize
-                                            + len,
-                                    )
-                                    .map(Val::<SC>::from_canonical_u32);
-                                } else {
-                                    cols.control.pad_flags = get_flag_pt_array(
-                                        &self.air.padding_encoder,
-                                        if row == 3 && is_last_block {
-                                            PaddingFlags::EntirePaddingLastRow
-                                        } else {
-                                            PaddingFlags::EntirePadding
-                                        } as usize,
-                                    )
-                                    .map(Val::<SC>::from_canonical_u32);
-                                }
-                            } else {
-                                cols.control.pad_flags = get_flag_pt_array(
-                                    &self.air.padding_encoder,
-                                    PaddingFlags::NotConsidered as usize,
-                                )
-                                .map(Val::<SC>::from_canonical_u32);
-                            }
-                            cols.control.padding_occurred =
-                                Val::<SC>::from_bool(has_padding_occurred);
-                        } else {
-                            if is_last_block {
-                                has_padding_occurred = false;
-                            }
-                            let cols: &mut Sha256VmDigestCols<Val<SC>> =
-                                row_slice[..SHA256VM_DIGEST_WIDTH].borrow_mut();
-                            cols.control.len = Val::<SC>::from_canonical_u32(state.message_len);
-                            cols.control.read_ptr = read_ptr;
-                            cols.control.cur_timestamp = cur_timestamp;
-                            cols.control.pad_flags = get_flag_pt_array(
-                                &self.air.padding_encoder,
-                                PaddingFlags::NotConsidered as usize,
-                            )
-                            .map(Val::<SC>::from_canonical_u32);
-                            if is_last_block {
-                                let record = &records[state.message_idx];
-                                let dst_read = offline_memory.record_by_id(record.dst_read);
-                                let src_read = offline_memory.record_by_id(record.src_read);
-                                let len_read = offline_memory.record_by_id(record.len_read);
-                                let digest_write = offline_memory.record_by_id(record.digest_write);
-                                cols.from_state = record.from_state;
-                                cols.rd_ptr = dst_read.pointer;
-                                cols.rs1_ptr = src_read.pointer;
-                                cols.rs2_ptr = len_read.pointer;
-                                cols.dst_ptr.copy_from_slice(dst_read.data_slice());
-                                cols.src_ptr.copy_from_slice(src_read.data_slice());
-                                cols.len_data.copy_from_slice(len_read.data_slice());
-                                memory_aux_cols_factory
-                                    .generate_read_aux(dst_read, &mut cols.register_reads_aux[0]);
-                                memory_aux_cols_factory
-                                    .generate_read_aux(src_read, &mut cols.register_reads_aux[1]);
-                                memory_aux_cols_factory
-                                    .generate_read_aux(len_read, &mut cols.register_reads_aux[2]);
-                                memory_aux_cols_factory
-                                    .generate_write_aux(digest_write, &mut cols.writes_aux);
-                            }
-                            cols.control.padding_occurred =
-                                Val::<SC>::from_bool(has_padding_occurred);
-                        }
-                    }
-                }
-                // Fill in the invalid rows
-                else {
-                    block.par_chunks_mut(width).for_each(|row| {
-                        let cols: &mut Sha256VmRoundCols<Val<SC>> = row.borrow_mut();
-                        self.air.sha256_subair.generate_default_row(&mut cols.inner);
-                    })
+                let mut input: Vec<u8> = Vec::with_capacity(SHA256_BLOCK_CELLS * num_blocks);
+                input.extend_from_slice(record.input);
+                let mut padded_input = input.clone();
+                let len = record.inner.len as usize;
+                let padded_input_len = padded_input.len();
+                padded_input[len] = 1 << (RV32_CELL_BITS - 1);
+                padded_input[len + 1..padded_input_len - 4].fill(0);
+                padded_input[padded_input_len - 4..]
+                    .copy_from_slice(&((len as u32) << 3).to_be_bytes());
+
+                let mut prev_hashes = Vec::with_capacity(*num_blocks);
+                prev_hashes.push(SHA256_H);
+                for i in 0..*num_blocks - 1 {
+                    prev_hashes.push(Sha256FillerHelper::get_block_hash(
+                        &prev_hashes[i],
+                        padded_input[i * SHA256_BLOCK_CELLS..(i + 1) * SHA256_BLOCK_CELLS]
+                            .try_into()
+                            .unwrap(),
+                    ));
                 }
-            });
+                // Copy the read aux records and input to another place to safely fill in the trace
+                // matrix without overwriting the record
+                let mut read_aux_records = Vec::with_capacity(SHA256_NUM_READ_ROWS * num_blocks);
+                read_aux_records.extend_from_slice(record.read_aux);
+                let vm_record = record.inner.clone();
+
+                slice
+                    .par_chunks_exact_mut(SHA256VM_WIDTH * SHA256_ROWS_PER_BLOCK)
+                    .enumerate()
+                    .for_each(|(block_idx, block_slice)| {
+                        // Zero the block first: accidental garbage data left in it might exceed the
+                        // modulus of F. Unfortunately, there is no good way around this.
+                        unsafe {
+                            std::ptr::write_bytes(
+                                block_slice.as_mut_ptr() as *mut u8,
+                                0,
+                                SHA256_ROWS_PER_BLOCK * SHA256VM_WIDTH * size_of::<F>(),
+                            );
+                        }
+                        self.fill_block_trace::<F>(
+                            block_slice,
+                            &vm_record,
+                            &read_aux_records[block_idx * SHA256_NUM_READ_ROWS
+                                ..(block_idx + 1) * SHA256_NUM_READ_ROWS],
+                            &input[block_idx * SHA256_BLOCK_CELLS
+                                ..(block_idx + 1) * SHA256_BLOCK_CELLS],
+                            &padded_input[block_idx * SHA256_BLOCK_CELLS
+                                ..(block_idx + 1) * SHA256_BLOCK_CELLS],
+                            block_idx == *num_blocks - 1,
+                            *global_block_offset + block_idx,
+                            block_idx,
+                            prev_hashes[block_idx],
+                            mem_helper,
+                        );
+                    });
+            },
+        );
 
         // Do a second pass over the trace to fill in the missing values
         // Note, we need to skip the very first row
-        values[width..]
-            .par_chunks_mut(width * SHA256_ROWS_PER_BLOCK)
-            .take(non_padded_height / SHA256_ROWS_PER_BLOCK)
+        trace_matrix.values[SHA256VM_WIDTH..]
+            .par_chunks_mut(SHA256VM_WIDTH * SHA256_ROWS_PER_BLOCK)
+            .take(rows_used / SHA256_ROWS_PER_BLOCK)
             .for_each(|chunk| {
-                self.air
-                    .sha256_subair
-                    .generate_missing_cells(chunk, width, SHA256VM_CONTROL_WIDTH);
+                self.inner
+                    .generate_missing_cells(chunk, SHA256VM_WIDTH, SHA256VM_CONTROL_WIDTH);
             });
-
-        AirProofInput::simple_no_pis(RowMajorMatrix::new(values, width))
     }
 }
 
-impl<F: PrimeField32> ChipUsageGetter for Sha256VmChip<F> {
-    fn air_name(&self) -> String {
-        get_air_name(&self.air)
-    }
-    fn current_trace_height(&self) -> usize {
-        self.records.iter().fold(0, |acc, record| {
-            acc + record.input_records.len() * SHA256_ROWS_PER_BLOCK
-        })
-    }
+impl Sha256VmFiller {
+    #[allow(clippy::too_many_arguments)]
+    fn fill_block_trace<F: PrimeField32>(
+        &self,
+        block_slice: &mut [F],
+        record: &Sha256VmRecordHeader,
+        read_aux_records: &[MemoryReadAuxRecord],
+        input: &[u8],
+        padded_input: &[u8],
+        is_last_block: bool,
+        global_block_idx: usize,
+        local_block_idx: usize,
+        prev_hash: [u32; 8],
+        mem_helper: &MemoryAuxColsFactory<F>,
+    ) {
+        debug_assert_eq!(input.len(), SHA256_BLOCK_CELLS);
+        debug_assert_eq!(padded_input.len(), SHA256_BLOCK_CELLS);
+        debug_assert_eq!(read_aux_records.len(), SHA256_NUM_READ_ROWS);
 
-    fn trace_width(&self) -> usize {
-        BaseAir::<F>::width(&self.air)
-    }
-}
+        let padded_input = array::from_fn(|i| {
+            u32::from_be_bytes(padded_input[i * 4..(i + 1) * 4].try_into().unwrap())
+        });
 
-/// This is the state information that a block will use to generate its trace
-#[derive(Debug, Clone)]
-struct Sha256State {
-    hash: [u32; SHA256_HASH_WORDS],
-    local_block_idx: usize,
-    message_len: u32,
-    block_input_message: [u8; SHA256_BLOCK_CELLS],
-    block_padded_message: [u8; SHA256_BLOCK_CELLS],
-    message_idx: usize,
-    is_last_block: bool,
-}
+        let block_start_timestamp = record.timestamp
+            + (SHA256_REGISTER_READS + SHA256_NUM_READ_ROWS * local_block_idx) as u32;
 
-impl<F: PrimeField32> Sha256VmChip<F> {
-    fn generate_state(
-        prev_state: &Option<Sha256State>,
-        block_input_message: [u8; SHA256_BLOCK_CELLS],
-        message_idx: usize,
-        message_len: u32,
-        is_last_block: bool,
-    ) -> Sha256State {
-        let local_block_idx = if let Some(prev_state) = prev_state {
-            prev_state.local_block_idx + 1
-        } else {
+        let read_cells = (SHA256_BLOCK_CELLS * local_block_idx) as u32;
+        let block_start_read_ptr = record.src_ptr + read_cells;
+
+        let message_left = if record.len <= read_cells {
             0
+        } else {
+            (record.len - read_cells) as usize
         };
-        let has_padding_occurred = local_block_idx * SHA256_BLOCK_CELLS > message_len as usize;
-        let message_left = if has_padding_occurred {
-            0
+
+        // -1 means that padding occurred before the start of the block
+        // 18 means that no padding occurred on this block
+        let first_padding_row = if record.len < read_cells {
+            -1
+        } else if message_left < SHA256_BLOCK_CELLS {
+            (message_left / SHA256_READ_SIZE) as i32
         } else {
-            message_len as usize - local_block_idx * SHA256_BLOCK_CELLS
+            18
         };
 
-        let padded_message_bytes: [u8; SHA256_BLOCK_CELLS] = array::from_fn(|j| {
-            if j < message_left {
-                block_input_message[j]
-            } else if j == message_left && !has_padding_occurred {
-                1 << (RV32_CELL_BITS - 1)
-            } else if !is_last_block || j < SHA256_BLOCK_CELLS - 4 {
-                0u8
-            } else {
-                let shift_amount = (SHA256_BLOCK_CELLS - j - 1) * RV32_CELL_BITS;
-                ((message_len * RV32_CELL_BITS as u32)
-                    .checked_shr(shift_amount as u32)
-                    .unwrap_or(0)
-                    & ((1 << RV32_CELL_BITS) - 1)) as u8
-            }
-        });
+        // Fill in the VM columns first because the inner `carry_or_buffer` needs to be filled in
+        block_slice
+            .par_chunks_exact_mut(SHA256VM_WIDTH)
+            .enumerate()
+            .for_each(|(row_idx, row_slice)| {
+                // Handle round rows and digest row separately
+                if row_idx == SHA256_ROWS_PER_BLOCK - 1 {
+                    // This is a digest row
+                    let digest_cols: &mut Sha256VmDigestCols<F> =
+                        row_slice[..SHA256VM_DIGEST_WIDTH].borrow_mut();
+                    digest_cols.from_state.timestamp = F::from_canonical_u32(record.timestamp);
+                    digest_cols.from_state.pc = F::from_canonical_u32(record.from_pc);
+                    digest_cols.rd_ptr = F::from_canonical_u32(record.rd_ptr);
+                    digest_cols.rs1_ptr = F::from_canonical_u32(record.rs1_ptr);
+                    digest_cols.rs2_ptr = F::from_canonical_u32(record.rs2_ptr);
+                    digest_cols.dst_ptr = record.dst_ptr.to_le_bytes().map(F::from_canonical_u8);
+                    digest_cols.src_ptr = record.src_ptr.to_le_bytes().map(F::from_canonical_u8);
+                    digest_cols.len_data = record.len.to_le_bytes().map(F::from_canonical_u8);
+                    if is_last_block {
+                        digest_cols
+                            .register_reads_aux
+                            .iter_mut()
+                            .zip(record.register_reads_aux.iter())
+                            .enumerate()
+                            .for_each(|(idx, (cols_read, record_read))| {
+                                mem_helper.fill(
+                                    record_read.prev_timestamp,
+                                    record.timestamp + idx as u32,
+                                    cols_read.as_mut(),
+                                );
+                            });
+                        digest_cols
+                            .writes_aux
+                            .set_prev_data(record.write_aux.prev_data.map(F::from_canonical_u8));
+                        // In the last block we do `SHA256_NUM_READ_ROWS` reads and then write the
+                        // result, so the timestamp of the write is
+                        // `block_start_timestamp + SHA256_NUM_READ_ROWS`
+                        mem_helper.fill(
+                            record.write_aux.prev_timestamp,
+                            block_start_timestamp + SHA256_NUM_READ_ROWS as u32,
+                            digest_cols.writes_aux.as_mut(),
+                        );
+                        // Need to range check the destination and source pointers
+                        let msl_rshift: u32 =
+                            ((RV32_REGISTER_NUM_LIMBS - 1) * RV32_CELL_BITS) as u32;
+                        let msl_lshift: u32 = (RV32_REGISTER_NUM_LIMBS * RV32_CELL_BITS
+                            - self.pointer_max_bits)
+                            as u32;
+                        self.bitwise_lookup_chip.request_range(
+                            (record.dst_ptr >> msl_rshift) << msl_lshift,
+                            (record.src_ptr >> msl_rshift) << msl_lshift,
+                        );
+                    } else {
+                        // Filling in zeros to make sure the accidental garbage data doesn't
+                        // overflow the prime
+                        digest_cols.register_reads_aux.iter_mut().for_each(|aux| {
+                            mem_helper.fill_zero(aux.as_mut());
+                        });
+                        digest_cols
+                            .writes_aux
+                            .set_prev_data([F::ZERO; SHA256_WRITE_SIZE]);
+                        mem_helper.fill_zero(digest_cols.writes_aux.as_mut());
+                    }
+                    digest_cols.inner.flags.is_last_block = F::from_bool(is_last_block);
+                    digest_cols.inner.flags.is_digest_row = F::from_bool(true);
+                } else {
+                    // This is a round row
+                    let round_cols: &mut Sha256VmRoundCols<F> =
+                        row_slice[..SHA256VM_ROUND_WIDTH].borrow_mut();
+                    // Take care of the first 4 round rows (aka read rows)
+                    if row_idx < SHA256_NUM_READ_ROWS {
+                        round_cols
+                            .inner
+                            .message_schedule
+                            .carry_or_buffer
+                            .as_flattened_mut()
+                            .iter_mut()
+                            .zip(
+                                input[row_idx * SHA256_READ_SIZE..(row_idx + 1) * SHA256_READ_SIZE]
+                                    .iter(),
+                            )
+                            .for_each(|(cell, data)| {
+                                *cell = F::from_canonical_u8(*data);
+                            });
+                        mem_helper.fill(
+                            read_aux_records[row_idx].prev_timestamp,
+                            block_start_timestamp + row_idx as u32,
+                            round_cols.read_aux.as_mut(),
+                        );
+                    } else {
+                        mem_helper.fill_zero(round_cols.read_aux.as_mut());
+                    }
+                }
+                // Fill in the control cols, doesn't matter if it is a round or digest row
+                let control_cols: &mut Sha256VmControlCols<F> =
+                    row_slice[..SHA256VM_CONTROL_WIDTH].borrow_mut();
+                control_cols.len = F::from_canonical_u32(record.len);
+                // Only the first `SHA256_NUM_READ_ROWS` rows increment the timestamp and read ptr
+                control_cols.cur_timestamp = F::from_canonical_u32(
+                    block_start_timestamp + min(row_idx, SHA256_NUM_READ_ROWS) as u32,
+                );
+                control_cols.read_ptr = F::from_canonical_u32(
+                    block_start_read_ptr
+                        + (SHA256_READ_SIZE * min(row_idx, SHA256_NUM_READ_ROWS)) as u32,
+                );
 
-        if let Some(prev_state) = prev_state {
-            Sha256State {
-                hash: Sha256Air::get_block_hash(&prev_state.hash, prev_state.block_padded_message),
-                local_block_idx,
-                message_len,
-                block_input_message,
-                block_padded_message: padded_message_bytes,
-                message_idx,
-                is_last_block,
-            }
-        } else {
-            Sha256State {
-                hash: SHA256_H,
-                local_block_idx: 0,
-                message_len,
-                block_input_message,
-                block_padded_message: padded_message_bytes,
-                message_idx,
-                is_last_block,
-            }
-        }
+                // Fill in the padding flags
+                if row_idx < SHA256_NUM_READ_ROWS {
+                    #[allow(clippy::comparison_chain)]
+                    if (row_idx as i32) < first_padding_row {
+                        control_cols.pad_flags = get_flag_pt_array(
+                            &self.padding_encoder,
+                            PaddingFlags::NotPadding as usize,
+                        )
+                        .map(F::from_canonical_u32);
+                    } else if row_idx as i32 == first_padding_row {
+                        let len = message_left - row_idx * SHA256_READ_SIZE;
+                        control_cols.pad_flags = get_flag_pt_array(
+                            &self.padding_encoder,
+                            if row_idx == 3 && is_last_block {
+                                PaddingFlags::FirstPadding0_LastRow
+                            } else {
+                                PaddingFlags::FirstPadding0
+                            } as usize
+                                + len,
+                        )
+                        .map(F::from_canonical_u32);
+                    } else {
+                        control_cols.pad_flags = get_flag_pt_array(
+                            &self.padding_encoder,
+                            if row_idx == 3 && is_last_block {
+                                PaddingFlags::EntirePaddingLastRow
+                            } else {
+                                PaddingFlags::EntirePadding
+                            } as usize,
+                        )
+                        .map(F::from_canonical_u32);
+                    }
+                } else {
+                    control_cols.pad_flags = get_flag_pt_array(
+                        &self.padding_encoder,
+                        PaddingFlags::NotConsidered as usize,
+                    )
+                    .map(F::from_canonical_u32);
+                }
+                if is_last_block && row_idx == SHA256_ROWS_PER_BLOCK - 1 {
+                    // If last digest row, then we set padding_occurred = 0
+                    control_cols.padding_occurred = F::ZERO;
+                } else {
+                    control_cols.padding_occurred =
+                        F::from_bool((row_idx as i32) >= first_padding_row);
+                }
+            });
+
+        // Fill in the inner trace now that `carry_or_buffer` has been filled in
+        self.inner.generate_block_trace::<F>(
+            block_slice,
+            SHA256VM_WIDTH,
+            SHA256VM_CONTROL_WIDTH,
+            &padded_input,
+            self.bitwise_lookup_chip.as_ref(),
+            &prev_hash,
+            is_last_block,
+            global_block_idx as u32 + 1, // global block index is 1-indexed
+            local_block_idx as u32,
+        );
     }
 }
diff --git a/guest-libs/ff_derive/Cargo.toml b/guest-libs/ff_derive/Cargo.toml
index 54d4628897..a4d9c24579 100644
--- a/guest-libs/ff_derive/Cargo.toml
+++ b/guest-libs/ff_derive/Cargo.toml
@@ -27,7 +27,7 @@ syn = { version = "1", features = ["full"] }
 
 [dev-dependencies]
 openvm-instructions = { workspace = true }
-openvm-stark-sdk = { workspace = true } 
+openvm-stark-sdk = { workspace = true }
 openvm-circuit = { workspace = true, features = ["test-utils", "parallel"]}
 openvm-transpiler = { workspace = true }
 openvm-algebra-transpiler = { workspace = true }
@@ -37,4 +37,3 @@ openvm-toolchain-tests = { workspace = true }
 
 eyre = { workspace = true }
 num-bigint = { workspace = true }
-
diff --git a/guest-libs/ff_derive/src/lib.rs b/guest-libs/ff_derive/src/lib.rs
index 8a64062c33..10a8b64cd2 100644
--- a/guest-libs/ff_derive/src/lib.rs
+++ b/guest-libs/ff_derive/src/lib.rs
@@ -1,4 +1,5 @@
 #![recursion_limit = "1024"]
+#![allow(clippy::manual_repeat_n)]
 
 extern crate proc_macro;
 extern crate proc_macro2;
diff --git a/guest-libs/ff_derive/tests/lib.rs b/guest-libs/ff_derive/tests/lib.rs
index 6df9a1d675..727c10831e 100644
--- a/guest-libs/ff_derive/tests/lib.rs
+++ b/guest-libs/ff_derive/tests/lib.rs
@@ -4,9 +4,9 @@ mod tests {
 
     use eyre::Result;
     use num_bigint::BigUint;
-    use openvm_algebra_circuit::Rv32ModularConfig;
+    use openvm_algebra_circuit::{Rv32ModularConfig, Rv32ModularCpuBuilder};
     use openvm_algebra_transpiler::ModularTranspilerExtension;
-    use openvm_circuit::utils::air_test;
+    use openvm_circuit::utils::{air_test, test_system_config};
     use openvm_instructions::exe::VmExe;
     use openvm_rv32im_transpiler::{
         Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
@@ -20,11 +20,18 @@ mod tests {
 
     type F = BabyBear;
 
+    #[cfg(test)] // test-only helper: builds the modular config used by the tests below
+    fn test_rv32modular_config(moduli: Vec<BigUint>) -> Rv32ModularConfig {
+        let mut config = Rv32ModularConfig::new(moduli);
+        config.system = test_system_config(); // overwrite the `system` field that `new` produced
+        config
+    }
+
     #[test]
     fn test_full_limbs() -> Result<()> {
         let moduli = ["39402006196394479212279040100143613805079739270465446667948293404245721771496870329047266088258938001861606973112319"]
         .map(|s| BigUint::from_str(s).unwrap());
-        let config = Rv32ModularConfig::new(moduli.to_vec());
+        let config = test_rv32modular_config(moduli.to_vec());
         let elf = build_example_program_at_path(
             get_programs_dir!("tests/programs"),
             "full_limbs",
@@ -39,14 +46,14 @@ mod tests {
                 .with_extension(ModularTranspilerExtension),
         )?;
 
-        air_test(config, openvm_exe);
+        air_test(Rv32ModularCpuBuilder, config, openvm_exe);
         Ok(())
     }
 
     #[test]
     fn test_fermat() -> Result<()> {
         let moduli = ["65537"].map(|s| BigUint::from_str(s).unwrap());
-        let config = Rv32ModularConfig::new(moduli.to_vec());
+        let config = test_rv32modular_config(moduli.to_vec());
         let elf =
             build_example_program_at_path(get_programs_dir!("tests/programs"), "fermat", &config)?;
         let openvm_exe = VmExe::from_elf(
@@ -58,14 +65,14 @@ mod tests {
                 .with_extension(ModularTranspilerExtension),
         )?;
 
-        air_test(config, openvm_exe);
+        air_test(Rv32ModularCpuBuilder, config, openvm_exe);
         Ok(())
     }
 
     #[test]
     fn test_sqrt() -> Result<()> {
         let moduli = ["357686312646216567629137"].map(|s| BigUint::from_str(s).unwrap());
-        let config = Rv32ModularConfig::new(moduli.to_vec());
+        let config = test_rv32modular_config(moduli.to_vec());
         let elf =
             build_example_program_at_path(get_programs_dir!("tests/programs"), "sqrt", &config)?;
         let openvm_exe = VmExe::from_elf(
@@ -77,7 +84,7 @@ mod tests {
                 .with_extension(ModularTranspilerExtension),
         )?;
 
-        air_test(config, openvm_exe);
+        air_test(Rv32ModularCpuBuilder, config, openvm_exe);
         Ok(())
     }
 
@@ -86,7 +93,7 @@ mod tests {
         let moduli =
             ["52435875175126190479447740508185965837690552500527637822603658699938581184513"]
                 .map(|s| BigUint::from_str(s).unwrap());
-        let config = Rv32ModularConfig::new(moduli.to_vec());
+        let config = test_rv32modular_config(moduli.to_vec());
         let elf = build_example_program_at_path(
             get_programs_dir!("tests/programs"),
             "constants",
@@ -101,7 +108,7 @@ mod tests {
                 .with_extension(ModularTranspilerExtension),
         )?;
 
-        air_test(config, openvm_exe);
+        air_test(Rv32ModularCpuBuilder, config, openvm_exe);
         Ok(())
     }
 
@@ -110,7 +117,7 @@ mod tests {
         let moduli =
             ["52435875175126190479447740508185965837690552500527637822603658699938581184513"]
                 .map(|s| BigUint::from_str(s).unwrap());
-        let config = Rv32ModularConfig::new(moduli.to_vec());
+        let config = test_rv32modular_config(moduli.to_vec());
         let elf = build_example_program_at_path(
             get_programs_dir!("tests/programs"),
             "from_u128",
@@ -125,7 +132,7 @@ mod tests {
                 .with_extension(ModularTranspilerExtension),
         )?;
 
-        air_test(config, openvm_exe);
+        air_test(Rv32ModularCpuBuilder, config, openvm_exe);
         Ok(())
     }
 
@@ -134,7 +141,7 @@ mod tests {
         let moduli =
             ["52435875175126190479447740508185965837690552500527637822603658699938581184513"]
                 .map(|s| BigUint::from_str(s).unwrap());
-        let config = Rv32ModularConfig::new(moduli.to_vec());
+        let config = test_rv32modular_config(moduli.to_vec());
         let elf = build_example_program_at_path_with_features(
             get_programs_dir!("tests/programs"),
             "batch_inversion",
@@ -150,7 +157,7 @@ mod tests {
                 .with_extension(ModularTranspilerExtension),
         )?;
 
-        air_test(config, openvm_exe);
+        air_test(Rv32ModularCpuBuilder, config, openvm_exe);
         Ok(())
     }
 
@@ -159,7 +166,7 @@ mod tests {
         let moduli =
             ["52435875175126190479447740508185965837690552500527637822603658699938581184513"]
                 .map(|s| BigUint::from_str(s).unwrap());
-        let config = Rv32ModularConfig::new(moduli.to_vec());
+        let config = test_rv32modular_config(moduli.to_vec());
         let elf = build_example_program_at_path(
             get_programs_dir!("tests/programs"),
             "operations",
@@ -174,7 +181,7 @@ mod tests {
                 .with_extension(ModularTranspilerExtension),
         )?;
 
-        air_test(config, openvm_exe);
+        air_test(Rv32ModularCpuBuilder, config, openvm_exe);
         Ok(())
     }
 }
diff --git a/guest-libs/k256/Cargo.toml b/guest-libs/k256/Cargo.toml
index 362df43b6f..d4862bd79c 100644
--- a/guest-libs/k256/Cargo.toml
+++ b/guest-libs/k256/Cargo.toml
@@ -32,19 +32,18 @@ num-bigint = { workspace = true }
 [dev-dependencies]
 openvm-circuit = { workspace = true, features = ["test-utils", "parallel"] }
 openvm-transpiler.workspace = true
-openvm-algebra-circuit.workspace = true
 openvm-algebra-transpiler.workspace = true
 openvm-ecc-transpiler.workspace = true
 openvm-ecc-circuit.workspace = true
 openvm-sha256-circuit.workspace = true
 openvm-sha256-transpiler.workspace = true
-openvm-rv32im-circuit.workspace = true
 openvm-rv32im-transpiler.workspace = true
 openvm-toolchain-tests.workspace = true
 
 openvm-stark-backend.workspace = true
 openvm-stark-sdk.workspace = true
 
+rand = { workspace = true }
 serde.workspace = true
 eyre.workspace = true
 derive_more = { workspace = true, features = ["from"] }
@@ -84,4 +83,5 @@ ignored = [
     "derive_more",
     "signature",
     "once_cell",
+    "rand",
 ]
diff --git a/guest-libs/k256/tests/lib.rs b/guest-libs/k256/tests/lib.rs
index e38675aa09..87ca7f853b 100644
--- a/guest-libs/k256/tests/lib.rs
+++ b/guest-libs/k256/tests/lib.rs
@@ -2,8 +2,13 @@ mod guest_tests {
     use ecdsa_config::EcdsaConfig;
     use eyre::Result;
     use openvm_algebra_transpiler::ModularTranspilerExtension;
-    use openvm_circuit::{arch::instructions::exe::VmExe, utils::air_test};
-    use openvm_ecc_circuit::{Rv32WeierstrassConfig, SECP256K1_CONFIG};
+    use openvm_circuit::{
+        arch::instructions::exe::VmExe,
+        utils::{air_test, test_system_config},
+    };
+    use openvm_ecc_circuit::{
+        CurveConfig, Rv32WeierstrassConfig, Rv32WeierstrassCpuBuilder, SECP256K1_CONFIG,
+    };
     use openvm_ecc_transpiler::EccTranspilerExtension;
     use openvm_rv32im_transpiler::{
         Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
@@ -13,11 +18,20 @@ mod guest_tests {
     use openvm_toolchain_tests::{build_example_program_at_path, get_programs_dir};
     use openvm_transpiler::{transpiler::Transpiler, FromElf};
 
+    use crate::guest_tests::ecdsa_config::EcdsaCpuBuilder;
+
     type F = BabyBear;
 
+    #[cfg(test)] // test-only helper: builds the Weierstrass config used by the tests below
+    fn test_rv32weierstrass_config(curves: Vec<CurveConfig>) -> Rv32WeierstrassConfig {
+        let mut config = Rv32WeierstrassConfig::new(curves);
+        *config.as_mut() = test_system_config(); // presumably the inner system config; confirm
+        config
+    }
+
     #[test]
     fn test_add() -> Result<()> {
-        let config = Rv32WeierstrassConfig::new(vec![SECP256K1_CONFIG.clone()]);
+        let config = test_rv32weierstrass_config(vec![SECP256K1_CONFIG.clone()]);
         let elf =
             build_example_program_at_path(get_programs_dir!("tests/programs"), "add", &config)?;
         let openvm_exe = VmExe::from_elf(
@@ -29,13 +43,13 @@ mod guest_tests {
                 .with_extension(EccTranspilerExtension)
                 .with_extension(ModularTranspilerExtension),
         )?;
-        air_test(config, openvm_exe);
+        air_test(Rv32WeierstrassCpuBuilder, config, openvm_exe);
         Ok(())
     }
 
     #[test]
     fn test_mul() -> Result<()> {
-        let config = Rv32WeierstrassConfig::new(vec![SECP256K1_CONFIG.clone()]);
+        let config = test_rv32weierstrass_config(vec![SECP256K1_CONFIG.clone()]);
         let elf =
             build_example_program_at_path(get_programs_dir!("tests/programs"), "mul", &config)?;
         let openvm_exe = VmExe::from_elf(
@@ -47,13 +61,13 @@ mod guest_tests {
                 .with_extension(EccTranspilerExtension)
                 .with_extension(ModularTranspilerExtension),
         )?;
-        air_test(config, openvm_exe);
+        air_test(Rv32WeierstrassCpuBuilder, config, openvm_exe);
         Ok(())
     }
 
     #[test]
     fn test_linear_combination() -> Result<()> {
-        let config = Rv32WeierstrassConfig::new(vec![SECP256K1_CONFIG.clone()]);
+        let config = test_rv32weierstrass_config(vec![SECP256K1_CONFIG.clone()]);
         let elf = build_example_program_at_path(
             get_programs_dir!("tests/programs"),
             "linear_combination",
@@ -68,62 +82,45 @@ mod guest_tests {
                 .with_extension(EccTranspilerExtension)
                 .with_extension(ModularTranspilerExtension),
         )?;
-        air_test(config, openvm_exe);
+        air_test(Rv32WeierstrassCpuBuilder, config, openvm_exe);
         Ok(())
     }
 
+    // TODO[jpw]: switch to using SDK to avoid this
     mod ecdsa_config {
-        use eyre::Result;
-        use openvm_algebra_circuit::{
-            ModularExtension, ModularExtensionExecutor, ModularExtensionPeriphery,
-        };
         use openvm_circuit::{
-            arch::{InitFileGenerator, SystemConfig},
+            arch::{
+                AirInventory, ChipInventoryError, InitFileGenerator, MatrixRecordArena,
+                SystemConfig, VmBuilder, VmChipComplex, VmProverExtension,
+            },
             derive::VmConfig,
+            system::SystemChipInventory,
         };
         use openvm_ecc_circuit::{
-            CurveConfig, WeierstrassExtension, WeierstrassExtensionExecutor,
-            WeierstrassExtensionPeriphery,
+            CurveConfig, Rv32WeierstrassConfig, Rv32WeierstrassConfigExecutor,
+            Rv32WeierstrassCpuBuilder,
         };
-        use openvm_rv32im_circuit::{
-            Rv32I, Rv32IExecutor, Rv32IPeriphery, Rv32Io, Rv32IoExecutor, Rv32IoPeriphery, Rv32M,
-            Rv32MExecutor, Rv32MPeriphery,
+        use openvm_sha256_circuit::{Sha256, Sha256Executor, Sha2CpuProverExt};
+        use openvm_stark_backend::{
+            config::{StarkGenericConfig, Val},
+            engine::StarkEngine,
+            p3_field::PrimeField32,
+            prover::cpu::{CpuBackend, CpuDevice},
         };
-        use openvm_sha256_circuit::{Sha256, Sha256Executor, Sha256Periphery};
-        use openvm_stark_backend::p3_field::PrimeField32;
         use serde::{Deserialize, Serialize};
 
         #[derive(Clone, Debug, VmConfig, Serialize, Deserialize)]
         pub struct EcdsaConfig {
-            #[system]
-            pub system: SystemConfig,
-            #[extension]
-            pub base: Rv32I,
-            #[extension]
-            pub mul: Rv32M,
-            #[extension]
-            pub io: Rv32Io,
-            #[extension]
-            pub modular: ModularExtension,
-            #[extension]
-            pub weierstrass: WeierstrassExtension,
+            #[config(generics = true)]
+            pub weierstrass: Rv32WeierstrassConfig,
             #[extension]
             pub sha256: Sha256,
         }
 
         impl EcdsaConfig {
             pub fn new(curves: Vec<CurveConfig>) -> Self {
-                let primes: Vec<_> = curves
-                    .iter()
-                    .flat_map(|c| [c.modulus.clone(), c.scalar.clone()])
-                    .collect();
                 Self {
-                    system: SystemConfig::default().with_continuations(),
-                    base: Default::default(),
-                    mul: Default::default(),
-                    io: Default::default(),
-                    modular: ModularExtension::new(primes),
-                    weierstrass: WeierstrassExtension::new(curves),
+                    weierstrass: Rv32WeierstrassConfig::new(curves),
                     sha256: Default::default(),
                 }
             }
@@ -133,11 +130,47 @@ mod guest_tests {
             fn generate_init_file_contents(&self) -> Option<String> {
                 Some(format!(
                     "// This file is automatically generated by cargo openvm. Do not rename or edit.\n{}\n{}\n",
-                    self.modular.generate_moduli_init(),
-                    self.weierstrass.generate_sw_init()
+                    self.weierstrass.modular.modular.generate_moduli_init(),
+                    self.weierstrass.weierstrass.generate_sw_init()
                 ))
             }
         }
+
+        #[derive(Clone)]
+        pub struct EcdsaCpuBuilder; // unit struct; its `VmBuilder` impl below targets `EcdsaConfig`
+
+        impl<E, SC> VmBuilder<E> for EcdsaCpuBuilder
+        where
+            SC: StarkGenericConfig,
+            E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>, // CPU backend and device
+            Val<SC>: PrimeField32,
+        {
+            type VmConfig = EcdsaConfig;
+            type SystemChipInventory = SystemChipInventory<SC>;
+            type RecordArena = MatrixRecordArena<Val<SC>>;
+
+            fn create_chip_complex(
+                &self,
+                config: &EcdsaConfig,
+                circuit: AirInventory<SC>,
+            ) -> Result<
+                VmChipComplex<SC, Self::RecordArena, E::PB, Self::SystemChipInventory>,
+                ChipInventoryError,
+            > {
+                let mut chip_complex = VmBuilder::<E>::create_chip_complex(
+                    &Rv32WeierstrassCpuBuilder, // delegate the base complex to the Weierstrass builder
+                    &config.weierstrass,
+                    circuit,
+                )?; // return early if the delegated builder fails
+                let inventory = &mut chip_complex.inventory;
+                VmProverExtension::<E, _, _>::extend_prover(
+                    &Sha2CpuProverExt, // then extend that inventory for the `sha256` extension
+                    &config.sha256,
+                    inventory,
+                )?; // return early if extending the inventory fails
+                Ok(chip_complex)
+            }
+        }
     }
 
     #[test]
@@ -156,13 +189,13 @@ mod guest_tests {
                 .with_extension(ModularTranspilerExtension)
                 .with_extension(Sha256TranspilerExtension),
         )?;
-        air_test(config, openvm_exe);
+        air_test(EcdsaCpuBuilder, config, openvm_exe);
         Ok(())
     }
 
     #[test]
     fn test_scalar_sqrt() -> Result<()> {
-        let config = Rv32WeierstrassConfig::new(vec![SECP256K1_CONFIG.clone()]);
+        let config = test_rv32weierstrass_config(vec![SECP256K1_CONFIG.clone()]);
         let elf = build_example_program_at_path(
             get_programs_dir!("tests/programs"),
             "scalar_sqrt",
@@ -177,7 +210,7 @@ mod guest_tests {
                 .with_extension(EccTranspilerExtension)
                 .with_extension(ModularTranspilerExtension),
         )?;
-        air_test(config, openvm_exe);
+        air_test(Rv32WeierstrassCpuBuilder, config, openvm_exe);
         Ok(())
     }
 }
diff --git a/guest-libs/keccak256/tests/lib.rs b/guest-libs/keccak256/tests/lib.rs
index 836d158a4c..3c000d8e2b 100644
--- a/guest-libs/keccak256/tests/lib.rs
+++ b/guest-libs/keccak256/tests/lib.rs
@@ -3,7 +3,7 @@ mod tests {
     use eyre::Result;
     use openvm_circuit::utils::air_test;
     use openvm_instructions::exe::VmExe;
-    use openvm_keccak256_circuit::Keccak256Rv32Config;
+    use openvm_keccak256_circuit::{Keccak256Rv32Config, Keccak256Rv32CpuBuilder};
     use openvm_keccak256_transpiler::Keccak256TranspilerExtension;
     use openvm_rv32im_transpiler::{
         Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
@@ -27,7 +27,7 @@ mod tests {
                 .with_extension(Rv32MTranspilerExtension)
                 .with_extension(Rv32IoTranspilerExtension),
         )?;
-        air_test(config, openvm_exe);
+        air_test(Keccak256Rv32CpuBuilder, config, openvm_exe);
         Ok(())
     }
 }
diff --git a/guest-libs/p256/Cargo.toml b/guest-libs/p256/Cargo.toml
index e54a7d22d6..3b2210f400 100644
--- a/guest-libs/p256/Cargo.toml
+++ b/guest-libs/p256/Cargo.toml
@@ -29,19 +29,18 @@ ff = { workspace = true }
 [dev-dependencies]
 openvm-circuit = { workspace = true, features = ["test-utils", "parallel"] }
 openvm-transpiler.workspace = true
-openvm-algebra-circuit.workspace = true
 openvm-algebra-transpiler.workspace = true
 openvm-ecc-transpiler.workspace = true
 openvm-ecc-circuit.workspace = true
 openvm-sha256-circuit.workspace = true
 openvm-sha256-transpiler.workspace = true
-openvm-rv32im-circuit.workspace = true
 openvm-rv32im-transpiler.workspace = true
 openvm-toolchain-tests.workspace = true
 
 openvm-stark-backend.workspace = true
 openvm-stark-sdk.workspace = true
 
+rand = { workspace = true }
 serde.workspace = true
 eyre.workspace = true
 derive_more = { workspace = true, features = ["from"] }
@@ -70,4 +69,4 @@ voprf = ["elliptic-curve/voprf"]
 num-bigint = { workspace = true }
 
 [package.metadata.cargo-shear]
-ignored = ["openvm", "serde", "num-bigint", "derive_more"]
+ignored = ["openvm", "serde", "num-bigint", "derive_more", "rand"]
diff --git a/guest-libs/p256/tests/lib.rs b/guest-libs/p256/tests/lib.rs
index f11cb63325..2bfc98cbad 100644
--- a/guest-libs/p256/tests/lib.rs
+++ b/guest-libs/p256/tests/lib.rs
@@ -2,8 +2,13 @@ mod guest_tests {
     use ecdsa_config::EcdsaConfig;
     use eyre::Result;
     use openvm_algebra_transpiler::ModularTranspilerExtension;
-    use openvm_circuit::{arch::instructions::exe::VmExe, utils::air_test};
-    use openvm_ecc_circuit::{Rv32WeierstrassConfig, P256_CONFIG};
+    use openvm_circuit::{
+        arch::instructions::exe::VmExe,
+        utils::{air_test, test_system_config},
+    };
+    use openvm_ecc_circuit::{
+        CurveConfig, Rv32WeierstrassConfig, Rv32WeierstrassCpuBuilder, P256_CONFIG,
+    };
     use openvm_ecc_transpiler::EccTranspilerExtension;
     use openvm_rv32im_transpiler::{
         Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
@@ -13,11 +18,20 @@ mod guest_tests {
     use openvm_toolchain_tests::{build_example_program_at_path, get_programs_dir};
     use openvm_transpiler::{transpiler::Transpiler, FromElf};
 
+    use crate::guest_tests::ecdsa_config::EcdsaCpuBuilder;
+
     type F = BabyBear;
 
+    #[cfg(test)] // test-only helper: builds the Weierstrass config used by the tests below
+    fn test_rv32weierstrass_config(curves: Vec<CurveConfig>) -> Rv32WeierstrassConfig {
+        let mut config = Rv32WeierstrassConfig::new(curves);
+        *config.as_mut() = test_system_config(); // presumably the inner system config; confirm
+        config
+    }
+
     #[test]
     fn test_add() -> Result<()> {
-        let config = Rv32WeierstrassConfig::new(vec![P256_CONFIG.clone()]);
+        let config = test_rv32weierstrass_config(vec![P256_CONFIG.clone()]);
         let elf =
             build_example_program_at_path(get_programs_dir!("tests/programs"), "add", &config)?;
         let openvm_exe = VmExe::from_elf(
@@ -29,13 +43,13 @@ mod guest_tests {
                 .with_extension(EccTranspilerExtension)
                 .with_extension(ModularTranspilerExtension),
         )?;
-        air_test(config, openvm_exe);
+        air_test(Rv32WeierstrassCpuBuilder, config, openvm_exe);
         Ok(())
     }
 
     #[test]
     fn test_mul() -> Result<()> {
-        let config = Rv32WeierstrassConfig::new(vec![P256_CONFIG.clone()]);
+        let config = test_rv32weierstrass_config(vec![P256_CONFIG.clone()]);
         let elf =
             build_example_program_at_path(get_programs_dir!("tests/programs"), "mul", &config)?;
         let openvm_exe = VmExe::from_elf(
@@ -47,13 +61,13 @@ mod guest_tests {
                 .with_extension(EccTranspilerExtension)
                 .with_extension(ModularTranspilerExtension),
         )?;
-        air_test(config, openvm_exe);
+        air_test(Rv32WeierstrassCpuBuilder, config, openvm_exe);
         Ok(())
     }
 
     #[test]
     fn test_linear_combination() -> Result<()> {
-        let config = Rv32WeierstrassConfig::new(vec![P256_CONFIG.clone()]);
+        let config = test_rv32weierstrass_config(vec![P256_CONFIG.clone()]);
         let elf = build_example_program_at_path(
             get_programs_dir!("tests/programs"),
             "linear_combination",
@@ -68,62 +82,45 @@ mod guest_tests {
                 .with_extension(EccTranspilerExtension)
                 .with_extension(ModularTranspilerExtension),
         )?;
-        air_test(config, openvm_exe);
+        air_test(Rv32WeierstrassCpuBuilder, config, openvm_exe);
         Ok(())
     }
 
+    // TODO[jpw]: switch to using SDK to avoid this
     mod ecdsa_config {
-        use eyre::Result;
-        use openvm_algebra_circuit::{
-            ModularExtension, ModularExtensionExecutor, ModularExtensionPeriphery,
-        };
         use openvm_circuit::{
-            arch::{InitFileGenerator, SystemConfig},
+            arch::{
+                AirInventory, ChipInventoryError, InitFileGenerator, MatrixRecordArena,
+                SystemConfig, VmBuilder, VmChipComplex, VmProverExtension,
+            },
             derive::VmConfig,
+            system::SystemChipInventory,
         };
         use openvm_ecc_circuit::{
-            CurveConfig, WeierstrassExtension, WeierstrassExtensionExecutor,
-            WeierstrassExtensionPeriphery,
+            CurveConfig, Rv32WeierstrassConfig, Rv32WeierstrassConfigExecutor,
+            Rv32WeierstrassCpuBuilder,
         };
-        use openvm_rv32im_circuit::{
-            Rv32I, Rv32IExecutor, Rv32IPeriphery, Rv32Io, Rv32IoExecutor, Rv32IoPeriphery, Rv32M,
-            Rv32MExecutor, Rv32MPeriphery,
+        use openvm_sha256_circuit::{Sha256, Sha256Executor, Sha2CpuProverExt};
+        use openvm_stark_backend::{
+            config::{StarkGenericConfig, Val},
+            engine::StarkEngine,
+            p3_field::PrimeField32,
+            prover::cpu::{CpuBackend, CpuDevice},
         };
-        use openvm_sha256_circuit::{Sha256, Sha256Executor, Sha256Periphery};
-        use openvm_stark_backend::p3_field::PrimeField32;
         use serde::{Deserialize, Serialize};
 
         #[derive(Clone, Debug, VmConfig, Serialize, Deserialize)]
         pub struct EcdsaConfig {
-            #[system]
-            pub system: SystemConfig,
-            #[extension]
-            pub base: Rv32I,
-            #[extension]
-            pub mul: Rv32M,
-            #[extension]
-            pub io: Rv32Io,
-            #[extension]
-            pub modular: ModularExtension,
-            #[extension]
-            pub weierstrass: WeierstrassExtension,
+            #[config(generics = true)]
+            pub weierstrass: Rv32WeierstrassConfig,
             #[extension]
             pub sha256: Sha256,
         }
 
         impl EcdsaConfig {
             pub fn new(curves: Vec<CurveConfig>) -> Self {
-                let primes: Vec<_> = curves
-                    .iter()
-                    .flat_map(|c| [c.modulus.clone(), c.scalar.clone()])
-                    .collect();
                 Self {
-                    system: SystemConfig::default().with_continuations(),
-                    base: Default::default(),
-                    mul: Default::default(),
-                    io: Default::default(),
-                    modular: ModularExtension::new(primes),
-                    weierstrass: WeierstrassExtension::new(curves),
+                    weierstrass: Rv32WeierstrassConfig::new(curves),
                     sha256: Default::default(),
                 }
             }
@@ -133,11 +130,47 @@ mod guest_tests {
             fn generate_init_file_contents(&self) -> Option<String> {
                 Some(format!(
                     "// This file is automatically generated by cargo openvm. Do not rename or edit.\n{}\n{}\n",
-                    self.modular.generate_moduli_init(),
-                    self.weierstrass.generate_sw_init()
+                    self.weierstrass.modular.modular.generate_moduli_init(),
+                    self.weierstrass.weierstrass.generate_sw_init()
                 ))
             }
         }
+
+        #[derive(Clone)]
+        pub struct EcdsaCpuBuilder; // unit struct; its `VmBuilder` impl below targets `EcdsaConfig`
+
+        impl<E, SC> VmBuilder<E> for EcdsaCpuBuilder
+        where
+            SC: StarkGenericConfig,
+            E: StarkEngine<SC = SC, PB = CpuBackend<SC>, PD = CpuDevice<SC>>, // CPU backend and device
+            Val<SC>: PrimeField32,
+        {
+            type VmConfig = EcdsaConfig;
+            type SystemChipInventory = SystemChipInventory<SC>;
+            type RecordArena = MatrixRecordArena<Val<SC>>;
+
+            fn create_chip_complex(
+                &self,
+                config: &EcdsaConfig,
+                circuit: AirInventory<SC>,
+            ) -> Result<
+                VmChipComplex<SC, Self::RecordArena, E::PB, Self::SystemChipInventory>,
+                ChipInventoryError,
+            > {
+                let mut chip_complex = VmBuilder::<E>::create_chip_complex(
+                    &Rv32WeierstrassCpuBuilder, // delegate the base complex to the Weierstrass builder
+                    &config.weierstrass,
+                    circuit,
+                )?; // return early if the delegated builder fails
+                let inventory = &mut chip_complex.inventory;
+                VmProverExtension::<E, _, _>::extend_prover(
+                    &Sha2CpuProverExt, // then extend that inventory for the `sha256` extension
+                    &config.sha256,
+                    inventory,
+                )?; // return early if extending the inventory fails
+                Ok(chip_complex)
+            }
+        }
     }
 
     #[test]
@@ -156,13 +189,13 @@ mod guest_tests {
                 .with_extension(ModularTranspilerExtension)
                 .with_extension(Sha256TranspilerExtension),
         )?;
-        air_test(config, openvm_exe);
+        air_test(EcdsaCpuBuilder, config, openvm_exe);
         Ok(())
     }
 
     #[test]
     fn test_scalar_sqrt() -> Result<()> {
-        let config = Rv32WeierstrassConfig::new(vec![P256_CONFIG.clone()]);
+        let config = test_rv32weierstrass_config(vec![P256_CONFIG.clone()]);
         let elf = build_example_program_at_path(
             get_programs_dir!("tests/programs"),
             "scalar_sqrt",
@@ -177,7 +210,7 @@ mod guest_tests {
                 .with_extension(EccTranspilerExtension)
                 .with_extension(ModularTranspilerExtension),
         )?;
-        air_test(config, openvm_exe);
+        air_test(Rv32WeierstrassCpuBuilder, config, openvm_exe);
         Ok(())
     }
 }
diff --git a/guest-libs/pairing/Cargo.toml b/guest-libs/pairing/Cargo.toml
index 1e0bcbc80b..cccc2bb7d5 100644
--- a/guest-libs/pairing/Cargo.toml
+++ b/guest-libs/pairing/Cargo.toml
@@ -53,6 +53,7 @@ rand.workspace = true
 num-bigint.workspace = true
 num-traits.workspace = true
 halo2curves-axiom = { workspace = true }
+openvm-pairing = { path = ".", features = ["halo2curves"] }
 
 [features]
 default = []
diff --git a/guest-libs/pairing/tests/lib.rs b/guest-libs/pairing/tests/lib.rs
index 6e55834b77..1dbac5fd28 100644
--- a/guest-libs/pairing/tests/lib.rs
+++ b/guest-libs/pairing/tests/lib.rs
@@ -9,20 +9,23 @@ mod bn254 {
         bn256::{Fq12, Fq2, Fr, G1Affine, G2Affine},
         ff::Field,
     };
-    use openvm_algebra_circuit::{Fp2Extension, ModularExtension};
+    use openvm_algebra_circuit::{Fp2Extension, Rv32ModularConfig};
     use openvm_algebra_transpiler::{Fp2TranspilerExtension, ModularTranspilerExtension};
-    use openvm_circuit::{
-        arch::SystemConfig,
-        utils::{air_test, air_test_impl, air_test_with_min_segments},
+    use openvm_circuit::utils::{
+        air_test, air_test_impl, air_test_with_min_segments, test_system_config,
+    };
+    use openvm_ecc_circuit::{
+        CurveConfig, Rv32WeierstrassConfig, Rv32WeierstrassCpuBuilder, WeierstrassExtension,
     };
-    use openvm_ecc_circuit::{Rv32WeierstrassConfig, WeierstrassExtension};
     use openvm_ecc_guest::{
         algebra::{field::FieldExtension, IntMod},
         AffinePoint,
     };
     use openvm_ecc_transpiler::EccTranspilerExtension;
     use openvm_instructions::exe::VmExe;
-    use openvm_pairing_circuit::{PairingCurve, PairingExtension, Rv32PairingConfig};
+    use openvm_pairing_circuit::{
+        PairingCurve, PairingExtension, Rv32PairingConfig, Rv32PairingCpuBuilder,
+    };
     use openvm_pairing_guest::{
         bn254::{BN254_COMPLEX_STRUCT_NAME, BN254_MODULUS},
         halo2curves_shims::bn254::Bn254,
@@ -32,7 +35,11 @@ mod bn254 {
     use openvm_rv32im_transpiler::{
         Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
     };
-    use openvm_stark_sdk::{openvm_stark_backend::p3_field::FieldAlgebra, p3_baby_bear::BabyBear};
+    use openvm_stark_sdk::{
+        config::{baby_bear_poseidon2::BabyBearPoseidon2Engine, FriParameters},
+        openvm_stark_backend::p3_field::FieldAlgebra,
+        p3_baby_bear::BabyBear,
+    };
     use openvm_toolchain_tests::{build_example_program_at_path_with_features, get_programs_dir};
     use openvm_transpiler::{transpiler::Transpiler, FromElf};
     use rand::SeedableRng;
@@ -48,21 +55,24 @@ mod bn254 {
             .zip(primes.clone())
             .collect::<Vec<_>>();
         Rv32PairingConfig {
-            system: SystemConfig::default().with_continuations(),
-            base: Default::default(),
-            mul: Default::default(),
-            io: Default::default(),
-            modular: ModularExtension::new(primes.to_vec()),
+            modular: Rv32ModularConfig::new(primes.to_vec()),
             fp2: Fp2Extension::new(primes_with_names),
             weierstrass: WeierstrassExtension::new(vec![]),
             pairing: PairingExtension::new(vec![PairingCurve::Bn254]),
         }
     }
 
+    #[cfg(test)] // test-only helper: builds the Weierstrass config used by `test_bn_ec`
+    fn test_rv32weierstrass_config(curves: Vec<CurveConfig>) -> Rv32WeierstrassConfig {
+        let mut config = Rv32WeierstrassConfig::new(curves);
+        *config.as_mut() = test_system_config(); // presumably the inner system config; confirm
+        config
+    }
+
     #[test]
     fn test_bn_ec() -> Result<()> {
         let curve = PairingCurve::Bn254.curve_config();
-        let config = Rv32WeierstrassConfig::new(vec![curve]);
+        let config = test_rv32weierstrass_config(vec![curve]);
         let elf = build_example_program_at_path_with_features(
             get_programs_dir!("tests/programs"),
             "bn_ec",
@@ -78,7 +88,7 @@ mod bn254 {
                 .with_extension(EccTranspilerExtension)
                 .with_extension(ModularTranspilerExtension),
         )?;
-        air_test(config, openvm_exe);
+        air_test(Rv32WeierstrassCpuBuilder, config, openvm_exe);
         Ok(())
     }
 
@@ -111,10 +121,10 @@ mod bn254 {
             .into_iter()
             .flat_map(|fp12| fp12.to_coeffs())
             .flat_map(|fp2| fp2.to_bytes())
-            .map(FieldAlgebra::from_canonical_u8)
+            .map(F::from_canonical_u8)
             .collect::<Vec<_>>();
 
-        air_test_with_min_segments(config, openvm_exe, vec![io], 1);
+        air_test_with_min_segments(Rv32PairingCpuBuilder, config, openvm_exe, vec![io], 1);
         Ok(())
     }
 
@@ -155,7 +165,7 @@ mod bn254 {
             .chain(r0)
             .flat_map(|fp2| fp2.to_coeffs())
             .flat_map(|fp| fp.to_bytes())
-            .map(FieldAlgebra::from_canonical_u8)
+            .map(F::from_canonical_u8)
             .collect::<Vec<_>>();
 
         // Test mul_by_01234
@@ -167,12 +177,12 @@ mod bn254 {
             .chain(r1.to_coeffs())
             .flat_map(|fp2| fp2.to_coeffs())
             .flat_map(|fp| fp.to_bytes())
-            .map(FieldAlgebra::from_canonical_u8)
+            .map(F::from_canonical_u8)
             .collect::<Vec<_>>();
 
         let io_all = io0.into_iter().chain(io1).collect::<Vec<_>>();
 
-        air_test_with_min_segments(config, openvm_exe, vec![io_all], 1);
+        air_test_with_min_segments(Rv32PairingCpuBuilder, config, openvm_exe, vec![io_all], 1);
         Ok(())
     }
 
@@ -208,7 +218,7 @@ mod bn254 {
         let io0 = [s.x, s.y, pt.x, pt.y, l.b, l.c]
             .into_iter()
             .flat_map(|fp| fp.to_bytes())
-            .map(FieldAlgebra::from_canonical_u8)
+            .map(F::from_canonical_u8)
             .collect::<Vec<_>>();
 
         // Test miller_double_and_add_step
@@ -216,12 +226,12 @@ mod bn254 {
         let io1 = [s.x, s.y, q.x, q.y, pt.x, pt.y, l0.b, l0.c, l1.b, l1.c]
             .into_iter()
             .flat_map(|fp| fp.to_bytes())
-            .map(FieldAlgebra::from_canonical_u8)
+            .map(F::from_canonical_u8)
             .collect::<Vec<_>>();
 
         let io_all = io0.into_iter().chain(io1).collect::<Vec<_>>();
 
-        air_test_with_min_segments(config, openvm_exe, vec![io_all], 1);
+        air_test_with_min_segments(Rv32PairingCpuBuilder, config, openvm_exe, vec![io_all], 1);
         Ok(())
     }
 
@@ -260,7 +270,7 @@ mod bn254 {
         let io0 = s
             .into_iter()
             .flat_map(|pt| [pt.x, pt.y].into_iter().flat_map(|fp| fp.to_bytes()))
-            .map(FieldAlgebra::from_canonical_u8)
+            .map(F::from_canonical_u8)
             .collect::<Vec<_>>();
 
         let io1 = q
@@ -269,12 +279,12 @@ mod bn254 {
             .chain(f.to_coeffs())
             .flat_map(|fp2| fp2.to_coeffs())
             .flat_map(|fp| fp.to_bytes())
-            .map(FieldAlgebra::from_canonical_u8)
+            .map(F::from_canonical_u8)
             .collect::<Vec<_>>();
 
         let io_all = io0.into_iter().chain(io1).collect::<Vec<_>>();
 
-        air_test_with_min_segments(config, openvm_exe, vec![io_all], 1);
+        air_test_with_min_segments(Rv32PairingCpuBuilder, config, openvm_exe, vec![io_all], 1);
         Ok(())
     }
 
@@ -318,7 +328,7 @@ mod bn254 {
         let io0 = s
             .into_iter()
             .flat_map(|pt| [pt.x, pt.y].into_iter().flat_map(|fp| fp.to_bytes()))
-            .map(FieldAlgebra::from_canonical_u8)
+            .map(F::from_canonical_u8)
             .collect::<Vec<_>>();
 
         let io1 = q
@@ -326,12 +336,12 @@ mod bn254 {
             .flat_map(|pt| [pt.x, pt.y].into_iter())
             .flat_map(|fp2| fp2.to_coeffs())
             .flat_map(|fp| fp.to_bytes())
-            .map(FieldAlgebra::from_canonical_u8)
+            .map(F::from_canonical_u8)
             .collect::<Vec<_>>();
 
         let io_all = io0.into_iter().chain(io1).collect::<Vec<_>>();
 
-        air_test_with_min_segments(config, openvm_exe, vec![io_all], 1);
+        air_test_with_min_segments(Rv32PairingCpuBuilder, config, openvm_exe, vec![io_all], 1);
         Ok(())
     }
 
@@ -375,7 +385,7 @@ mod bn254 {
         let io0 = s
             .into_iter()
             .flat_map(|pt| [pt.x, pt.y].into_iter().flat_map(|fp| fp.to_bytes()))
-            .map(FieldAlgebra::from_canonical_u8)
+            .map(F::from_canonical_u8)
             .collect::<Vec<_>>();
 
         let io1 = q
@@ -383,12 +393,20 @@ mod bn254 {
             .flat_map(|pt| [pt.x, pt.y].into_iter())
             .flat_map(|fp2| fp2.to_coeffs())
             .flat_map(|fp| fp.to_bytes())
-            .map(FieldAlgebra::from_canonical_u8)
+            .map(F::from_canonical_u8)
             .collect::<Vec<_>>();
 
         let io_all = io0.into_iter().chain(io1).collect::<Vec<_>>();
         // Don't run debugger because it's slow
-        air_test_impl(get_testing_config(), openvm_exe, vec![io_all], 1, false);
+        air_test_impl::<BabyBearPoseidon2Engine, _>(
+            FriParameters::new_for_testing(1),
+            Rv32PairingCpuBuilder,
+            get_testing_config(),
+            openvm_exe,
+            vec![io_all],
+            1,
+            false,
+        )?;
         Ok(())
     }
 
@@ -442,7 +460,7 @@ mod bn254 {
             .flat_map(|w| w.to_le_bytes())
             .map(F::from_canonical_u8)
             .collect();
-        air_test_with_min_segments(config, openvm_exe, vec![io], 1);
+        air_test_with_min_segments(Rv32PairingCpuBuilder, config, openvm_exe, vec![io], 1);
         Ok(())
     }
 }
@@ -456,19 +474,23 @@ mod bls12_381 {
     };
     use num_bigint::BigUint;
     use num_traits::{self, FromPrimitive};
-    use openvm_algebra_circuit::{Fp2Extension, ModularExtension};
+    use openvm_algebra_circuit::{Fp2Extension, Rv32ModularConfig};
     use openvm_algebra_transpiler::{Fp2TranspilerExtension, ModularTranspilerExtension};
     use openvm_circuit::{
-        arch::{instructions::exe::VmExe, SystemConfig},
-        utils::{air_test, air_test_impl, air_test_with_min_segments},
+        arch::instructions::exe::VmExe,
+        utils::{air_test, air_test_impl, air_test_with_min_segments, test_system_config},
+    };
+    use openvm_ecc_circuit::{
+        CurveConfig, Rv32WeierstrassConfig, Rv32WeierstrassCpuBuilder, WeierstrassExtension,
     };
-    use openvm_ecc_circuit::{CurveConfig, Rv32WeierstrassConfig, WeierstrassExtension};
     use openvm_ecc_guest::{
         algebra::{field::FieldExtension, IntMod},
         AffinePoint,
     };
     use openvm_ecc_transpiler::EccTranspilerExtension;
-    use openvm_pairing_circuit::{PairingCurve, PairingExtension, Rv32PairingConfig};
+    use openvm_pairing_circuit::{
+        PairingCurve, PairingExtension, Rv32PairingConfig, Rv32PairingCpuBuilder,
+    };
     use openvm_pairing_guest::{
         bls12_381::{
             BLS12_381_COMPLEX_STRUCT_NAME, BLS12_381_ECC_STRUCT_NAME, BLS12_381_MODULUS,
@@ -481,7 +503,11 @@ mod bls12_381 {
     use openvm_rv32im_transpiler::{
         Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
     };
-    use openvm_stark_sdk::{openvm_stark_backend::p3_field::FieldAlgebra, p3_baby_bear::BabyBear};
+    use openvm_stark_sdk::{
+        config::{baby_bear_poseidon2::BabyBearPoseidon2Engine, FriParameters},
+        openvm_stark_backend::p3_field::FieldAlgebra,
+        p3_baby_bear::BabyBear,
+    };
     use openvm_toolchain_tests::{build_example_program_at_path_with_features, get_programs_dir};
     use openvm_transpiler::{transpiler::Transpiler, FromElf};
     use rand::SeedableRng;
@@ -497,17 +523,20 @@ mod bls12_381 {
             .zip(primes.clone())
             .collect::<Vec<_>>();
         Rv32PairingConfig {
-            system: SystemConfig::default().with_continuations(),
-            base: Default::default(),
-            mul: Default::default(),
-            io: Default::default(),
-            modular: ModularExtension::new(primes.to_vec()),
+            modular: Rv32ModularConfig::new(primes.to_vec()),
             fp2: Fp2Extension::new(primes_with_names),
             weierstrass: WeierstrassExtension::new(vec![]),
             pairing: PairingExtension::new(vec![PairingCurve::Bls12_381]),
         }
     }
 
+    #[cfg(test)] // helper compiled only for test builds
+    fn test_rv32weierstrass_config(curves: Vec<CurveConfig>) -> Rv32WeierstrassConfig {
+        let mut config = Rv32WeierstrassConfig::new(curves); // config for the given curves
+        *config.as_mut() = test_system_config(); // overwrite as_mut() target with test settings
+        config
+    }
+
     #[test]
     fn test_bls_ec() -> Result<()> {
         let curve = CurveConfig {
@@ -517,7 +546,7 @@ mod bls12_381 {
             a: BigUint::ZERO,
             b: BigUint::from_u8(4).unwrap(),
         };
-        let config = Rv32WeierstrassConfig::new(vec![curve]);
+        let config = test_rv32weierstrass_config(vec![curve]);
         let elf = build_example_program_at_path_with_features(
             get_programs_dir!("tests/programs"),
             "bls_ec",
@@ -533,7 +562,7 @@ mod bls12_381 {
                 .with_extension(EccTranspilerExtension)
                 .with_extension(ModularTranspilerExtension),
         )?;
-        air_test(config, openvm_exe);
+        air_test(Rv32WeierstrassCpuBuilder, config, openvm_exe);
         Ok(())
     }
 
@@ -566,10 +595,10 @@ mod bls12_381 {
             .into_iter()
             .flat_map(|fp12| fp12.to_coeffs())
             .flat_map(|fp2| fp2.to_bytes())
-            .map(FieldAlgebra::from_canonical_u8)
+            .map(F::from_canonical_u8)
             .collect::<Vec<_>>();
 
-        air_test_with_min_segments(config, openvm_exe, vec![io], 1);
+        air_test_with_min_segments(Rv32PairingCpuBuilder, config, openvm_exe, vec![io], 1);
         Ok(())
     }
 
@@ -610,7 +639,7 @@ mod bls12_381 {
             .chain(r0)
             .flat_map(|fp2| fp2.to_coeffs())
             .flat_map(|fp| fp.to_bytes())
-            .map(FieldAlgebra::from_canonical_u8)
+            .map(F::from_canonical_u8)
             .collect::<Vec<_>>();
 
         // Test mul_by_02345
@@ -623,12 +652,12 @@ mod bls12_381 {
             .chain(r1.to_coeffs())
             .flat_map(|fp2| fp2.to_coeffs())
             .flat_map(|fp| fp.to_bytes())
-            .map(FieldAlgebra::from_canonical_u8)
+            .map(F::from_canonical_u8)
             .collect::<Vec<_>>();
 
         let io_all = io0.into_iter().chain(io1).collect::<Vec<_>>();
 
-        air_test_with_min_segments(config, openvm_exe, vec![io_all], 1);
+        air_test_with_min_segments(Rv32PairingCpuBuilder, config, openvm_exe, vec![io_all], 1);
         Ok(())
     }
 
@@ -664,7 +693,7 @@ mod bls12_381 {
         let io0 = [s.x, s.y, pt.x, pt.y, l.b, l.c]
             .into_iter()
             .flat_map(|fp| fp.to_bytes())
-            .map(FieldAlgebra::from_canonical_u8)
+            .map(F::from_canonical_u8)
             .collect::<Vec<_>>();
 
         // Test miller_double_and_add_step
@@ -672,12 +701,12 @@ mod bls12_381 {
         let io1 = [s.x, s.y, q.x, q.y, pt.x, pt.y, l0.b, l0.c, l1.b, l1.c]
             .into_iter()
             .flat_map(|fp| fp.to_bytes())
-            .map(FieldAlgebra::from_canonical_u8)
+            .map(F::from_canonical_u8)
             .collect::<Vec<_>>();
 
         let io_all = io0.into_iter().chain(io1).collect::<Vec<_>>();
 
-        air_test_with_min_segments(config, openvm_exe, vec![io_all], 1);
+        air_test_with_min_segments(Rv32PairingCpuBuilder, config, openvm_exe, vec![io_all], 1);
         Ok(())
     }
 
@@ -722,7 +751,7 @@ mod bls12_381 {
         let io0 = s
             .into_iter()
             .flat_map(|pt| [pt.x, pt.y].into_iter().flat_map(|fp| fp.to_bytes()))
-            .map(FieldAlgebra::from_canonical_u8)
+            .map(F::from_canonical_u8)
             .collect::<Vec<_>>();
 
         let io1 = q
@@ -731,12 +760,12 @@ mod bls12_381 {
             .chain(f.to_coeffs())
             .flat_map(|fp2| fp2.to_coeffs())
             .flat_map(|fp| fp.to_bytes())
-            .map(FieldAlgebra::from_canonical_u8)
+            .map(F::from_canonical_u8)
             .collect::<Vec<_>>();
 
         let io_all = io0.into_iter().chain(io1).collect::<Vec<_>>();
 
-        air_test_with_min_segments(config, openvm_exe, vec![io_all], 1);
+        air_test_with_min_segments(Rv32PairingCpuBuilder, config, openvm_exe, vec![io_all], 1);
         Ok(())
     }
 
@@ -779,7 +808,7 @@ mod bls12_381 {
         let io0 = s
             .into_iter()
             .flat_map(|pt| [pt.x, pt.y].into_iter().flat_map(|fp| fp.to_bytes()))
-            .map(FieldAlgebra::from_canonical_u8)
+            .map(F::from_canonical_u8)
             .collect::<Vec<_>>();
 
         let io1 = q
@@ -787,12 +816,12 @@ mod bls12_381 {
             .flat_map(|pt| [pt.x, pt.y].into_iter())
             .flat_map(|fp2| fp2.to_coeffs())
             .flat_map(|fp| fp.to_bytes())
-            .map(FieldAlgebra::from_canonical_u8)
+            .map(F::from_canonical_u8)
             .collect::<Vec<_>>();
 
         let io_all = io0.into_iter().chain(io1).collect::<Vec<_>>();
 
-        air_test_with_min_segments(config, openvm_exe, vec![io_all], 1);
+        air_test_with_min_segments(Rv32PairingCpuBuilder, config, openvm_exe, vec![io_all], 1);
         Ok(())
     }
 
@@ -836,7 +865,7 @@ mod bls12_381 {
         let io0 = s
             .into_iter()
             .flat_map(|pt| [pt.x, pt.y].into_iter().flat_map(|fp| fp.to_bytes()))
-            .map(FieldAlgebra::from_canonical_u8)
+            .map(F::from_canonical_u8)
             .collect::<Vec<_>>();
 
         let io1 = q
@@ -844,12 +873,20 @@ mod bls12_381 {
             .flat_map(|pt| [pt.x, pt.y].into_iter())
             .flat_map(|fp2| fp2.to_coeffs())
             .flat_map(|fp| fp.to_bytes())
-            .map(FieldAlgebra::from_canonical_u8)
+            .map(F::from_canonical_u8)
             .collect::<Vec<_>>();
 
         let io_all = io0.into_iter().chain(io1).collect::<Vec<_>>();
         // Don't run debugger because it's slow
-        air_test_impl(get_testing_config(), openvm_exe, vec![io_all], 1, false);
+        air_test_impl::<BabyBearPoseidon2Engine, _>(
+            FriParameters::new_for_testing(1),
+            Rv32PairingCpuBuilder,
+            get_testing_config(),
+            openvm_exe,
+            vec![io_all],
+            1,
+            false,
+        )?;
         Ok(())
     }
 
@@ -903,7 +940,7 @@ mod bls12_381 {
             .flat_map(|w| w.to_le_bytes())
             .map(F::from_canonical_u8)
             .collect();
-        air_test_with_min_segments(config, openvm_exe, vec![io], 1);
+        air_test_with_min_segments(Rv32PairingCpuBuilder, config, openvm_exe, vec![io], 1);
         Ok(())
     }
 }
diff --git a/guest-libs/ruint/ruint-macro/src/lib.rs b/guest-libs/ruint/ruint-macro/src/lib.rs
index 86dc5afcf0..67660292f4 100644
--- a/guest-libs/ruint/ruint-macro/src/lib.rs
+++ b/guest-libs/ruint/ruint-macro/src/lib.rs
@@ -1,5 +1,6 @@
 #![doc = include_str!("../README.md")]
 #![warn(clippy::all, clippy::pedantic, clippy::nursery)]
+#![allow(clippy::manual_div_ceil)]
 
 use proc_macro::{Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree};
 use std::fmt::{self, Write};
diff --git a/guest-libs/ruint/tests/lib.rs b/guest-libs/ruint/tests/lib.rs
index 3db2697775..fec78d47f7 100644
--- a/guest-libs/ruint/tests/lib.rs
+++ b/guest-libs/ruint/tests/lib.rs
@@ -1,7 +1,7 @@
 #[cfg(test)]
 mod tests {
     use eyre::Result;
-    use openvm_bigint_circuit::Int256Rv32Config;
+    use openvm_bigint_circuit::{Int256Rv32Config, Int256Rv32CpuBuilder};
     use openvm_bigint_transpiler::Int256TranspilerExtension;
     use openvm_circuit::utils::air_test;
     use openvm_instructions::exe::VmExe;
@@ -30,7 +30,7 @@ mod tests {
                 .with_extension(Rv32IoTranspilerExtension)
                 .with_extension(Int256TranspilerExtension),
         )?;
-        air_test(config, openvm_exe);
+        air_test(Int256Rv32CpuBuilder, config, openvm_exe);
         Ok(())
     }
 }
diff --git a/guest-libs/ruint/tests/programs/examples/matrix_power.rs b/guest-libs/ruint/tests/programs/examples/matrix_power.rs
index 95826d32de..6a874bc35e 100644
--- a/guest-libs/ruint/tests/programs/examples/matrix_power.rs
+++ b/guest-libs/ruint/tests/programs/examples/matrix_power.rs
@@ -123,6 +123,11 @@ pub fn main() {
         panic!();
     }
 
+    if U256::from_limbs([u64::MAX; 4]) + one != zero {
+        print("FAIL: U256::MAX == 0 test failed");
+        panic!();
+    }
+
     if two_to_200 != two_to_200 {
         print("FAIL: 2^200 clone test failed");
         panic!();
diff --git a/guest-libs/sha2/tests/lib.rs b/guest-libs/sha2/tests/lib.rs
index 9ebab5ac02..669c2c3db6 100644
--- a/guest-libs/sha2/tests/lib.rs
+++ b/guest-libs/sha2/tests/lib.rs
@@ -6,7 +6,7 @@ mod tests {
     use openvm_rv32im_transpiler::{
         Rv32ITranspilerExtension, Rv32IoTranspilerExtension, Rv32MTranspilerExtension,
     };
-    use openvm_sha256_circuit::Sha256Rv32Config;
+    use openvm_sha256_circuit::{Sha256Rv32Config, Sha256Rv32CpuBuilder};
     use openvm_sha256_transpiler::Sha256TranspilerExtension;
     use openvm_stark_sdk::p3_baby_bear::BabyBear;
     use openvm_toolchain_tests::{build_example_program_at_path, get_programs_dir};
@@ -27,7 +27,7 @@ mod tests {
                 .with_extension(Rv32IoTranspilerExtension)
                 .with_extension(Sha256TranspilerExtension),
         )?;
-        air_test(config, openvm_exe);
+        air_test(Sha256Rv32CpuBuilder, config, openvm_exe);
         Ok(())
     }
 }
diff --git a/guest-libs/verify_stark/Cargo.toml b/guest-libs/verify_stark/Cargo.toml
index 070083edad..66f13731d2 100644
--- a/guest-libs/verify_stark/Cargo.toml
+++ b/guest-libs/verify_stark/Cargo.toml
@@ -21,4 +21,4 @@ openvm-circuit = { workspace = true, features = ["parallel"] }
 openvm-stark-sdk = { workspace = true }
 openvm-native-compiler.workspace = true
 openvm-verify-stark.workspace = true
-eyre.workspace = true
\ No newline at end of file
+eyre.workspace = true
diff --git a/guest-libs/verify_stark/tests/integration_test.rs b/guest-libs/verify_stark/tests/integration_test.rs
index e05b5f12a1..ea1150976a 100644
--- a/guest-libs/verify_stark/tests/integration_test.rs
+++ b/guest-libs/verify_stark/tests/integration_test.rs
@@ -1,123 +1,92 @@
-#[cfg(test)]
-mod tests {
-    use std::{path::PathBuf, sync::Arc};
+use std::path::PathBuf;
 
-    use eyre::Result;
-    use openvm_circuit::arch::{SystemConfig, DEFAULT_MAX_NUM_PUBLIC_VALUES};
-    use openvm_native_compiler::conversion::CompilerOptions;
-    use openvm_sdk::{
-        commit::AppExecutionCommit,
-        config::{AggStarkConfig, AppConfig, SdkSystemConfig, SdkVmConfig},
-        keygen::AggStarkProvingKey,
-        Sdk, StdIn,
-    };
-    use openvm_stark_sdk::config::FriParameters;
-    use openvm_verify_stark::host::{
-        compute_hint_key_for_verify_openvm_stark, encode_proof_to_kv_store_value,
-    };
-
-    const LEAF_LOG_BLOWUP: usize = 2;
-    const INTERNAL_LOG_BLOWUP: usize = 3;
-    const ROOT_LOG_BLOWUP: usize = 4;
-
-    #[test]
-    fn test_verify_openvm_stark_e2e() -> Result<()> {
-        const ASM_FILENAME: &str = "root_verifier.asm";
-        let sdk = Sdk::new();
-        let mut pkg_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).to_path_buf();
-        pkg_dir.pop();
-        pkg_dir.pop();
-        pkg_dir.push("crates/sdk/guest/fib");
-
-        let vm_config = SdkVmConfig::builder()
-            .system(SdkSystemConfig {
-                config: SystemConfig::default().with_continuations(),
-            })
-            .rv32i(Default::default())
-            .rv32m(Default::default())
-            .io(Default::default())
-            .native(Default::default())
-            .build();
-        assert!(vm_config.system.config.continuation_enabled);
-        let elf = sdk.build(
-            Default::default(),
-            &vm_config,
-            pkg_dir,
-            &Default::default(),
-            None,
-        )?;
+use eyre::Result;
+use openvm_circuit::arch::{SystemConfig, DEFAULT_MAX_NUM_PUBLIC_VALUES};
+use openvm_native_compiler::conversion::CompilerOptions;
+use openvm_sdk::{
+    config::{AggregationConfig, AppConfig, SdkSystemConfig, SdkVmConfig},
+    keygen::AggProvingKey,
+    Sdk, StdIn,
+};
+use openvm_stark_sdk::config::FriParameters;
+use openvm_verify_stark::host::{
+    compute_hint_key_for_verify_openvm_stark, encode_proof_to_kv_store_value,
+};
 
-        let app_exe = sdk.transpile(elf, vm_config.transpiler())?;
-        let fri_params = FriParameters::new_for_testing(LEAF_LOG_BLOWUP);
-        let app_config =
-            AppConfig::new_with_leaf_fri_params(fri_params, vm_config.clone(), fri_params);
+const LEAF_LOG_BLOWUP: usize = 2;
+const INTERNAL_LOG_BLOWUP: usize = 3;
+const ROOT_LOG_BLOWUP: usize = 4;
 
-        let app_pk = sdk.app_keygen(app_config.clone())?;
-        let committed_app_exe = sdk.commit_app_exe(fri_params, app_exe.clone())?;
+#[test] // e2e: prove the fib guest, then run the verify_openvm_stark example on the proof
+fn test_verify_openvm_stark_e2e() -> Result<()> {
+    const ASM_FILENAME: &str = "root_verifier.asm"; // also part of the hint key computed below
+    let mut pkg_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).to_path_buf();
+    pkg_dir.pop(); // up two levels from this crate's manifest dir...
+    pkg_dir.pop();
+    pkg_dir.push("crates/sdk/guest/fib"); // ...then down to the fib guest
 
-        let commits =
-            AppExecutionCommit::compute(&vm_config, &committed_app_exe, &app_pk.leaf_committed_exe);
-        let exe_commit = commits.app_exe_commit.to_u32_digest();
-        let vm_commit = commits.app_vm_commit.to_u32_digest();
+    let vm_config = SdkVmConfig::builder()
+        .system(SdkSystemConfig {
+            config: SystemConfig::default(), // NOTE(review): confirm default enables continuations
+        })
+        .rv32i(Default::default())
+        .rv32m(Default::default())
+        .io(Default::default())
+        .native(Default::default())
+        .build();
+    let fri_params = FriParameters::new_for_testing(LEAF_LOG_BLOWUP);
+    let app_config = AppConfig::new_with_leaf_fri_params(fri_params, vm_config.clone(), fri_params);
+    let sdk = Sdk::new(app_config)?; // SDK is built from the app config up front
+    assert!(vm_config.system.config.continuation_enabled);
+    let elf = sdk.build(Default::default(), pkg_dir, &None, None)?; // builds the fib guest
 
-        let agg_pk = AggStarkProvingKey::keygen(AggStarkConfig {
-            max_num_user_public_values: DEFAULT_MAX_NUM_PUBLIC_VALUES,
-            leaf_fri_params: FriParameters::new_for_testing(LEAF_LOG_BLOWUP),
-            internal_fri_params: FriParameters::new_for_testing(INTERNAL_LOG_BLOWUP),
-            root_fri_params: FriParameters::new_for_testing(ROOT_LOG_BLOWUP),
-            profiling: false,
-            compiler_options: CompilerOptions {
-                enable_cycle_tracker: true,
-                ..Default::default()
-            },
-            root_max_constraint_degree: (1 << ROOT_LOG_BLOWUP) + 1,
-        });
-        let asm = sdk.generate_root_verifier_asm(&agg_pk);
-        let asm_path = format!(
-            "{}/examples/verify_openvm_stark/{}",
-            env!("CARGO_MANIFEST_DIR"),
-            ASM_FILENAME
-        );
-        std::fs::write(asm_path, asm)?;
+    let (e2e_stark_proof, app_commit) = sdk.prove(elf, StdIn::default())?; // default stdin
+    let exe_commit = app_commit.app_exe_commit.to_u32_digest();
+    let vm_commit = app_commit.app_vm_commit.to_u32_digest();
 
-        let e2e_stark_proof = sdk.generate_e2e_stark_proof(
-            Arc::new(app_pk),
-            committed_app_exe,
-            agg_pk,
-            StdIn::default(),
-        )?;
+    let agg_pk = AggProvingKey::keygen(AggregationConfig {
+        max_num_user_public_values: DEFAULT_MAX_NUM_PUBLIC_VALUES,
+        leaf_fri_params: FriParameters::new_for_testing(LEAF_LOG_BLOWUP),
+        internal_fri_params: FriParameters::new_for_testing(INTERNAL_LOG_BLOWUP),
+        root_fri_params: FriParameters::new_for_testing(ROOT_LOG_BLOWUP),
+        profiling: false,
+        compiler_options: CompilerOptions {
+            enable_cycle_tracker: true,
+            ..Default::default()
+        },
+        root_max_constraint_degree: (1 << ROOT_LOG_BLOWUP) + 1,
+    })?;
+    let _ = sdk.set_agg_pk(agg_pk); // NOTE(review): result dropped after prove(); confirm
+    let asm = sdk.generate_root_verifier_asm();
+    let asm_path = format!(
+        "{}/examples/verify_openvm_stark/{}",
+        env!("CARGO_MANIFEST_DIR"),
+        ASM_FILENAME
+    );
+    std::fs::write(asm_path, asm)?; // lands next to the example guest built below
 
-        let verify_exe = {
-            let mut pkg_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).to_path_buf();
-            pkg_dir.push("examples/verify_openvm_stark");
-            let elf = sdk.build(
-                Default::default(),
-                &vm_config,
-                pkg_dir,
-                &Default::default(),
-                None,
-            )?;
-            sdk.transpile(elf, vm_config.transpiler())?
-        };
+    let verify_elf = { // ELF of the verify_openvm_stark example guest
+        let mut pkg_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")).to_path_buf();
+        pkg_dir.push("examples/verify_openvm_stark");
+        sdk.build(Default::default(), pkg_dir, &None, None)?
+    };
 
-        // app_exe publishes 7th and 8th fibonacci numbers.
-        let pvs: Vec<u8> = [13u32, 21, 0, 0, 0, 0, 0, 0]
-            .iter()
-            .flat_map(|x| x.to_le_bytes())
-            .collect();
+    // app_exe publishes 31st and 32nd fibonacci numbers.
+    let pvs: Vec<u8> = [1346269, 2178309, 0, 0, 0, 0, 0, 0u32] // 8 u32 words -> 32 LE bytes
+        .iter()
+        .flat_map(|x| x.to_le_bytes())
+        .collect();
 
-        let mut stdin = StdIn::default();
-        let key =
-            compute_hint_key_for_verify_openvm_stark(ASM_FILENAME, &exe_commit, &vm_commit, &pvs);
-        let value = encode_proof_to_kv_store_value(&e2e_stark_proof.proof);
-        stdin.add_key_value(key, value);
+    let mut stdin = StdIn::default();
+    let key = compute_hint_key_for_verify_openvm_stark(ASM_FILENAME, &exe_commit, &vm_commit, &pvs);
+    let value = encode_proof_to_kv_store_value(&e2e_stark_proof.inner);
+    stdin.add_key_value(key, value); // proof is supplied through the key-value store
 
-        stdin.write(&exe_commit);
-        stdin.write(&vm_commit);
-        stdin.write(&pvs);
+    stdin.write(&exe_commit); // commits and public values are also written to stdin
+    stdin.write(&vm_commit);
+    stdin.write(&pvs);
 
-        sdk.execute(verify_exe, vm_config, stdin)?;
+    sdk.execute(verify_elf, stdin)?; // any execution error fails the test via `?`
 
-        Ok(())
-    }
+    Ok(())
 }
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
index 35e9b966ed..8825102061 100644
--- a/rust-toolchain.toml
+++ b/rust-toolchain.toml
@@ -1,3 +1,3 @@
 [toolchain]
-channel = "1.85.1"
+channel = "1.86.0"
 components = ["clippy", "rustfmt"]