diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml
index b5e89a2b5e..63c14ad7ad 100644
--- a/.github/workflows/bench.yml
+++ b/.github/workflows/bench.yml
@@ -97,10 +97,11 @@ jobs:
       run: |
         cat pr/bench-${{ matrix.device }}.* 2>/dev/null || true
         cat master/bench-${{ matrix.device }}.* 2>/dev/null || true
-
-    - name: Archive Logs
+
+    # All other runners (non-Phoenix) just run without special env
+    - name: Archive Logs (Frontier)
+      if: always() && matrix.cluster != 'phoenix'
       uses: actions/upload-artifact@v4
-      if: always()
       with:
        name: ${{ matrix.cluster }}-${{ matrix.device }}
        path: |
diff --git a/.github/workflows/phoenix/bench.sh b/.github/workflows/phoenix/bench.sh
index f58ef44721..a0e93f9052 100644
--- a/.github/workflows/phoenix/bench.sh
+++ b/.github/workflows/phoenix/bench.sh
@@ -2,7 +2,8 @@
 
 n_ranks=12
 
-if [ "$job_device" = "gpu" ]; then
+echo "My benchmarking device is:" $device
+if [ "$device" = "gpu" ]; then
     n_ranks=$(nvidia-smi -L | wc -l)        # number of GPUs on node
     gpu_ids=$(seq -s ' ' 0 $(($n_ranks-1))) # 0,1,2,...,gpu_count-1
     device_opts="--gpu -g $gpu_ids"
@@ -15,7 +16,7 @@ mkdir -p $currentdir
 
 export TMPDIR=$currentdir
 
-if [ "$job_device" = "gpu" ]; then
+if [ "$device" = "gpu" ]; then
     ./mfc.sh bench --mem 12 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks
 else
     ./mfc.sh bench --mem 1 -j $(nproc) -o "$job_slug.yaml" -- -c phoenix-bench $device_opts -n $n_ranks
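Editorial note on the `bench.sh` hunk above (not part of the patch): the script now reads `$device` instead of `$job_device`, so it only behaves as intended when the submitting side exports that variable into the batch job's environment, which the rewritten submit scripts below do inside their heredocs. A minimal sketch of that hand-off, with illustrative values:

```bash
# Sketch only; "gpu" and the slug are example values, not taken from the patch.
export device="gpu"          # normally exported inside the generated SBATCH script
export job_slug="bench-gpu"  # hypothetical slug derived from the benchmark script name
bash .github/workflows/phoenix/bench.sh   # picks rank count and GPU flags from $device
```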
diff --git a/.github/workflows/phoenix/submit-bench.sh b/.github/workflows/phoenix/submit-bench.sh
index e8b6dd3484..91160dd73a 100644
--- a/.github/workflows/phoenix/submit-bench.sh
+++ b/.github/workflows/phoenix/submit-bench.sh
@@ -1,64 +1,107 @@
-#!/bin/bash
-
-set -e
+#!/usr/bin/env bash
+set -euo pipefail
 
 usage() {
     echo "Usage: $0 [script.sh] [cpu|gpu]"
+    exit 1
 }
 
-if [ ! -z "$1" ]; then
-    sbatch_script_contents=`cat $1`
-else
-    usage
-    exit 1
-fi
+[[ $# -eq 2 ]] || usage
 
-sbatch_cpu_opts="\
+sbatch_script="$1"
+
+device="$2"
+job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2"
+
+# read the body of the user script
+sbatch_body=$(<"$sbatch_script")
+
+# common SBATCH directives
+sbatch_common_opts="\
+#SBATCH -J shb-${sbatch_script%%.sh}-$device  # job name
+#SBATCH --account=gts-sbryngelson3            # account
+#SBATCH -N1                                   # nodes
+#SBATCH -t 02:00:00                           # walltime
+#SBATCH -q embers                             # QOS
+#SBATCH -o $job_slug.out                      # stdout+stderr
+#SBATCH --mem-per-cpu=2G                      # default mem (overridden below)
+"
+
+# CPU vs GPU overrides
+if [[ "$device" == "cpu" ]]; then
+    sbatch_device_opts="\
 #SBATCH -p cpu-small                     # partition
 #SBATCH --ntasks-per-node=24             # Number of cores per node required
 #SBATCH --mem-per-cpu=2G                 # Memory per core\
 "
-
-sbatch_gpu_opts="\
+elif [[ "$device" == "gpu" ]]; then
+    sbatch_device_opts="\
 #SBATCH -CL40S
 #SBATCH --ntasks-per-node=4              # Number of cores per node required
 #SBATCH -G2\
 "
-
-if [ "$2" = "cpu" ]; then
-    sbatch_device_opts="$sbatch_cpu_opts"
-elif [ "$2" = "gpu" ]; then
-    sbatch_device_opts="$sbatch_gpu_opts"
 else
-    usage
-    exit 1
+    usage
 fi
 
-job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2"
+# submit and capture the JobID
+JOBID=$(sbatch <<-EOT | awk '{print $4}'
+    #!/usr/bin/env bash
+    ${sbatch_common_opts}
+    ${sbatch_device_opts}
+
+    export job_slug="${job_slug}"
+    export device="${device}"
-sbatch </dev/null 2>&1 || :' EXIT
-EOT
+# ────────── Poll until SLURM job finishes ──────────
+while :; do
+    # Try sacct first
+    STATE=$(sacct -j "$JOBID" --format=State --noheader --parsable2 | head -n1)
+
+    # Fallback to squeue if sacct is empty
+    if [[ -z "$STATE" ]]; then
+        STATE=$(squeue -j "$JOBID" -h -o "%T" || echo "")
+    fi
+
+    # If it's one of SLURM's terminal states, break immediately
+    case "$STATE" in
+        COMPLETED|FAILED|CANCELLED|TIMEOUT)
+            echo "✅ SLURM job $JOBID reached terminal state: $STATE"
+            break
+            ;;
+        "")
+            echo "✅ SLURM job $JOBID no longer in queue; assuming finished"
+            break
+            ;;
+        *)
+            echo "⏳ SLURM job $JOBID state: $STATE"
+            sleep 10
+            ;;
+    esac
+done
+# Now retrieve the exit code and exit with it
+EXIT_CODE=$(sacct -j "$JOBID" --noheader --format=ExitCode | head -1 | cut -d: -f1)
+echo "🔚 SLURM job $JOBID exit code: $EXIT_CODE"
+exit "$EXIT_CODE"
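A usage sketch for the rewritten submit wrapper above (illustrative, not part of the patch). The two positional arguments follow the script's own `Usage: $0 [script.sh] [cpu|gpu]` message, and the exit status is the SLURM job's exit code propagated from `sacct`:

```bash
# Submit the benchmark driver to SLURM on the GPU configuration, then block
# until the job reaches a terminal state; the wrapper exits with the job's code.
.github/workflows/phoenix/submit-bench.sh .github/workflows/phoenix/bench.sh gpu
echo "propagated SLURM exit code: $?"
```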
diff --git a/.github/workflows/phoenix/submit.sh b/.github/workflows/phoenix/submit.sh
index 6700e38c50..182d7a2532 100644
--- a/.github/workflows/phoenix/submit.sh
+++ b/.github/workflows/phoenix/submit.sh
@@ -1,64 +1,100 @@
-#!/bin/bash
-
-set -e
+#!/usr/bin/env bash
+set -euo pipefail
 
 usage() {
     echo "Usage: $0 [script.sh] [cpu|gpu]"
+    exit 1
 }
 
-if [ ! -z "$1" ]; then
-    sbatch_script_contents=`cat $1`
-else
-    usage
-    exit 1
-fi
+[[ $# -eq 2 ]] || usage
+
+sbatch_script="$1"
+device="$2"
+
+job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2"
+
+# read the body of the user script
+sbatch_body=$(<"$sbatch_script")
 
-sbatch_cpu_opts="\
-#SBATCH -p cpu-small                     # partition
-#SBATCH --ntasks-per-node=24             # Number of cores per node required
-#SBATCH --mem-per-cpu=2G                 # Memory per core\
+# common SBATCH directives
+sbatch_common_opts="\
+#SBATCH -J shb-${sbatch_script%%.sh}-$device  # job name
+#SBATCH --account=gts-sbryngelson3            # account
+#SBATCH -N1                                   # nodes
+#SBATCH -t 03:00:00                           # walltime
+#SBATCH -q embers                             # QOS
+#SBATCH -o $job_slug.out                      # stdout+stderr
+#SBATCH --mem-per-cpu=2G                      # default mem (overridden below)
 "
 
-sbatch_gpu_opts="\
+# CPU vs GPU overrides
+if [[ "$device" == "cpu" ]]; then
+    sbatch_device_opts="\
+#SBATCH -p cpu-small
+#SBATCH --ntasks-per-node=24
+"
+elif [[ "$device" == "gpu" ]]; then
+    sbatch_device_opts="\
 #SBATCH -p gpu-v100,gpu-a100,gpu-h100,gpu-l40s
-#SBATCH --ntasks-per-node=4              # Number of cores per node required
-#SBATCH -G2\
+#SBATCH --ntasks-per-node=4
+#SBATCH -G2
 "
-
-if [ "$2" = "cpu" ]; then
-    sbatch_device_opts="$sbatch_cpu_opts"
-elif [ "$2" = "gpu" ]; then
-    sbatch_device_opts="$sbatch_gpu_opts"
 else
-    usage
-    exit 1
+    usage
 fi
 
-job_slug="`basename "$1" | sed 's/\.sh$//' | sed 's/[^a-zA-Z0-9]/-/g'`-$2"
+# submit and capture the JobID
+JOBID=$(sbatch <<-EOT | awk '{print $4}'
+    #!/usr/bin/env bash
+    ${sbatch_common_opts}
+    ${sbatch_device_opts}
-sbatch </dev/null 2>&1 || :' EXIT
-EOT
+# ────────── Poll until SLURM job finishes ──────────
+while :; do
+    # Try sacct first
+    STATE=$(sacct -j "$JOBID" --format=State --noheader --parsable2 | head -n1)
+
+    # Fallback to squeue if sacct is empty
+    if [[ -z "$STATE" ]]; then
+        STATE=$(squeue -j "$JOBID" -h -o "%T" || echo "")
+    fi
+
+    # If it's one of SLURM's terminal states, break immediately
+    case "$STATE" in
+        COMPLETED|FAILED|CANCELLED|TIMEOUT)
+            echo "✅ SLURM job $JOBID reached terminal state: $STATE"
+            break
+            ;;
+        "")
+            echo "✅ SLURM job $JOBID no longer in queue; assuming finished"
+            break
+            ;;
+        *)
+            echo "⏳ SLURM job $JOBID state: $STATE"
+            sleep 10
+            ;;
+    esac
+done
+# Now retrieve the exit code and exit with it
+EXIT_CODE=$(sacct -j "$JOBID" --noheader --format=ExitCode | head -1 | cut -d: -f1)
+echo "🔚 SLURM job $JOBID exit code: $EXIT_CODE"
+exit "$EXIT_CODE"
diff --git a/.github/workflows/phoenix/test.sh b/.github/workflows/phoenix/test.sh
index 5582e9f6d5..60b9920f51 100644
--- a/.github/workflows/phoenix/test.sh
+++ b/.github/workflows/phoenix/test.sh
@@ -1,13 +1,19 @@
 #!/bin/bash
 
+tmpbuild=/storage/scratch1/6/sbryngelson3/mytmp_build
+currentdir=$tmpbuild/run-$(( RANDOM % 900 ))
+mkdir -p $tmpbuild
+mkdir -p $currentdir
+export TMPDIR=$currentdir
+
+n_test_threads=8
+
 build_opts=""
 if [ "$job_device" = "gpu" ]; then
     build_opts="--gpu"
 fi
 
-./mfc.sh test --dry-run -j 8 $build_opts
-
-n_test_threads=8
+./mfc.sh test --dry-run -j $n_test_threads $build_opts
 
 if [ "$job_device" = "gpu" ]; then
     gpu_count=$(nvidia-smi -L | wc -l) # number of GPUs on node
@@ -18,4 +24,7 @@
 fi
 
 ./mfc.sh test --max-attempts 3 -a -j $n_test_threads $device_opts -- -c phoenix
 
+sleep 10
+rm -rf "$currentdir" || true
+unset TMPDIR
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 2948b11333..db618bea46 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -101,6 +101,7 @@ jobs:
       group: phoenix
       labels: ${{ matrix.lbl }}
     env:
+      NODE_OPTIONS: ${{ matrix.lbl == 'gt' && '--max-old-space-size=2048' || '' }}
       ACTIONS_RUNNER_FORCE_ACTIONS_NODE_VERSION: node16
       ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
     steps:
@@ -125,7 +126,7 @@
     - name: Archive Logs
       uses: actions/upload-artifact@v4
-      if: always()
+      if: matrix.lbl == 'frontier'
      with:
        name: logs-${{ strategy.job-index }}-${{ matrix.device }}
        path: test-${{ matrix.device }}.out
diff --git a/misc/starting-phoenix-runners.md b/misc/starting-phoenix-runners.md
new file mode 100644
index 0000000000..5e77fbd189
--- /dev/null
+++ b/misc/starting-phoenix-runners.md
@@ -0,0 +1,110 @@
+# Launching Phoenix Runners
+
+The Phoenix runners were repeatedly failing due to a network error.
+Spencer managed to fix it via [this PR](https://github.com/MFlowCode/MFC/pull/933) and by running things through a socks5 proxy on each login node that holds a runner.
+These steps are documented for Spencer or his next of kin.
+
+__The runners are started via the following process__
+
+1. Log in to the login node via `ssh login-phoenix-rh9-<N>.pace.gatech.edu`. `<N>` can be `1` through `6` on Phoenix.
+    * Detour: Make sure no stray `ssh` daemons are sitting around: `pkill -9 sshd`.
+    * You can probably keep your terminal alive via `fuser -k -9 ~/nohup.out`, which kills (signal 9) whatever process is writing to that no-hangup file (the daemon we care about).
+2. Log back into the same login node, because you may have just nuked your session.
+    * Detour: Make sure stray runners on that login node are dead (one-liner): `pkill -9 -f -E 'run.sh|Runner.listener|Runner.helper'`
+    * If cautious, check that no runner processes are left over: `top`, followed by `u`, then `<username>` and return.
+3. Execute from your home directory: `nohup ssh -N -D 1080 -vvv login-phoenix-rh9-<N>.pace.gatech.edu &`, replacing `<N>` with the login node number.
+    * This starts a proxy to tunnel a new ssh session through.
+4. Navigate to your runner's directory (or create a runner directory if you need one).
+    * Right now they are in Spencer's `scratch/mfc-runners/action-runner-<N>`.
+5. Run the alias `start_runner`, which dumps output to `~/runner.out`.
+    * If one doesn't have this alias yet, create and source it in your `.bashrc` or similar:
+```bash
+alias start_runner=' \
+  http_proxy="socks5://localhost:1080" \
+  https_proxy="socks5://localhost:1080" \
+  no_proxy="localhost,127.0.0.1,github.com,api.github.com,pipelines.actions.githubusercontent.com,alive.github.com,pypi.org,files.pythonhosted.org,fftw.org,www.fftw.org" \
+  NO_PROXY="localhost,127.0.0.1,github.com,api.github.com,pipelines.actions.githubusercontent.com,alive.github.com,pypi.org,files.pythonhosted.org,fftw.org,www.fftw.org" \
+  RUNNER_DEBUG=1 \
+  ACTIONS_STEP_DEBUG=1 \
+  GITHUB_ACTIONS_RUNNER_PREFER_IP_FAMILY=ipv4 \
+  DOTNET_SYSTEM_NET_SOCKETS_KEEPALIVE_TIME=00:01:00 \
+  DOTNET_SYSTEM_NET_SOCKETS_KEEPALIVE_INTERVAL=00:00:20 \
+  DOTNET_SYSTEM_NET_SOCKETS_KEEPALIVE_RETRYCOUNT=5 \
+  nohup ./run.sh > ~/runner.out 2>&1 &'
+```
+6. You're done.
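Editorial aside (not part of the patch or the file above): before launching the runner in step 5, one might want to confirm the SOCKS tunnel from step 3 is actually up and usable. The exact check below is an assumption, not something the notes prescribe; `ss` and `curl` are standard on the Phoenix login nodes' Linux environment.

```bash
# Sketch of a tunnel sanity check, assuming the proxy from step 3 is on port 1080.
ss -tlnp | grep 1080                  # is anything listening on localhost:1080?
curl -s --socks5-hostname localhost:1080 https://api.github.com/zen \
    && echo "proxy path to GitHub works"
```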
+
+
+### For inquisitive minds
+
+__Why the `start_runner` alias?__
+
+1. `alias start_runner='…'`
+   Defines a new shell alias named `start_runner`. Whenever you run `start_runner`, the shell will execute everything between the single quotes as if you'd typed it at the prompt.
+
+2. `http_proxy="socks5://localhost:1080"`
+   Sets the `http_proxy` environment variable so that any HTTP traffic from the runner is sent through a SOCKS5 proxy listening on `localhost:1080`.
+
+3. `https_proxy="socks5://localhost:1080"`
+   Tells HTTPS-aware tools to use that same local SOCKS5 proxy for HTTPS requests.
+
+4. `no_proxy="localhost,127.0.0.1,github.com,api.github.com,pipelines.actions.githubusercontent.com,alive.github.com,pypi.org,files.pythonhosted.org,fftw.org,www.fftw.org"`
+   Lists hosts and domains that should bypass the proxy entirely. Commonly used for internal or high-volume endpoints where you don't want proxy overhead.
+
+5. `NO_PROXY="localhost,127.0.0.1,github.com,api.github.com,pipelines.actions.githubusercontent.com,alive.github.com,pypi.org,files.pythonhosted.org,fftw.org,www.fftw.org"`
+   Same list as `no_proxy`; some programs only check the uppercase `NO_PROXY` variable.
+
+6. `RUNNER_DEBUG=1`
+   Enables debug-level logging in the GitHub Actions runner itself, so you'll see more verbose internal messages in its logs.
+
+7. `ACTIONS_STEP_DEBUG=1`
+   Turns on step-level debug logging for actions you invoke; handy if you need to trace exactly what each action is doing under the hood.
+
+8. `GITHUB_ACTIONS_RUNNER_PREFER_IP_FAMILY=ipv4`
+   Forces the runner to resolve DNS names to IPv4 addresses only. Useful if your proxy or network has spotty IPv6 support.
+
+9. `DOTNET_SYSTEM_NET_SOCKETS_KEEPALIVE_TIME=00:01:00`
+   For .NET-based tasks: sets the initial TCP keepalive timeout to 1 minute (after 1 minute of idle, a keepalive probe is sent).
+
+10. `DOTNET_SYSTEM_NET_SOCKETS_KEEPALIVE_INTERVAL=00:00:20`
+    If the first keepalive probe gets no response, wait 20 seconds between subsequent probes.
+
+11. `DOTNET_SYSTEM_NET_SOCKETS_KEEPALIVE_RETRYCOUNT=5`
+    If probes continue to go unanswered, retry up to 5 times before declaring the connection dead.
+
+12. `nohup ./run.sh > ~/runner.out 2>&1 &`
+    - `nohup … &` runs `./run.sh` in the background and makes it immune to hangups (so it keeps running if you log out).
+    - `> ~/runner.out` redirects **stdout** to the file `runner.out` in your home directory.
+    - `2>&1` redirects **stderr** into the same file, so you get a combined log of everything the script prints.
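Editorial aside (not in the original notes): after starting the runner, one can check that the proxy and keepalive variables from the alias actually reached the runner process. The pattern below targets the `run.sh` process the notes launch; the grep expression is an illustrative assumption.

```bash
# Confirm the alias's environment made it into the running runner process.
pid=$(pgrep -n -f run.sh)                          # newest process matching run.sh
tr '\0' '\n' < "/proc/$pid/environ" | grep -Ei 'proxy|keepalive'
```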
+
+__Why the extra ssh command?__
+
+1. `http_proxy="socks5://localhost:1080"`
+   Routes all HTTP traffic through a local SOCKS5 proxy on port 1080.
+
+2. `https_proxy="socks5://localhost:1080"`
+   Routes all HTTPS traffic through the same proxy.
+
+3. `no_proxy="localhost,127.0.0.1,github.com,api.github.com,pipelines.actions.githubusercontent.com,alive.github.com,pypi.org,files.pythonhosted.org,fftw.org,www.fftw.org"`
+   Specifies hosts and domains that bypass the proxy entirely. Includes specific things that MFC's CMake will try to `wget` (e.g., `fftw`) or fetch with some other non-`git` command. Allows `git clone` to work.
+
+4. `NO_PROXY="localhost,127.0.0.1,github.com,api.github.com,pipelines.actions.githubusercontent.com,alive.github.com,pypi.org,files.pythonhosted.org,fftw.org,www.fftw.org"`
+   Same bypass list for applications that only check the uppercase variable.
+
+5. `RUNNER_DEBUG=1`
+   Enables verbose internal logging in the GitHub Actions runner.
+
+6. `GITHUB_ACTIONS_RUNNER_PREFER_IP_FAMILY=ipv4`
+   Forces DNS resolution to IPv4 to avoid IPv6 issues.
+
+7. `DOTNET_SYSTEM_NET_SOCKETS_KEEPALIVE_TIME=00:01:00`
+   (For .NET tasks) sends the first TCP keepalive probe after 1 minute of idle.
+
+8. `DOTNET_SYSTEM_NET_SOCKETS_KEEPALIVE_INTERVAL=00:00:20`
+   Waits 20 seconds between subsequent TCP keepalive probes.
+
+9. `DOTNET_SYSTEM_NET_SOCKETS_KEEPALIVE_RETRYCOUNT=5`
+   Retries keepalive probes up to 5 times before closing the connection.
+
+10. `nohup ./run.sh > ~/runner.out 2>&1 &`
+    Runs `run.sh` in the background, immune to hangups, redirecting both stdout and stderr to `~/runner.out`.
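Editorial footnote to the new `misc/starting-phoenix-runners.md` file: the three moving parts (SOCKS tunnel, proxy environment, `run.sh`) can be read as one sequence. The sketch below is hypothetical glue combining steps 3 to 5 of the notes, not a script that exists in the repository; the node number, runner path, and trimmed `no_proxy` list are placeholders taken from the notes above.

```bash
#!/usr/bin/env bash
# Hypothetical helper combining steps 3-5 of the notes above; not part of MFC.
set -euo pipefail

N=1                                                  # login node number, 1..6
runner_dir=~/scratch/mfc-runners/action-runner-$N    # placeholder path from the notes

# Step 3: background SOCKS5 tunnel through the login node
nohup ssh -N -D 1080 -vvv "login-phoenix-rh9-$N.pace.gatech.edu" >/dev/null 2>&1 &
sleep 5   # give the tunnel a moment to come up

# Steps 4-5: launch the runner behind the proxy (variable set trimmed for brevity)
cd "$runner_dir"
http_proxy="socks5://localhost:1080" \
https_proxy="socks5://localhost:1080" \
no_proxy="localhost,127.0.0.1,github.com,api.github.com" \
nohup ./run.sh > ~/runner.out 2>&1 &
```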