From 9e495b8eed306da57930c53005fc619b45fc0b98 Mon Sep 17 00:00:00 2001
From: "Jiyue (Jennifer) Wang"
Date: Tue, 14 Oct 2025 16:46:12 -0400
Subject: [PATCH 1/8] initial commit

---
 .github/workflows/gpu_test.yaml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/workflows/gpu_test.yaml b/.github/workflows/gpu_test.yaml
index c2a4705e5..f39f89916 100644
--- a/.github/workflows/gpu_test.yaml
+++ b/.github/workflows/gpu_test.yaml
@@ -61,5 +61,10 @@ jobs:
           export LD_PRELOAD=$CONDA/envs/test/lib/libpython3.10.so.1.0
           export LD_LIBRARY_PATH=$CONDA/envs/test/lib/libpython3.10.so.1.0
           pytest tests/unit_tests --cov=. --cov-report=xml --durations=20 -vv
+      - name: Run integration tests
+        run: |
+          export LD_PRELOAD=$CONDA/envs/test/lib/libpython3.10.so.1.0
+          export LD_LIBRARY_PATH=$CONDA/envs/test/lib/libpython3.10.so.1.0
+          python -m tests.integration_tests.test_vllm_policy_correctness
       - name: Upload Coverage to Codecov
         uses: codecov/codecov-action@v3

From cbd91bb29f7415fb0628f71e680857b6c6dc17e3 Mon Sep 17 00:00:00 2001
From: Jiyue Wang
Date: Wed, 15 Oct 2025 17:16:12 -0400
Subject: [PATCH 2/8] create a new workflow for continuous tests

---
 .../continuous_integration_test.yaml        | 60 +++++++++++++++++++
 .github/workflows/gpu_test.yaml             |  5 --
 .../test_vllm_policy_correctness.py         |  4 +-
 3 files changed, 62 insertions(+), 7 deletions(-)
 create mode 100644 .github/workflows/continuous_integration_test.yaml

diff --git a/.github/workflows/continuous_integration_test.yaml b/.github/workflows/continuous_integration_test.yaml
new file mode 100644
index 000000000..8f0f72cc8
--- /dev/null
+++ b/.github/workflows/continuous_integration_test.yaml
@@ -0,0 +1,60 @@
+name: Continuous Integration Tests
+
+on:
+  schedule:
+    # Runs every hour
+    - cron: '0 * * * *'
+  workflow_dispatch:
+
+concurrency:
+  group: continuous-integration-${{ github.ref }}
+  cancel-in-progress: true
+
+permissions:
+  id-token: write
+  contents: read
+
+defaults:
+  run:
+    shell: bash -l -eo pipefail {0}
+
+jobs:
+  integration_test:
+    if: github.repository_owner == 'meta-pytorch'
+    runs-on: linux.g5.12xlarge.nvidia.gpu
+    timeout-minutes: 120
+    strategy:
+      matrix:
+        python-version: ['3.10']
+    steps:
+      - name: Check out repo
+        uses: actions/checkout@v4
+      - name: Setup conda env
+        uses: conda-incubator/setup-miniconda@v2
+        with:
+          auto-update-conda: true
+          miniconda-version: "latest"
+          activate-environment: test
+          python-version: ${{ matrix.python-version }}
+      - name: Update pip
+        run: python -m pip install --upgrade pip
+      - name: Install pinned torch nightly
+        run: python -m pip install --pre torch==2.9.0.dev20250905 --no-cache-dir --index-url https://download.pytorch.org/whl/nightly/cu129
+      - name: Download and install vLLM and its dependencies
+        # TODO: this honestly could not be hackier if I tried
+        run: |
+          python -m pip install -r .github/packaging/vllm_reqs.txt
+          python -m pip install vllm==0.10.1.dev0+g6d8d0a24c.d20251009.cu129 --no-cache-dir --index-url https://download.pytorch.org/whl/preview/forge
+      - name: Install Monarch
+        run: pip install torchmonarch==0.1.0rc1
+      - name: Install torchtitan and torchstore
+        run: |
+          python -m pip install git+https://github.com/pytorch/torchtitan.git
+          python -m pip install git+https://github.com/meta-pytorch/torchstore.git
+      - name: Install dependencies
+        run: python -m pip install --no-build-isolation -e ".[dev]"
+      - name: Run integration tests
+        run: |
+          export LD_PRELOAD=$CONDA/envs/test/lib/libpython3.10.so.1.0
+          export LD_LIBRARY_PATH=$CONDA/envs/test/lib/libpython3.10.so.1.0
+          pytest tests/integration_tests/test_vllm_policy_correctness.py --durations=20 -vv

diff --git a/.github/workflows/gpu_test.yaml b/.github/workflows/gpu_test.yaml
index f39f89916..c2a4705e5 100644
--- a/.github/workflows/gpu_test.yaml
+++ b/.github/workflows/gpu_test.yaml
@@ -61,10 +61,5 @@ jobs:
           export LD_PRELOAD=$CONDA/envs/test/lib/libpython3.10.so.1.0
           export LD_LIBRARY_PATH=$CONDA/envs/test/lib/libpython3.10.so.1.0
           pytest tests/unit_tests --cov=. --cov-report=xml --durations=20 -vv
-      - name: Run integration tests
-        run: |
-          export LD_PRELOAD=$CONDA/envs/test/lib/libpython3.10.so.1.0
-          export LD_LIBRARY_PATH=$CONDA/envs/test/lib/libpython3.10.so.1.0
-          python -m tests.integration_tests.test_vllm_policy_correctness
       - name: Upload Coverage to Codecov
         uses: codecov/codecov-action@v3

diff --git a/tests/integration_tests/test_vllm_policy_correctness.py b/tests/integration_tests/test_vllm_policy_correctness.py
index e2da9b068..2a47cf2b8 100644
--- a/tests/integration_tests/test_vllm_policy_correctness.py
+++ b/tests/integration_tests/test_vllm_policy_correctness.py
@@ -6,7 +6,7 @@
 
 import pytest
 
-from forge.actors.policy import Policy
+from forge.actors.generator import Generator
 from vllm import SamplingParams
 from vllm.engine.arg_utils import AsyncEngineArgs
 from vllm.sampling_params import RequestOutputKind
@@ -51,7 +51,7 @@ async def test_same_output():
     vllm_model = AsyncLLM.from_engine_args(args)
 
     # Setup Policy service
-    policy = await Policy.options(
+    policy = await Generator.options(
         procs=1, num_replicas=1, with_gpus=True
     ).as_service(
         engine_args={

From 714cfc62bf989d31567aa49e9eae1dbab82903ef Mon Sep 17 00:00:00 2001
From: Jiyue Wang
Date: Wed, 15 Oct 2025 17:25:40 -0400
Subject: [PATCH 3/8] ...

---
 .github/workflows/continuous_integration_test.yaml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/continuous_integration_test.yaml b/.github/workflows/continuous_integration_test.yaml
index 8f0f72cc8..6b919c682 100644
--- a/.github/workflows/continuous_integration_test.yaml
+++ b/.github/workflows/continuous_integration_test.yaml
@@ -4,6 +4,8 @@ on:
   schedule:
     # Runs every hour
     - cron: '0 * * * *'
+  # TODO: remove this when merged to main
+  pull_request:
   workflow_dispatch:
 
 concurrency:

From c42d660fe412aeba299e5f38bc810d42305f7f45 Mon Sep 17 00:00:00 2001
From: Jiyue Wang
Date: Wed, 15 Oct 2025 17:35:09 -0400
Subject: [PATCH 4/8] fix one more place

---
 tests/integration_tests/test_vllm_policy_correctness.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/integration_tests/test_vllm_policy_correctness.py b/tests/integration_tests/test_vllm_policy_correctness.py
index 2a47cf2b8..0208e81aa 100644
--- a/tests/integration_tests/test_vllm_policy_correctness.py
+++ b/tests/integration_tests/test_vllm_policy_correctness.py
@@ -140,7 +140,7 @@ async def test_cache_usage():
     vllm_model = AsyncLLM.from_engine_args(args)
 
     # Setup Policy service
-    policy = await Policy.options(
+    policy = await Generator.options(
         procs=1, num_replicas=1, with_gpus=True
     ).as_service(
         engine_args={

From 7d02cd044025c45782eb28cf251292ca164fe2c8 Mon Sep 17 00:00:00 2001
From: Jiyue Wang
Date: Thu, 16 Oct 2025 09:28:52 -0400
Subject: [PATCH 5/8] add grpo run

---
 .github/workflows/continuous_integration_test.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/continuous_integration_test.yaml b/.github/workflows/continuous_integration_test.yaml
index 6b919c682..6f1a053fb 100644
--- a/.github/workflows/continuous_integration_test.yaml
+++ b/.github/workflows/continuous_integration_test.yaml
@@ -60,3 +60,4 @@ jobs:
           export LD_PRELOAD=$CONDA/envs/test/lib/libpython3.10.so.1.0
           export LD_LIBRARY_PATH=$CONDA/envs/test/lib/libpython3.10.so.1.0
           pytest tests/integration_tests/test_vllm_policy_correctness.py --durations=20 -vv
+          python -m apps.grpo.main --config apps/grpo/qwen3_1_7b.yaml

From 664de1cb50a9709f2f968d7e9de0517f41663768 Mon Sep 17 00:00:00 2001
From: Jiyue Wang
Date: Thu, 16 Oct 2025 09:48:14 -0400
Subject: [PATCH 6/8] add run_e2e_tests.py

---
 .../continuous_integration_test.yaml     |  10 +-
 tests/integration_tests/run_e2e_tests.py | 129 ++++++++++++++++++
 2 files changed, 137 insertions(+), 2 deletions(-)
 create mode 100644 tests/integration_tests/run_e2e_tests.py

diff --git a/.github/workflows/continuous_integration_test.yaml b/.github/workflows/continuous_integration_test.yaml
index 6f1a053fb..34469710e 100644
--- a/.github/workflows/continuous_integration_test.yaml
+++ b/.github/workflows/continuous_integration_test.yaml
@@ -55,9 +55,15 @@ jobs:
           python -m pip install git+https://github.com/meta-pytorch/torchstore.git
       - name: Install dependencies
         run: python -m pip install --no-build-isolation -e ".[dev]"
-      - name: Run integration tests
+      - name: Run vLLM policy correctness tests
         run: |
           export LD_PRELOAD=$CONDA/envs/test/lib/libpython3.10.so.1.0
           export LD_LIBRARY_PATH=$CONDA/envs/test/lib/libpython3.10.so.1.0
           pytest tests/integration_tests/test_vllm_policy_correctness.py --durations=20 -vv
-          python -m apps.grpo.main --config apps/grpo/qwen3_1_7b.yaml
+        timeout-minutes: 20
+      - name: Run e2e GRPO training test
+        run: |
+          export LD_PRELOAD=$CONDA/envs/test/lib/libpython3.10.so.1.0
+          export LD_LIBRARY_PATH=$CONDA/envs/test/lib/libpython3.10.so.1.0
+          python tests/integration_tests/run_e2e_tests.py
+        timeout-minutes: 30

diff --git a/tests/integration_tests/run_e2e_tests.py b/tests/integration_tests/run_e2e_tests.py
new file mode 100644
index 000000000..baf8dcc1b
--- /dev/null
+++ b/tests/integration_tests/run_e2e_tests.py
@@ -0,0 +1,129 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+"""
+End-to-end integration test runner for Forge applications.
+
+This test runner validates that training can run without crashes or exceptions.
+Similar to TorchTitan's integration test approach, we focus on functional
+correctness (no crashes) rather than numerical validation.
+
+Usage:
+    python tests/integration_tests/run_e2e_tests.py
+"""
+
+import subprocess
+import sys
+import time
+from pathlib import Path
+
+
+def run_grpo_test(
+    config_path: str,
+    max_steps: int = 5,
+    timeout: int = 1800,
+    extra_args: list[str] | None = None,
+) -> subprocess.CompletedProcess:
+    """
+    Run GRPO training and verify it completes without crashes.
+
+    Args:
+        config_path: Path to YAML config file
+        max_steps: Number of training steps to run
+        timeout: Maximum time in seconds to wait
+        extra_args: Additional CLI arguments to pass
+
+    Returns:
+        CompletedProcess object with stdout/stderr
+
+    Raises:
+        Exception: If training fails with non-zero exit code
+    """
+    cmd = [
+        sys.executable,
+        "-m",
+        "apps.grpo.main",
+        "--config",
+        config_path,
+        "--trainer.training.steps",
+        str(max_steps),
+        # Disable WandB for CI to avoid auth issues - only use console logging
+        "--metric_logging",
+        '{"console": {"reduce_across_ranks": true}}',
+    ]
+
+    if extra_args:
+        cmd.extend(extra_args)
+
+    print(f"Running e2e test: {config_path}")
+    print(f"Command: {' '.join(cmd)}")
+    print(f"Max steps: {max_steps}, Timeout: {timeout}s")
+    print("-" * 80)
+
+    start_time = time.time()
+
+    try:
+        result = subprocess.run(
+            cmd,
+            timeout=timeout,
+            capture_output=True,
+            text=True,
+        )
+    except subprocess.TimeoutExpired:
+        elapsed = time.time() - start_time
+        raise Exception(
+            f"Training timed out after {elapsed:.1f}s (timeout={timeout}s)"
+        )
+
+    elapsed = time.time() - start_time
+
+    # Print output for debugging
+    if result.stdout:
+        print("STDOUT:")
+        print(result.stdout[-2000:])  # Print last 2000 chars to avoid overwhelming logs
+
+    if result.stderr:
+        print("\nSTDERR:")
+        print(result.stderr[-2000:])
+
+    print("-" * 80)
+
+    # Check for success
+    if result.returncode != 0:
+        raise Exception(
+            f"Training failed with return code {result.returncode} after {elapsed:.1f}s"
+        )
+
+    print(f"✓ Training completed successfully in {elapsed:.1f}s")
+    return result
+
+
+def main():
+    """Run all e2e tests."""
+    print("=" * 80)
+    print("Forge E2E Integration Tests")
+    print("=" * 80)
+
+    # Test 1: GRPO with smallest model
+    test_config = "apps/grpo/qwen3_1_7b.yaml"
+
+    if not Path(test_config).exists():
+        raise FileNotFoundError(f"Config file not found: {test_config}")
+
+    try:
+        run_grpo_test(test_config, max_steps=5, timeout=1800)
+        print("\n" + "=" * 80)
+        print("✓ All e2e tests passed!")
+        print("=" * 80)
+    except Exception as e:
+        print("\n" + "=" * 80)
+        print(f"✗ E2E test failed: {e}")
+        print("=" * 80)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
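A side note on the runner added above: it exposes only a main() entrypoint and defines no test_* functions, so pytest collects nothing from it, and the CI step invokes it directly with python instead. A minimal, hypothetical pytest wrapper is sketched below; it is not part of this patch series, and it assumes tests/integration_tests is importable as a package so run_grpo_test from the file above can be reused.

    # Hypothetical wrapper, not part of this patch series.
    from tests.integration_tests.run_e2e_tests import run_grpo_test


    def test_grpo_e2e_smoke():
        # run_grpo_test() raises if the training subprocess exits non-zero or
        # times out, which pytest reports as a test failure.
        run_grpo_test("apps/grpo/qwen3_1_7b.yaml", max_steps=5, timeout=1800)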
From 86a1ca7f9088dcecf61d90c75da0d607a0ba0536 Mon Sep 17 00:00:00 2001
From: Jiyue Wang
Date: Thu, 16 Oct 2025 09:55:58 -0400
Subject: [PATCH 7/8] more changes

---
 .../continuous_integration_test.yaml        |  6 ++--
 .../{run_e2e_tests.py => test_grpo_e2e.py}  | 28 +++++++++----------
 2 files changed, 17 insertions(+), 17 deletions(-)
 rename tests/integration_tests/{run_e2e_tests.py => test_grpo_e2e.py} (77%)

diff --git a/.github/workflows/continuous_integration_test.yaml b/.github/workflows/continuous_integration_test.yaml
index 34469710e..e27d197f9 100644
--- a/.github/workflows/continuous_integration_test.yaml
+++ b/.github/workflows/continuous_integration_test.yaml
@@ -61,9 +61,9 @@ jobs:
           export LD_LIBRARY_PATH=$CONDA/envs/test/lib/libpython3.10.so.1.0
           pytest tests/integration_tests/test_vllm_policy_correctness.py --durations=20 -vv
         timeout-minutes: 20
-      - name: Run e2e GRPO training test
+      - name: Run GRPO e2e test
         run: |
           export LD_PRELOAD=$CONDA/envs/test/lib/libpython3.10.so.1.0
           export LD_LIBRARY_PATH=$CONDA/envs/test/lib/libpython3.10.so.1.0
-          python tests/integration_tests/run_e2e_tests.py
-        timeout-minutes: 30
+          python tests/integration_tests/test_grpo_e2e.py
+        timeout-minutes: 20

diff --git a/tests/integration_tests/run_e2e_tests.py b/tests/integration_tests/test_grpo_e2e.py
similarity index 77%
rename from tests/integration_tests/run_e2e_tests.py
rename to tests/integration_tests/test_grpo_e2e.py
index baf8dcc1b..97d7f1827 100644
--- a/tests/integration_tests/run_e2e_tests.py
+++ b/tests/integration_tests/test_grpo_e2e.py
@@ -5,14 +5,14 @@
 # LICENSE file in the root directory of this source tree.
 
 """
-End-to-end integration test runner for Forge applications.
+End-to-end integration test for GRPO training.
 
-This test runner validates that training can run without crashes or exceptions.
+This test validates that GRPO training can run without crashes or exceptions.
 Similar to TorchTitan's integration test approach, we focus on functional
 correctness (no crashes) rather than numerical validation.
 
 Usage:
-    python tests/integration_tests/run_e2e_tests.py
+    python tests/integration_tests/test_grpo_e2e.py
 """
 
 import subprocess
@@ -21,7 +21,7 @@ import time
 from pathlib import Path
 
 
-def run_grpo_test(
+def run_grpo_training(
    config_path: str,
     max_steps: int = 5,
     timeout: int = 1800,
@@ -58,7 +58,7 @@ def run_grpo_training(
     if extra_args:
         cmd.extend(extra_args)
 
-    print(f"Running e2e test: {config_path}")
+    print(f"Running GRPO e2e test: {config_path}")
     print(f"Command: {' '.join(cmd)}")
     print(f"Max steps: {max_steps}, Timeout: {timeout}s")
     print("-" * 80)
@@ -75,7 +75,7 @@ def run_grpo_training(
     except subprocess.TimeoutExpired:
         elapsed = time.time() - start_time
         raise Exception(
-            f"Training timed out after {elapsed:.1f}s (timeout={timeout}s)"
+            f"GRPO training timed out after {elapsed:.1f}s (timeout={timeout}s)"
         )
 
     elapsed = time.time() - start_time
@@ -94,33 +94,33 @@ def run_grpo_training(
     # Check for success
     if result.returncode != 0:
         raise Exception(
-            f"Training failed with return code {result.returncode} after {elapsed:.1f}s"
+            f"GRPO training failed with return code {result.returncode} after {elapsed:.1f}s"
         )
 
-    print(f"✓ Training completed successfully in {elapsed:.1f}s")
+    print(f"✓ GRPO training completed successfully in {elapsed:.1f}s")
     return result
 
 
 def main():
-    """Run all e2e tests."""
+    """Run GRPO e2e test."""
     print("=" * 80)
-    print("Forge E2E Integration Tests")
+    print("GRPO E2E Integration Test")
     print("=" * 80)
 
-    # Test 1: GRPO with smallest model
+    # Test GRPO with smallest model
     test_config = "apps/grpo/qwen3_1_7b.yaml"
 
     if not Path(test_config).exists():
         raise FileNotFoundError(f"Config file not found: {test_config}")
 
     try:
-        run_grpo_test(test_config, max_steps=5, timeout=1800)
+        run_grpo_training(test_config, max_steps=5, timeout=1800)
         print("\n" + "=" * 80)
-        print("✓ All e2e tests passed!")
+        print("✓ GRPO e2e test passed!")
         print("=" * 80)
     except Exception as e:
         print("\n" + "=" * 80)
-        print(f"✗ E2E test failed: {e}")
+        print(f"✗ GRPO e2e test failed: {e}")
         print("=" * 80)
         sys.exit(1)

From 65543f4e4e44f3cb8fdff4dbbf3bdd4881f96ab0 Mon Sep 17 00:00:00 2001
From: "Jiyue (Jennifer) Wang"
Date: Thu, 16 Oct 2025 11:20:19 -0400
Subject: [PATCH 8/8] try fix the wandb issue

---
 tests/integration_tests/test_grpo_e2e.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/tests/integration_tests/test_grpo_e2e.py b/tests/integration_tests/test_grpo_e2e.py
index 97d7f1827..f59b57eb5 100644
--- a/tests/integration_tests/test_grpo_e2e.py
+++ b/tests/integration_tests/test_grpo_e2e.py
@@ -48,11 +48,9 @@ def run_grpo_training(
         "apps.grpo.main",
         "--config",
         config_path,
-        "--trainer.training.steps",
-        str(max_steps),
+        f"trainer.training.steps={str(max_steps)}",
         # Disable WandB for CI to avoid auth issues - only use console logging
-        "--metric_logging",
-        '{"console": {"reduce_across_ranks": true}}',
+        "~metric_logging.wandb",
     ]
 
     if extra_args:
         cmd.extend(extra_args)
@@ -76,7 +74,7 @@ def run_grpo_training(
         elapsed = time.time() - start_time
         raise Exception(
             f"GRPO training timed out after {elapsed:.1f}s (timeout={timeout}s)"
-        )
+        ) from None
 
     elapsed = time.time() - start_time
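A closing note on [PATCH 8/8]: it replaces the flag-style arguments (--trainer.training.steps plus a JSON value for --metric_logging) with a dotted key=value assignment and a ~metric_logging.wandb deletion, which is the override style the GRPO app's config CLI appears to accept; the deletion drops the WandB backend so CI only logs to the console. The sketch below is a toy illustration of the assumed semantics of those two override forms, not Forge's actual config code, and all names in it are hypothetical.

    # Toy illustration of the assumed override semantics, not Forge's real parser:
    # "a.b.c=v" assigns a nested key, "~a.b" deletes a subtree (here the WandB
    # metric-logging backend).
    from typing import Any


    def apply_overrides(config: dict[str, Any], overrides: list[str]) -> dict[str, Any]:
        for item in overrides:
            if item.startswith("~"):
                *parents, leaf = item[1:].split(".")
                node = config
                for key in parents:
                    node = node[key]
                node.pop(leaf, None)  # remove e.g. metric_logging.wandb entirely
            else:
                path, value = item.split("=", 1)
                *parents, leaf = path.split(".")
                node = config
                for key in parents:
                    node = node.setdefault(key, {})
                node[leaf] = value  # values stay strings in this sketch
        return config


    if __name__ == "__main__":
        cfg = {
            "trainer": {"training": {"steps": 1000}},
            "metric_logging": {"console": {}, "wandb": {"project": "grpo"}},
        }
        apply_overrides(cfg, ["trainer.training.steps=5", "~metric_logging.wandb"])
        print(cfg)  # steps overridden, wandb backend removed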