Skip to content

Commit 72d00a8

Browse files
wang2yn84 and The tunix Authors
authored and committed
Copybara import of the project:
-- a407ff2 by Lance Wang <lancewang@google.com>: Add vLLM to dependency list since it's OSS-ed. COPYBARA_INTEGRATE_REVIEW=#582 from google:lance-add-vllm a407ff2 PiperOrigin-RevId: 821078416
1 parent c55e233 commit 72d00a8

File tree

3 files changed

+83
-19
lines changed

3 files changed

+83
-19
lines changed

.github/workflows/tpu-tests.yml

Lines changed: 68 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,12 @@ concurrency:
2929
group: ${{ github.event_name == 'pull_request' && format('{0}-pr-{1}', github.workflow, github.event.pull_request.number) || github.event_name == 'schedule' && format('{0}-schedule', github.workflow) || github.run_id }}
3030
cancel-in-progress: true
3131

32+
env:
33+
HF_HOME: ~/.cache/huggingface
34+
HF_HUB_ENABLE_HF_TRANSFER: "1"
35+
3236
jobs:
33-
run:
37+
run_prod:
3438
runs-on: [linux-x86-ct5lp-224-8tpu]
3539
environment: testing
3640
container:
@@ -40,15 +44,25 @@ jobs:
4044
CLOUD_TPU_ACCELERATOR: v5e-8
4145
JAX_PLATFORMS: tpu
4246
steps:
47+
48+
# Cache Hugging Face hub
49+
- name: Cache HF hub
50+
uses: actions/cache@v4
51+
with:
52+
path: ~/.cache/huggingface
53+
key: hf-${{ runner.os }}-${{ hashFiles('pyproject.toml', 'requirements*.txt', 'constraints*.txt') }}
54+
restore-keys: |
55+
hf-${{ runner.os }}-
56+
4357
- name: Checkout code
4458
uses: actions/checkout@v4
4559
with:
4660
fetch-depth: 0
4761

4862
- name: Install tunix dependencies
4963
run: |
50-
pip install -e .
51-
pip install pytest pytest-xdist jinja2
64+
pip install -e .[prod]
65+
pip install pytest pytest-xdist
5266
5367
- name: Verify TPU availability
5468
run: |
@@ -85,7 +99,6 @@ jobs:
8599
86100
- name: Run tunix generation tests (PASSED only)
87101
run: |
88-
# vllm_sampler_test depends on vllm TPU which is not OSS yet
89102
# tokenizer_adapter_test requires access to gated repo
90103
python -m pytest tests/generate/ -v --tb=short \
91104
--ignore=tests/generate/vllm_sampler_test.py \
@@ -94,7 +107,7 @@ jobs:
94107
- name: Run tunix SFT tests
95108
run: |
96109
python -m pytest tests/sft/ -v --tb=short
97-
110+
98111
- name: Run tunix SFT integration tests
99112
env:
100113
HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -115,28 +128,28 @@ jobs:
115128
env:
116129
HF_TOKEN: ${{ secrets.HF_TOKEN }}
117130
run: |
118-
131+
119132
# Download GSM8K dataset
120133
mkdir -p /tmp/grpo_test/rl/grpo/data
121134
python3 -c "
122135
from datasets import load_dataset
123136
import json
124-
137+
125138
# Download and save GSM8K train split
126139
dataset = load_dataset('openai/gsm8k', 'main', split='train')
127140
train_data = [{'question': item['question'], 'answer': item['answer']} for item in dataset]
128141
with open('/tmp/grpo_test/rl/grpo/data/gsm8k_train.json', 'w') as f:
129142
json.dump(train_data, f)
130-
143+
131144
# Download and save GSM8K test split
132145
dataset = load_dataset('openai/gsm8k', 'main', split='test')
133146
test_data = [{'question': item['question'], 'answer': item['answer']} for item in dataset]
134147
with open('/tmp/grpo_test/rl/grpo/data/gsm8k_test.json', 'w') as f:
135148
json.dump(test_data, f)
136-
149+
137150
print('GSM8K dataset downloaded successfully')
138151
"
139-
152+
140153
# Run GRPO demo script with minimal configuration
141154
python3 scripts/grpo_demo_llama3_qwen2.py \
142155
--root-dir=/tmp/grpo_test \
@@ -156,3 +169,48 @@ jobs:
156169
exit "${code:-0}"
157170
fi
158171
172+
run_dev:
173+
runs-on: [linux-x86-ct5lp-224-8tpu]
174+
environment: testing
175+
container:
176+
image: vllm/vllm-tpu:v0.11.1
177+
options: --privileged
178+
env:
179+
CLOUD_TPU_ACCELERATOR: v5e-8
180+
JAX_PLATFORMS: tpu
181+
steps:
182+
# Cache Hugging Face hub
183+
- name: Cache HF hub
184+
uses: actions/cache@v4
185+
with:
186+
path: ~/.cache/huggingface
187+
key: hf-${{ runner.os }}-${{ hashFiles('pyproject.toml', 'requirements*.txt', 'constraints*.txt') }}
188+
restore-keys: |
189+
hf-${{ runner.os }}-
190+
191+
- name: Checkout code
192+
uses: actions/checkout@v4
193+
with:
194+
fetch-depth: 0
195+
196+
- name: Setup Tunix and tpu-inference
197+
run: |
198+
echo "Current directory:"
199+
pwd
200+
pip install --upgrade pip setuptools wheel
201+
202+
# Install Tunix
203+
pip uninstall torch torch-xla libtpu jax jaxlib -y
204+
pip install -e .[dev]
205+
206+
# Install tpu-inference
207+
pip uninstall torch libtpu jax jaxlib -y
208+
pip install tpu-inference==v0.11.1 --force-reinstall
209+
pip install pytest pytest-xdist
210+
211+
- name: Run tests
212+
env:
213+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
214+
run: |
215+
pytest tests/generate/vllm_sampler_test.py -v --tb=short
216+

README.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,13 +83,19 @@ pip install git+https://github.com/google/tunix
8383
```
8484

8585
3. From source (editable install) If you plan to modify the codebase and run it
86-
in development mode:
86+
in development mode. If you'd like to install vLLM, note that the
87+
tpu-inference-supported version is not released yet; please follow the
88+
instructions to install it manually
89+
(https://docs.vllm.ai/en/latest/getting_started/installation/google_tpu.html)
90+
or download the Docker image (vllm/vllm-tpu:v0.11.1) and then run
91+
`pip install tpu-inference` for the TPU backend:
8792

8893
```sh
8994
git clone https://github.com/google/tunix.git
9095
cd tunix
9196
pip install -e ".[dev]"
9297

98+
# Then install vLLM and tpu-inference
9399
```
94100

95101
## Getting Started

pyproject.toml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,21 +19,23 @@ classifiers = [
1919
]
2020
dependencies = [
2121
"datasets",
22+
"flax>=0.11.1",
2223
"gcsfs",
2324
"grain",
2425
"huggingface_hub",
25-
"jax[tpu]>=0.6.0,!=0.7.2", # Jax 0.7.2 has performance regression on OSS
2626
"jaxtyping",
27+
"jinja2", # Huggingface chat template
2728
"kagglehub",
28-
"omegaconf",
29+
"numba",
30+
"omegaconf", # CLI config
31+
"python-dotenv", # Huggingface API key
2932
"qwix",
3033
"sentencepiece",
3134
"tensorboardX",
3235
"tensorflow_datasets",
3336
"tqdm",
3437
"transformers",
35-
"python-dotenv",
36-
"jinja2",
38+
"hf_transfer", # Huggingface caching in CI
3739
]
3840

3941
[project.optional-dependencies]
@@ -49,12 +51,10 @@ docs = [
4951
"sphinx_contributors",
5052
]
5153
prod = [
52-
"flax>=0.11.2",
54+
"jax[tpu]>=0.6.0,!=0.7.2", # Jax 0.7.2 has performance regression on OSS
5355
]
5456
dev = [
55-
"flax>=0.11.2",
56-
"numba",
57-
"vllm",
57+
# Manually install vLLM & tpu-inference, which depends on jax[tpu]==0.7.2
5858
]
5959

6060
[project.urls]

0 commit comments

Comments
 (0)