chore: Upgrade trl, vllm, and transformers in grpo_trl example (#1498)

charlesfrye · web-flow · commit eae2b7e7713b · 2026-02-16T16:42:15.000-08:00
- trl[vllm]: 0.19.1 -> 0.28.0
- vllm: 0.9.1 -> 0.12.0 (max supported by trl 0.28.0)
- transformers: 4.52.4 -> 4.57
- flashinfer-python: 0.2.6.post1 -> 0.5.3
diff --git a/06_gpu_and_ml/reinforcement-learning/grpo_trl.py b/06_gpu_and_ml/reinforcement-learning/grpo_trl.py
@@ -24,9 +24,9 @@
 # We define an image where we install the TRL library.
 # We also install vLLM for the next part of this example. We also use Weights & Biases for logging.
 image: modal.Image = modal.Image.debian_slim().uv_pip_install(
-    "trl[vllm]==0.19.1",
-    "vllm==0.9.1",
-    "transformers==4.52.4",
+    "trl[vllm]==0.28.0",
+    "vllm==0.12.0",
+    "transformers==4.57",
     "datasets==3.5.1",
     "wandb==0.17.6",
 )
@@ -236,8 +236,8 @@ def get_latest_checkpoint_file_path():
 vllm_image = (
     modal.Image.debian_slim(python_version="3.12")
     .uv_pip_install(
-        "vllm==0.9.1",
-        "flashinfer-python==0.2.6.post1",
+        "vllm==0.12.0",
+        "flashinfer-python==0.5.3",
         extra_index_url="https://download.pytorch.org/whl/cu128",
         extra_options="--index-strategy unsafe-best-match",
     )