[CI] Fix SDL install (#2978)

vmoens · web-flow · commit 74d6cbcbe41d · 2025-05-30T10:50:09.000+01:00
diff --git a/.github/unittest/linux/scripts/run_all.sh b/.github/unittest/linux/scripts/run_all.sh
@@ -12,11 +12,11 @@ if [[ $OSTYPE != 'darwin'* ]]; then
   apt-get install -y vim git wget cmake
 
   # Enable universe repository
-  apt-get install -y software-properties-common
-  add-apt-repository universe
-  apt-get update
+  # apt-get install -y software-properties-common
+  # add-apt-repository universe
+  # apt-get update
 
-  apt-get install -y libsdl2-dev libsdl2-2.0-0
+  # apt-get install -y libsdl2-dev libsdl2-2.0-0
 
   apt-get install -y libglfw3 libgl1-mesa-glx libosmesa6 libglew-dev
   apt-get install -y libglvnd0 libgl1 libglx0 libegl1 libgles2 xvfb
diff --git a/.github/unittest/linux_olddeps/scripts_gym_0_13/run_test.sh b/.github/unittest/linux_olddeps/scripts_gym_0_13/run_test.sh
@@ -28,10 +28,13 @@ python .github/unittest/helpers/coverage_run_parallel.py -m pytest test/smoke_te
 export DISPLAY=:99
 Xvfb :99 -screen 0 1400x900x24 > /dev/null 2>&1 &
 
-CKPT_BACKEND=torch MUJOCO_GL=egl python .github/unittest/helpers/coverage_run_parallel.py -m pytest --instafail -v --durations 200 --ignore test/test_distributed.py \
---ignore test/test_rlhf.py \
---ignore test/llm \
---timeout=120 --mp_fork_if_no_cuda
+CKPT_BACKEND=torch MUJOCO_GL=egl python .github/unittest/helpers/coverage_run_parallel.py -m pytest \
+    --instafail -v \
+    --durations 200 \
+    --ignore test/test_distributed.py \
+    --ignore test/test_rlhf.py \
+    --ignore test/llm \
+    --mp_fork_if_no_cuda
 
 #pytest --instafail -v --durations 200
 #python test/test_libs.py
diff --git a/test/test_specs.py b/test/test_specs.py
@@ -2537,8 +2537,8 @@ def test_stack_choice(self, input_type, shape, stack_dim):
                 choices = [NonTensorData("a"), NonTensorData("b"), NonTensorData("c")]
             else:
                 choices = [
-                    NonTensorStack("a").expand(shape + (1,)).squeeze(-1),
-                    NonTensorStack("d").expand(shape + (1,)).squeeze(-1),
+                    NonTensorData("a", batch_size=shape),
+                    NonTensorData("d", batch_size=shape),
                 ]
 
         spec0 = Choice(choices)
diff --git a/test/test_transforms.py b/test/test_transforms.py
@@ -11616,7 +11616,7 @@ def _make_transform_env(self, out_key, base_env):
         transform = KLRewardTransform(actor, out_keys=out_key)
         return Compose(
             TensorDictPrimer(
-                sample_log_prob=Unbounded(shape=base_env.action_spec.shape[:-1]),
+                action_log_prob=Unbounded(shape=base_env.action_spec.shape[:-1]),
                 shape=base_env.shape,
             ),
             transform,
@@ -11640,7 +11640,7 @@ def test_transform_no_env(self, in_key, out_key):
             {
                 "action": torch.randn(*batch, 7),
                 "observation": torch.randn(*batch, 7),
-                "sample_log_prob": torch.randn(*batch),
+                "action_log_prob": torch.randn(*batch),
             },
             batch,
         )
@@ -11658,7 +11658,7 @@ def test_transform_compose(self):
                 "action": torch.randn(*batch, 7),
                 "observation": torch.randn(*batch, 7),
                 "next": {t[0].in_keys[0]: torch.zeros(*batch, 1)},
-                "sample_log_prob": torch.randn(*batch),
+                "action_log_prob": torch.randn(*batch),
             },
             batch,
         )
@@ -11678,7 +11678,7 @@ def test_transform_env(self, out_key):
         base_env = self.envclass()
         torch.manual_seed(0)
         actor = self._make_actor()
-        # we need to patch the env and create a sample_log_prob spec to make check_env_specs happy
+        # we need to patch the env and create an action_log_prob spec to make check_env_specs happy
         env = TransformedEnv(
             base_env,
             Compose(
@@ -11711,7 +11711,7 @@ def update(x):
     @pytest.mark.parametrize("out_key", [None, "some_stuff", ["some_stuff"]])
     def test_single_trans_env_check(self, out_key):
         base_env = self.envclass()
-        # we need to patch the env and create a sample_log_prob spec to make check_env_specs happy
+        # we need to patch the env and create an action_log_prob spec to make check_env_specs happy
         env = TransformedEnv(base_env, self._make_transform_env(out_key, base_env))
         check_env_specs(env)
 
@@ -11776,7 +11776,7 @@ def test_transform_model(self):
                 "action": torch.randn(*batch, 7),
                 "observation": torch.randn(*batch, 7),
                 "next": {t.in_keys[0]: torch.zeros(*batch, 1)},
-                "sample_log_prob": torch.randn(*batch),
+                "action_log_prob": torch.randn(*batch),
             },
             batch,
         )
@@ -11796,7 +11796,7 @@ def test_transform_rb(self, rbclass):
                 "action": torch.randn(*batch, 7),
                 "observation": torch.randn(*batch, 7),
                 "next": {t.in_keys[0]: torch.zeros(*batch, 1)},
-                "sample_log_prob": torch.randn(*batch),
+                "action_log_prob": torch.randn(*batch),
             },
             batch,
         )
diff --git a/torchrl/data/tensor_specs.py b/torchrl/data/tensor_specs.py
@@ -6489,7 +6489,10 @@ def _stack_specs(list_of_spec, dim=0, out=None):
             if dim < 0:
                 dim += len(shape) + 1
             shape.insert(dim, len(list_of_spec))
-            return spec0.clone().unsqueeze(dim).expand(shape)
+            spec0 = spec0.clone()
+            spec0 = spec0.unsqueeze(dim)
+            spec0 = spec0.expand(shape)
+            return spec0
         return Stacked(*list_of_spec, dim=dim)
     else:
         raise NotImplementedError
diff --git a/torchrl/envs/transforms/llm.py b/torchrl/envs/transforms/llm.py
@@ -232,6 +232,10 @@ def _step(
         reward_key = self.in_keys[0]
         reward = next_tensordict.get(reward_key)
         curr_log_prob = tensordict.get(self.sample_log_prob_key)
+        if curr_log_prob is None:
+            raise KeyError(
+                f"log_prob key {self.sample_log_prob_key} not found in tensordict with keys {list(tensordict.keys(True))}"
+            )
         log_prob = log_prob.to(curr_log_prob.device)
         # We want the log-probs to have a similar dim to the reward
         curr_log_prob = curr_log_prob.unsqueeze(-1)