Make `action` optional in the gym env's `step()` so it can be called without an action for the initial step

gautierdag · gautierdag · commit 7605373f7e42 · 2025-03-24T18:50:22.000Z
diff --git a/plancraft/simple.py b/plancraft/simple.py
@@ -1,6 +1,6 @@
 import json
 import os
-from typing import Any
+from typing import Any, Optional
 
 from plancraft.config import PlancraftExample
 from plancraft.environment.actions import (
@@ -86,7 +86,7 @@ def parse_raw_model_response(self, generated_text: str) -> str:
         return f"Only select actions from the following: {', '.join(action_names)}"
 
     def step(
-        self, action: str
+        self, action: Optional[str] = None
     ) -> tuple[dict[str, Any], float, bool, bool, dict[str, Any]]:
         """
         Execute action and return next observation, reward, termination status, truncation status, and info
@@ -102,6 +102,19 @@ def step(
             truncated: Whether the episode is done due to external limits (e.g. max steps reached)
             info: Additional diagnostic information (helpful for debugging)
         """
+        # Handle initial step
+        if not action:
+            observation = self.environment.step()
+            observation["target"] = self.example.target
+            if self.use_text_inventory:
+                text = target_and_inventory_to_text_obs(
+                    target=self.example.target, inventory=observation["inventory"]
+                )
+            else:
+                text = get_objective_str(self.example.target)
+            observation["text"] = text
+            return observation, 0.0, False, False, {"steps": self.current_step}
+
         action = self.parse_raw_model_response(action)
         self.current_step += 1
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "plancraft"
-version = "0.4.0"
+version = "0.4.1"
 description = "Plancraft: an evaluation dataset for planning with LLM agents"
 readme = "README.md"
 requires-python = ">=3.10"
diff --git a/uv.lock b/uv.lock