Support playing programmatically

yanfengliu · yanfengliu · commit 4ce00d4cffa9 · 2026-02-08T18:53:00.000-08:00
diff --git a/.cursor/rules/update-progress.mdc b/.cursor/rules/update-progress.mdc
@@ -0,0 +1,6 @@
+---
+alwaysApply: true
+---
+Before you do anything, read `PROGRESS.md` to see what is already done.
+After you do anything, update `PROGRESS.md` to keep track of what is done.
+This serves as long-term context.
diff --git a/PROGRESS.md b/PROGRESS.md
@@ -0,0 +1,8 @@
+## 2026-02-08
+
+- Added Gym-like programmatic play interface in `src/env.py` with structured and numpy observations.
+- Added high-level programmatic actions in `src/mediator.py` (create/remove paths, pause/resume, step time).
+- Expanded programmatic-play tests in `test/test_env.py` for loops, invalid actions, limits, reward delivery, and observations.
+
+Tests:
+- `python -m unittest -v test.test_env`
diff --git a/README.md b/README.md
@@ -17,5 +17,19 @@ This repo uses `pygame-ce` to implement Mini Metro, a fun 2D strategic game wher
 * The number of grey circles on top of the screen is the number of availabel metro lines left.
 * Click on the colored circle at the top to cancel an established line.
 
+# Programmatic play
+Use the Gym-like environment in `src/env.py`:
+
+```
+from env import MiniMetroEnv
+
+env = MiniMetroEnv(dt_ms=16)
+obs = env.reset(seed=42)
+obs, reward, done, info = env.step(
+    {"type": "create_path", "stations": [0, 1, 2], "loop": False}
+)
+obs, reward, done, info = env.step({"type": "remove_path", "path_index": 0})
+```
+
 # Testing
 `python -m unittest -v`
diff --git a/src/env.py b/src/env.py
@@ -0,0 +1,218 @@
+import random
+from typing import Any, Dict, List, Tuple
+
+import numpy as np
+
+from mediator import Mediator
+
+
+class MiniMetroEnv:
+    """Gym-like wrapper around a ``Mediator`` for programmatic play.
+
+    ``step`` applies one action dictionary, optionally advances game time,
+    and returns ``(obs, reward, done, info)``. Observations come in two
+    forms: a nested ``"structured"`` dictionary keyed by entity ids and an
+    ``"arrays"`` dictionary of numpy encodings keyed by list position.
+    """
+
+    def __init__(self, dt_ms: int | None = None) -> None:
+        """Create the environment with a fresh ``Mediator``.
+
+        Args:
+            dt_ms: Default time delta forwarded to ``Mediator.step_time`` on
+                each ``step`` call that does not pass its own ``dt_ms``.
+                ``None`` means ``step`` does not advance time by default.
+        """
+        self.dt_ms_default = dt_ms
+        self.mediator = Mediator()
+        self.last_score = self.mediator.score
+
+    def reset(self, seed: int | None = None) -> Dict[str, Any]:
+        """Replace the mediator with a new one and return its observation.
+
+        Args:
+            seed: When given, seeds both the ``random`` module and numpy's
+                global generator before the new ``Mediator`` is built.
+
+        Returns:
+            The observation of the freshly created game (see ``observe``).
+        """
+        if seed is not None:
+            random.seed(seed)
+            np.random.seed(seed)
+        self.mediator = Mediator()
+        self.last_score = self.mediator.score
+        return self.observe()
+
+    def step(
+        self, action: Dict[str, Any] | None = None, dt_ms: int | None = None
+    ) -> Tuple[Dict[str, Any], int, bool, Dict[str, Any]]:
+        """Apply ``action``, optionally advance time, and report the outcome.
+
+        Args:
+            action: Action dictionary handed to ``Mediator.apply_action``;
+                ``None`` is replaced by ``{"type": "noop"}``.
+            dt_ms: Time delta for this step. Falls back to the constructor
+                default; if both are ``None`` time is not advanced.
+
+        Returns:
+            ``(obs, reward, done, info)`` where ``reward`` is the score gained
+            since the previous step (or reset), ``done`` is always ``False``
+            and ``info["action_ok"]`` is the result of ``apply_action``.
+        """
+        if action is None:
+            action = {"type": "noop"}
+        action_ok = self.mediator.apply_action(action)
+
+        if dt_ms is None:
+            dt_ms = self.dt_ms_default
+        if dt_ms is not None:
+            self.mediator.step_time(dt_ms)
+
+        obs = self.observe()
+        reward = self.mediator.score - self.last_score
+        self.last_score = self.mediator.score
+        done = False
+        info = {"action_ok": action_ok}
+        return obs, reward, done, info
+
+    def observe(self) -> Dict[str, Any]:
+        """Snapshot the current game state.
+
+        Returns:
+            A dictionary with two entries: ``"structured"`` (per-entity
+            dictionaries, global counters and id-to-index maps) and
+            ``"arrays"`` (the numpy encoding built by ``_encode_numpy``).
+        """
+        # Each map translates an entity id to its position in the matching
+        # mediator list; the numpy encoding is expressed in these positions.
+        station_id_to_index = {
+            station.id: idx for idx, station in enumerate(self.mediator.stations)
+        }
+        path_id_to_index = {
+            path.id: idx for idx, path in enumerate(self.mediator.paths)
+        }
+        metro_id_to_index = {
+            metro.id: idx for idx, metro in enumerate(self.mediator.metros)
+        }
+        passenger_id_to_index = {
+            passenger.id: idx for idx, passenger in enumerate(self.mediator.passengers)
+        }
+
+        # A passenger is located at ("station", id), ("metro", id) or None
+        # when it is held by neither. Metros are scanned last, so a metro
+        # entry overwrites a station entry for the same passenger.
+        passenger_locations: Dict[str, Tuple[str, str] | None] = {
+            passenger.id: None for passenger in self.mediator.passengers
+        }
+        for station in self.mediator.stations:
+            for passenger in station.passengers:
+                passenger_locations[passenger.id] = ("station", station.id)
+        for metro in self.mediator.metros:
+            for passenger in metro.passengers:
+                passenger_locations[passenger.id] = ("metro", metro.id)
+
+        structured = {
+            "stations": [
+                {
+                    "id": station.id,
+                    "position": (station.position.left, station.position.top),
+                    "shape_type": station.shape.type,
+                    "passenger_ids": [p.id for p in station.passengers],
+                    "passenger_count": len(station.passengers),
+                }
+                for station in self.mediator.stations
+            ],
+            "paths": [
+                {
+                    "id": path.id,
+                    "station_ids": [s.id for s in path.stations],
+                    "is_looped": path.is_looped,
+                    "color": path.color,
+                }
+                for path in self.mediator.paths
+            ],
+            "metros": [
+                {
+                    "id": metro.id,
+                    "path_id": metro.path_id,
+                    "position": (
+                        (metro.position.left, metro.position.top)
+                        if metro.position is not None
+                        else None
+                    ),
+                    "current_station_id": (
+                        metro.current_station.id if metro.current_station else None
+                    ),
+                    "passenger_ids": [p.id for p in metro.passengers],
+                }
+                for metro in self.mediator.metros
+            ],
+            "passengers": [
+                {
+                    "id": passenger.id,
+                    "destination_shape_type": passenger.destination_shape.type,
+                    "is_at_destination": passenger.is_at_destination,
+                    "location": passenger_locations[passenger.id],
+                }
+                for passenger in self.mediator.passengers
+            ],
+            "score": self.mediator.score,
+            "time_ms": self.mediator.time_ms,
+            "steps": self.mediator.steps,
+            "is_paused": self.mediator.is_paused,
+            "index": {
+                "station_id_to_index": station_id_to_index,
+                "path_id_to_index": path_id_to_index,
+                "metro_id_to_index": metro_id_to_index,
+                "passenger_id_to_index": passenger_id_to_index,
+            },
+        }
+
+        arrays = self._encode_numpy(
+            station_id_to_index,
+            path_id_to_index,
+            metro_id_to_index,
+            passenger_id_to_index,
+        )
+
+        return {"structured": structured, "arrays": arrays}
+
+    def _encode_numpy(
+        self,
+        station_id_to_index: Dict[str, int],
+        path_id_to_index: Dict[str, int],
+        metro_id_to_index: Dict[str, int],
+        passenger_id_to_index: Dict[str, int],
+    ) -> Dict[str, Any]:
+        """Encode the game state as numpy arrays addressed by list position.
+
+        Args:
+            station_id_to_index: Station id to position in ``mediator.stations``.
+            path_id_to_index: Path id to position in ``mediator.paths``.
+            metro_id_to_index: Metro id to position in ``mediator.metros``.
+            passenger_id_to_index: Passenger id to position in
+                ``mediator.passengers``.
+
+        Returns:
+            A dictionary of arrays. ``-1`` is the sentinel for "none": a metro
+            without a position or with an unknown path, and a passenger that
+            is not held by any station / metro. ``"path_station_indices"`` is
+            a list with one index array per path because paths differ in
+            length.
+        """
+        # reshape(-1, 2) keeps the (N, 2) layout even when there are no
+        # stations, matching the layout of ``metro_positions`` below.
+        station_positions = np.array(
+            [
+                [station.position.left, station.position.top]
+                for station in self.mediator.stations
+            ],
+            dtype=np.float32,
+        ).reshape(-1, 2)
+        station_shape_types = np.array(
+            [int(station.shape.type.value) for station in self.mediator.stations],
+            dtype=np.int64,
+        )
+        station_passenger_counts = np.array(
+            [len(station.passengers) for station in self.mediator.stations],
+            dtype=np.int64,
+        )
+        path_station_indices = [
+            np.array(
+                [station_id_to_index[s.id] for s in path.stations], dtype=np.int64
+            )
+            for path in self.mediator.paths
+        ]
+        path_is_looped = np.array(
+            [int(path.is_looped) for path in self.mediator.paths], dtype=np.int64
+        )
+
+        # Metros without a position are encoded as (-1, -1); an empty metro
+        # list yields a (0, 2) array.
+        metro_positions = np.array(
+            [
+                [metro.position.left, metro.position.top]
+                if metro.position is not None
+                else [-1, -1]
+                for metro in self.mediator.metros
+            ],
+            dtype=np.float32,
+        ).reshape(-1, 2)
+        metro_path_indices = np.array(
+            [
+                path_id_to_index.get(metro.path_id, -1)
+                for metro in self.mediator.metros
+            ],
+            dtype=np.int64,
+        )
+
+        passenger_destination_types = np.array(
+            [
+                int(passenger.destination_shape.type.value)
+                for passenger in self.mediator.passengers
+            ],
+            dtype=np.int64,
+        )
+        passenger_station_indices = np.full(
+            (len(self.mediator.passengers),), -1, dtype=np.int64
+        )
+        passenger_metro_indices = np.full(
+            (len(self.mediator.passengers),), -1, dtype=np.int64
+        )
+
+        # Passengers missing from ``passenger_id_to_index`` are skipped.
+        for station in self.mediator.stations:
+            for passenger in station.passengers:
+                idx = passenger_id_to_index.get(passenger.id)
+                if idx is not None:
+                    passenger_station_indices[idx] = station_id_to_index[station.id]
+        for metro in self.mediator.metros:
+            for passenger in metro.passengers:
+                idx = passenger_id_to_index.get(passenger.id)
+                if idx is not None:
+                    passenger_metro_indices[idx] = metro_id_to_index[metro.id]
+
+        return {
+            "station_positions": station_positions,
+            "station_shape_types": station_shape_types,
+            "station_passenger_counts": station_passenger_counts,
+            "path_station_indices": path_station_indices,
+            "path_is_looped": path_is_looped,
+            "metro_positions": metro_positions,
+            "metro_path_indices": metro_path_indices,
+            "passenger_destination_types": passenger_destination_types,
+            "passenger_station_indices": passenger_station_indices,
+            "passenger_metro_indices": passenger_metro_indices,
+        }
diff --git a/src/mediator.py b/src/mediator.py
@@ -74,6 +74,9 @@ def __init__(self) -> None:
         self.is_paused = False
         self.score = 0
 
+    def step_time(self, dt_ms: int) -> None:
+        """Advance the game by ``dt_ms`` by delegating to ``increment_time``."""
+        self.increment_time(dt_ms)
+
     def assign_paths_to_buttons(self) -> None:
         for path_button in self.path_buttons:
             path_button.remove_path()
@@ -162,6 +165,19 @@ def remove_path(self, path: Path) -> None:
         self.assign_paths_to_buttons()
         self.find_travel_plan_for_passengers()
 
+    def remove_path_by_id(self, path_id: str) -> bool:
+        """Remove the first path whose ``id`` equals ``path_id``.
+
+        Returns:
+            ``True`` if a matching path was found and removed, else ``False``.
+        """
+        target = next(
+            (candidate for candidate in self.paths if candidate.id == path_id),
+            None,
+        )
+        if target is None:
+            return False
+        self.remove_path(target)
+        return True
+
+    def remove_path_by_index(self, path_index: int) -> bool:
+        """Remove the path at position ``path_index`` in ``self.paths``.
+
+        Negative and out-of-range indices are rejected rather than wrapped.
+
+        Returns:
+            ``True`` if a path was removed, ``False`` if the index is invalid.
+        """
+        in_range = 0 <= path_index < len(self.paths)
+        if not in_range:
+            return False
+        self.remove_path(self.paths[path_index])
+        return True
+
     def start_path_on_station(self, station: Station) -> None:
         if len(self.paths) < self.num_paths:
             self.is_creating_path = True
@@ -178,6 +194,30 @@ def start_path_on_station(self, station: Station) -> None:
             self.path_being_created = path
             self.paths.append(path)
 
+    def create_path_from_station_indices(
+        self, station_indices: List[int], loop: bool = False
+    ) -> Path | None:
+        """Build a complete path through the stations at ``station_indices``.
+
+        Args:
+            station_indices: Positions in ``self.stations``, in visiting
+                order. At least two are required and each must be in range.
+            loop: When true, the path is ended on its first station to close
+                it. A trailing index equal to the first one is accepted and
+                treated as that closing stop.
+
+        Returns:
+            The path created by this call, or ``None`` when the request is
+            invalid, no path slot is free, path creation could not start, or
+            the new path is no longer in ``self.paths`` once creation ends.
+        """
+        if len(station_indices) < 2 or len(self.paths) >= self.num_paths:
+            return None
+        if any(
+            idx < 0 or idx >= len(self.stations) for idx in station_indices
+        ):
+            return None
+
+        first_idx = station_indices[0]
+        if loop:
+            # The loop is closed by ending on the first station, so every
+            # other requested station has to be added before that; otherwise
+            # the last requested station would never be visited.
+            if station_indices[-1] == first_idx:
+                middle = station_indices[1:-1]
+            else:
+                middle = station_indices[1:]
+            end_idx = first_idx
+        else:
+            middle = station_indices[1:-1]
+            end_idx = station_indices[-1]
+
+        self.start_path_on_station(self.stations[first_idx])
+        created = self.path_being_created
+        if not created:
+            return None
+
+        for idx in middle:
+            self.add_station_to_path(self.stations[idx])
+        self.end_path_on_station(self.stations[end_idx])
+
+        # Report the path built here rather than whichever path is last in
+        # the list, so a path dropped during creation is not mistaken for
+        # success.
+        if any(path is created for path in self.paths):
+            return created
+        return None
+
     def add_station_to_path(self, station: Station) -> None:
         assert self.path_being_created is not None
         if self.path_being_created.stations[-1] == station:
@@ -217,6 +257,31 @@ def finish_path_creation(self) -> None:
         self.path_being_created = None
         self.assign_paths_to_buttons()
 
+    def set_paused(self, paused: bool) -> None:
+        """Set the ``is_paused`` flag to ``paused``."""
+        self.is_paused = paused
+
+    def apply_action(self, action: Dict) -> bool:
+        """Dispatch one programmatic action dictionary.
+
+        The ``"type"`` entry selects the operation: ``"create_path"``,
+        ``"remove_path"``, ``"pause"``, ``"resume"`` or ``"noop"``. A missing
+        type is treated like ``"noop"``.
+
+        Returns:
+            ``True`` when the action was accepted, ``False`` for an unknown
+            type or a request that could not be carried out.
+        """
+        kind = action.get("type")
+
+        if kind == "create_path":
+            created = self.create_path_from_station_indices(
+                action.get("stations", []), bool(action.get("loop", False))
+            )
+            return created is not None
+
+        if kind == "remove_path":
+            # An explicit id takes precedence over an index.
+            if "path_id" in action:
+                return self.remove_path_by_id(action["path_id"])
+            if "path_index" in action:
+                return self.remove_path_by_index(action["path_index"])
+            return False
+
+        if kind in ("pause", "resume"):
+            self.set_paused(kind == "pause")
+            return True
+
+        return kind == "noop" or kind is None
+
     def end_path_on_station(self, station: Station) -> None:
         assert self.path_being_created is not None
         # current station de-dupe
diff --git a/test/test_env.py b/test/test_env.py