Initial commit from Openverse UI

2025-11-21 08:04:39 +00:00
commit 7551dfaf12
3 changed files with 469 additions and 0 deletions
--- a/env.py
+++ b/env.py
@@ -0,0 +1,229 @@
+```python
+import re
+import random
+from typing import Any, Dict, List, Optional, Tuple
+
+import textarena as ta
+
+
+class LabyrinthConquestEnv(ta.Env):
+    """
+    Environment implementation for the Labyrinth Conquest game (Stage 1 design).
+    Two-player deterministic turn-based grid navigation game.
+    """
+
+    def __init__(self, grid_size: int = 5, max_turns: int = 80):
+        self.grid_size = grid_size
+        self.max_turns = max_turns
+        self.move_pattern = re.compile(r'^\[Move: (N|S|E|W)\]$')
+        self.rotate_pattern = re.compile(r'^\[Rotate: ([0-9]+),([0-9]+),(CW|CCW)\]$')
+        self.activate_pattern = re.compile(r'^\[Activate: (Bridge|TrapDisarm|RowShift)\]$')
+
+    # === Helper to extract boxed command ======================================
+    def _extract_answer_content(self, action: str) -> str:
+        """Extract content inside \\boxed{...}. Returns stripped content string."""
+        match = re.search(r'\\boxed\{\{?([^}]*)\}?\}', action, re.DOTALL)
+        if match:
+            return match.group(1).strip()
+        return action.strip()
+
+    # === Reset ===============================================================
+    def reset(self, num_players: int, seed: Optional[int] = None):
+        """
+        Resets the environment to an initial state.
+
+        Args:
+            num_players (int): Must be 2.
+            seed (Optional[int]): Optional seed for deterministic setup.
+
+        Returns:
+            Optional: self.state for chaining if needed.
+        """
+        if num_players != 2:
+            raise ValueError("Labyrinth Conquest is a two-player game.")
+
+        self.random = random.Random(seed)
+        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
+
+        size = self.grid_size
+        tiles = [["floor" for _ in range(size)] for _ in range(size)]
+
+        for i in range(size):
+            for j in range(size):
+                if (i, j) == (0, 0):
+                    tiles[i][j] = "startA"
+                elif (i, j) == (size - 1, size - 1):
+                    tiles[i][j] = "startB"
+                elif (i, j) == (size // 2, size // 2):
+                    tiles[i][j] = "relic"
+                else:
+                    r = self.random.random()
+                    if r < 0.1:
+                        tiles[i][j] = "wall"
+                    elif r < 0.2:
+                        tiles[i][j] = "trap"
+
+        all_gadgets = ["Bridge", "TrapDisarm", "RowShift"]
+        gA = self.random.sample(all_gadgets, k=2)
+        gB = self.random.sample(all_gadgets, k=2)
+
+        player_states = {
+            "A": {"position": [0, 0], "gadgets": gA, "moves_taken": 0, "distance_to_relic": self._manhattan([0, 0], [size // 2, size // 2])},
+            "B": {"position": [size - 1, size - 1], "gadgets": gB, "moves_taken": 0, "distance_to_relic": self._manhattan([size - 1, size - 1], [size // 2, size // 2])},
+        }
+
+        game_state = {
+            "grid_size": size,
+            "tiles": tiles,
+            "player_states": player_states,
+            "turn_number": 0,
+            "current_player": "A",
+            "seed": seed,
+            "action_history": [],
+            "winner": None,
+            "terminated": False,
+            "invalid_reason": None,
+            "observations": ["Game begins. Players start in opposite corners."],
+        }
+
+        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt)
+
+        layout_str = "\n".join(" ".join(row) for row in tiles)
+        self.state.add_observation(f"Initial labyrinth layout:\n{layout_str}", ta.ObservationType.GAME_BOARD)
+
+        return self.state
+
+    # === Step ================================================================
+    def step(self, action: str) -> Tuple[bool, ta.Info]:
+        """
+        Perform a single environment step for the current player.
+
+        Args:
+            action (str): The action text submitted by the current player.
+
+        Returns:
+            Tuple[bool, ta.Info]: done flag and info object from the state.
+        """
+        pid = self.state.current_player_id
+        player_key = "A" if pid == 0 else "B"
+        opp_key = "B" if player_key == "A" else "A"
+        game_state = self.state.game_state
+        player_state = game_state["player_states"][player_key]
+        relic_pos = [self.grid_size // 2, self.grid_size // 2]
+        tiles = game_state["tiles"]
+
+        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=pid, to_id=-1)
+        content = self._extract_answer_content(action)
+
+        # Validate action format
+        if not (self.move_pattern.match(content) or self.rotate_pattern.match(content) or self.activate_pattern.match(content)):
+            self.state.set_invalid_move(reason="Invalid action format")
+            return self.state.step()
+
+        if self.move_pattern.match(content):
+            direction = self.move_pattern.match(content).group(1)
+            new_pos = player_state["position"].copy()
+            if direction == "N":
+                new_pos[0] -= 1
+            elif direction == "S":
+                new_pos[0] += 1
+            elif direction == "E":
+                new_pos[1] += 1
+            elif direction == "W":
+                new_pos[1] -= 1
+
+            if not (0 <= new_pos[0] < self.grid_size and 0 <= new_pos[1] < self.grid_size):
+                self.state.set_invalid_move(reason="Tile out of bounds")
+                return self.state.step()
+
+            if tiles[new_pos[0]][new_pos[1]] == "wall":
+                self.state.set_invalid_move(reason="Wall blocks path")
+                return self.state.step()
+
+            player_state["position"] = new_pos
+            player_state["moves_taken"] += 1
+            action_desc = f"{player_key} moved {direction}."
+            self.state.add_observation(action_desc, ta.ObservationType.GAME_MESSAGE)
+            game_state["action_history"].append(f"{player_key}: {content}")
+            game_state["observations"].append(action_desc)
+        elif self.rotate_pattern.match(content):
+            x, y, dir_rot = self.rotate_pattern.match(content).groups()
+            x, y = int(x), int(y)
+            if not (0 <= x < self.grid_size and 0 <= y < self.grid_size):
+                self.state.set_invalid_move(reason="Tile out of bounds")
+                return self.state.step()
+            desc = f"{player_key} rotated tile ({x},{y}) {dir_rot}."
+            self.state.add_observation(desc, ta.ObservationType.GAME_MESSAGE)
+            game_state["action_history"].append(f"{player_key}: {content}")
+            game_state["observations"].append(desc)
+        elif self.activate_pattern.match(content):
+            gadget = self.activate_pattern.match(content).group(1)
+            if gadget not in player_state["gadgets"]:
+                self.state.set_invalid_move(reason="Gadget unavailable")
+                return self.state.step()
+            player_state["gadgets"].remove(gadget)
+            desc = f"{player_key} activated {gadget}."
+            self.state.add_observation(desc, ta.ObservationType.GAME_MESSAGE)
+            game_state["action_history"].append(f"{player_key}: {content}")
+            game_state["observations"].append(desc)
+
+        player_state["distance_to_relic"] = self._manhattan(player_state["position"], relic_pos)
+        game_state["turn_number"] += 1
+        game_state["current_player"] = opp_key
+
+        if self._same_pos(player_state["position"], relic_pos):
+            game_state["winner"] = player_key
+            self.state.set_winner(player_id=pid, reason=f"{player_key} reached the relic first.")
+            game_state["terminated"] = True
+            return self.state.step()
+
+        if game_state["turn_number"] >= self.max_turns:
+            dA = game_state["player_states"]["A"]["distance_to_relic"]
+            dB = game_state["player_states"]["B"]["distance_to_relic"]
+            if dA < dB:
+                self.state.set_winner(player_id=0, reason="Player A closer to the relic.")
+                game_state["winner"] = "A"
+            elif dB < dA:
+                self.state.set_winner(player_id=1, reason="Player B closer to the relic.")
+                game_state["winner"] = "B"
+            else:
+                self.state.set_draw(reason="Equal distance to the relic.")
+                game_state["winner"] = None
+            game_state["terminated"] = True
+            return self.state.step()
+
+        return self.state.step()
+
+    # === Prompt ==============================================================
+    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
+        player_key = "A" if player_id == 0 else "B"
+        player_info = game_state["player_states"][player_key]
+        relic_pos = (self.grid_size // 2, self.grid_size // 2)
+        return (
+            "You are an Explorer navigating a shifting labyrinth.\n"
+            "Your goal is to reach the Relic Tile before your opponent by issuing one of the allowed commands.\n\n"
+            "Available actions (case-sensitive):\n"
+            "- [Move: N|S|E|W] — Move one tile in a direction if no wall blocks the way.\n"
+            "- [Rotate: x,y,CW|CCW] — Rotate the tile at coordinates (x,y).\n"
+            "- [Activate: Bridge|TrapDisarm|RowShift] — Use one of your gadgets (if available).\n\n"
+            f"Current Turn: {game_state['turn_number']}\n"
+            f"You are Player {player_key}. Opponent is Player {'B' if player_key == 'A' else 'A'}.\n"
+            f"Your position: {tuple(player_info['position'])}\n"
+            f"Relic position: {relic_pos}\n"
+            f"Available gadgets: {', '.join(player_info['gadgets']) if player_info['gadgets'] else 'None'}\n\n"
+            "Respond with exactly one valid action token.\n"
+            "Put your final answer within \\boxed{{}} at the end of your response.\n\n"
+            "Example valid response:\n"
+            "I will move north to progress toward the relic.\n"
+            "\\boxed{{[Move: N]}}\n\n"
+            "Example invalid response:\n"
+            "\\boxed{{Move north}}  ← Invalid format; must include brackets and colon."
+        )
+
+    # === Utility =============================================================
+    def _manhattan(self, a: List[int], b: List[int]) -> int:
+        return abs(a[0] - b[0]) + abs(a[1] - b[1])
+
+    def _same_pos(self, a: List[int], b: List[int]) -> bool:
+        return a[0] == b[0] and a[1] == b[1]
+```