Add env.py from Openverse builder

2001-01-01 00:00:00 +00:00
commit 1e40154fa0
1 changed files with 233 additions and 0 deletions
--- a/env.py
+++ b/env.py
@@ -0,0 +1,233 @@
+```python
+import re
+from typing import Any, Dict, Optional, Tuple, List
+
+import textarena as ta
+
+
+class StarGridDuelEnv(ta.Env):
+    """
+    Implementation of the 'StarGrid Duel' game environment.
+    Deterministic two-player strategy game where navigators place energy beacons
+    on a 3x3 grid aiming to align three of their own beacons in a row, column, or diagonal.
+    """
+
+    def __init__(self, max_turns: int = 9):
+        self.max_turns = max_turns
+        # Compile regex patterns once
+        self.place_pattern = re.compile(r"^\[Place:\s*(A|B|C)(1|2|3)\]$")
+        # Cell labels in order
+        self.all_cells = [f"{r}{c}" for r in "ABC" for c in "123"]
+
+    # ------------------------ Helper Methods ------------------------
+
+    def _extract_answer_content(self, action: str) -> str:
+        """
+        Extract content inside \boxed{} for machine parsing.
+        Falls back to entire content (trimmed) if no match.
+        """
+        match = re.search(r"\\boxed\{\{([^}]*)\}\}", action)
+        if not match:  # Also support single braces in case formatting differs
+            match = re.search(r"\\boxed\{([^}]*)\}", action)
+        if match:
+            return match.group(1).strip()
+        return action.strip()
+
+    def _check_victory(self, board: Dict[str, Optional[str]], color: str) -> bool:
+        """Check all 8 winning line combinations for the specified color."""
+        lines = [
+            ["A1", "A2", "A3"],
+            ["B1", "B2", "B3"],
+            ["C1", "C2", "C3"],
+            ["A1", "B1", "C1"],
+            ["A2", "B2", "C2"],
+            ["A3", "B3", "C3"],
+            ["A1", "B2", "C3"],
+            ["A3", "B2", "C1"],
+        ]
+        for line in lines:
+            if all(board[cell] == color for cell in line):
+                return True
+        return False
+
+    def _generate_board_str(self, board: Dict[str, Optional[str]]) -> str:
+        """Render the 3x3 StarGrid as a simple text table."""
+        rows = []
+        for r in "ABC":
+            row_cells = []
+            for c in "123":
+                val = board[f"{r}{c}"]
+                if val is None:
+                    row_cells.append(f"{r}{c}")
+                else:
+                    symbol = "B" if val == "Blue" else "C"
+                    row_cells.append(symbol)
+            rows.append(" | ".join(row_cells))
+        return "\n".join(rows)
+
+    def _get_active_player_label(self, player_id: int) -> str:
+        return "Navigator Alpha" if player_id == 0 else "Navigator Beta"
+
+    def _cell_valid(self, cell: str) -> bool:
+        return cell in self.all_cells
+
+    # ------------------------ Core Env API ------------------------
+
+    def reset(self, num_players: int, seed: Optional[int] = None):
+        """
+        Resets the environment to an initial state.
+
+        Args:
+            num_players: Number of players in the game. Must be 2.
+            seed: Optional seed for determinism.
+
+        Returns:
+            None (or self.state for compatibility)
+        """
+        if num_players != 2:
+            raise ValueError("StarGrid Duel requires exactly 2 players.")
+
+        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
+
+        empty_board = {cell: None for cell in self.all_cells}
+        game_state: Dict[str, Any] = {
+            "turn_index": 0,
+            "active_player": "A",
+            "board": empty_board,
+            "player_symbols": {"A": "Blue", "B": "Crimson"},
+            "move_history": [],
+            "winner": None,
+            "is_draw": False,
+            "observations": {"A": "", "B": ""},
+            "seed": seed,
+        }
+
+        role_mapping = {0: "Navigator Alpha", 1: "Navigator Beta"}
+
+        # Initialize internal game state
+        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt, role_mapping=role_mapping)
+
+        # Onboarding observations
+        onboarding_msg = (
+            "Welcome to StarGrid Duel!\n"
+            "Two navigators will alternately place energy beacons on the 3×3 StarGrid.\n"
+            "Your mission is to align three of your beacons in a line before your rival."
+        )
+        self.state.add_observation(onboarding_msg, ta.ObservationType.GAME_MESSAGE)
+
+        board_msg = self._generate_board_str(empty_board)
+        self.state.add_observation(board_msg, ta.ObservationType.GAME_BOARD)
+
+        return self.state
+
+    def step(self, action: str) -> Tuple[bool, ta.Info]:
+        """
+        Perform a single environment step for the current player.
+
+        Args:
+            action: The action text submitted by the current player.
+
+        Returns:
+            A tuple (done, info) where:
+                done: True if the episode has concluded
+                info: A ta.Info object with auxiliary details
+        """
+        player_id = self.state.current_player_id
+        player_key = "A" if player_id == 0 else "B"
+        player_color = self.state.game_state["player_symbols"][player_key]
+
+        # 1. Log the raw player action
+        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)
+
+        # 2. Extract the boxed content
+        extracted = self._extract_answer_content(action)
+
+        # 3. Validate the action pattern
+        match = self.place_pattern.match(extracted)
+        if not match:
+            self.state.set_invalid_move(reason="MalformedAction: The action must strictly follow '[Place: <cell_id>]' format.")
+            return self.state.step()
+
+        cell_id = f"{match.group(1)}{match.group(2)}"
+
+        if not self._cell_valid(cell_id):
+            self.state.set_invalid_move(reason=f"CellOutOfRange: {cell_id} is not a valid StarGrid coordinate.")
+            return self.state.step()
+
+        board = self.state.game_state["board"]
+        if board[cell_id] is not None:
+            self.state.set_invalid_move(reason=f"CellOccupied: {cell_id} is already occupied.")
+            return self.state.step()
+
+        # 4. Execute valid action: place beacon
+        board[cell_id] = player_color
+        self.state.game_state["board"] = board
+
+        # Record move
+        self.state.game_state["move_history"].append(
+            {"player": player_key, "action": extracted}
+        )
+
+        # Increment turn index and rotate active player (unless terminal)
+        self.state.game_state["turn_index"] += 1
+
+        # 5. Check for victory
+        if self._check_victory(board, player_color):
+            self.state.game_state["winner"] = player_key
+            winner_str = self._get_active_player_label(player_id)
+            self.state.set_winner(player_id=player_id, reason=f"{winner_str} aligned three energy beacons in a line.")
+            board_str = self._generate_board_str(board)
+            self.state.add_observation(board_str, ta.ObservationType.GAME_BOARD)
+            return self.state.step()
+
+        # 6. Check for draw (grid filled, no winner)
+        if all(v is not None for v in board.values()):
+            self.state.game_state["is_draw"] = True
+            self.state.set_draw(reason="All cells filled with no aligned beacons—a balanced standoff.")
+            board_str = self._generate_board_str(board)
+            self.state.add_observation(board_str, ta.ObservationType.GAME_BOARD)
+            return self.state.step()
+
+        # 7. Update board observation for next player
+        board_str = self._generate_board_str(board)
+        self.state.add_observation(board_str, ta.ObservationType.GAME_BOARD)
+
+        # 8. Rotate turn
+        self.state.game_state["active_player"] = "B" if player_key == "A" else "A"
+
+        # Proceed to next step
+        return self.state.step()
+
+    # -------------------- Player Prompt Generation --------------------
+
+    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
+        """
+        Generate a detailed prompt for each player at the start or on every turn.
+        """
+        role = "Navigator Alpha" if player_id == 0 else "Navigator Beta"
+        color = game_state["player_symbols"]["A" if player_id == 0 else "B"]
+        active_pid = game_state["active_player"]
+        active_label = "Navigator Alpha" if active_pid == "A" else "Navigator Beta"
+
+        board_repr = self._generate_board_str(game_state["board"])
+        open_cells = [cell for cell, val in game_state["board"].items() if val is None]
+        allowed_actions = [f"[Place: {cell}]" for cell in open_cells]
+
+        prompt = (
+            f"You are {role}, commanding the {color} energy.\n"
+            "Your goal: deploy energy beacons across the StarGrid to align three of your own in a straight line before your opponent.\n\n"
+            f"Current Board:\n{board_repr}\n\n"
+            f"Your Color: {color}\nActive Navigator: {active_label}\n\n"
+            f"Allowed Actions:\nFormat: [Place: <cell_id>]\nAvailable cells: {', '.join(open_cells) if open_cells else 'None'}\n\n"
+            "Response Format:\n"
+            "You may describe your reasoning, then finalize your move as:\n\n"
+            "Example valid response:\n"
+            "I will claim the center of the grid to control diagonals.\n"
+            "\\boxed{{[Place: B2]}}\n\n"
+            "Example invalid response:\n"
+            "I think I'll move now.\n"
+            "\\boxed{{[Move: B2]}}\n\n"
+            "Put your final answer within \\boxed{{}} at the end of your response."
+        )
+        return prompt
+```