Add env.py from Openverse builder

2001-01-01 00:00:00 +00:00
commit 2ce0405988
1 changed files with 240 additions and 0 deletions
--- a/env.py
+++ b/env.py
@@ -0,0 +1,240 @@
+```python
+import re
+import random
+from typing import Any, Dict, List, Optional, Tuple
+
+import textarena as ta
+
+
+class TicTacTrailEnv(ta.Env):
+    """
+    Tic-Tac-Trail: A deterministic, turn-based tactical puzzle game.
+
+    Two explorers — Team Sun (S) and Team Moon (M) — claim tiles on an ancient 3×3 grid.
+    The first team to align three of their emblems horizontally, vertically, or diagonally wins.
+    """
+
+    def __init__(self, max_turns: int = 9):
+        self.max_turns = max_turns
+        # Define regex patterns for allowed actions
+        self.mark_pattern = re.compile(r"^\[Mark:(0|1|2),(0|1|2)\]$")
+        self.pass_pattern = re.compile(r"^\[Pass\]$")
+        self.num_players = 2
+
+    # ----------------------------------------------------------------
+    # Helper: Extract boxed content
+    # ----------------------------------------------------------------
+    def _extract_answer_content(self, action: str) -> str:
+        """
+        Extract content from \\boxed{{}}. Returns stripped text.
+        """
+        match = re.search(r"\\boxed\{\{(.*?)\}\}", action, re.DOTALL)
+        if not match:
+            # Try single braces fallback (\boxed{})
+            match = re.search(r"\\boxed\{(.*?)\}", action, re.DOTALL)
+        return match.group(1).strip() if match else action.strip()
+
+    # ----------------------------------------------------------------
+    # Helper: Board display utility
+    # ----------------------------------------------------------------
+    def _board_to_str(self, board: List[List[str]]) -> str:
+        """Convert board to a readable string representation."""
+        return "\n".join([" ".join(row) for row in board])
+
+    # ----------------------------------------------------------------
+    # Helper: Compute available (empty) cells
+    # ----------------------------------------------------------------
+    def _get_available_moves(self, board: List[List[str]]) -> List[List[int]]:
+        moves: List[List[int]] = []
+        for r in range(3):
+            for c in range(3):
+                if board[r][c] == "_":
+                    moves.append([r, c])
+        return moves
+
+    # ----------------------------------------------------------------
+    # Helper: Check for winner
+    # ----------------------------------------------------------------
+    def _check_winner(self, board: List[List[str]]) -> Optional[str]:
+        """Return 'S' or 'M' if a symbol wins, else None."""
+        lines = []
+        # Rows and cols
+        for i in range(3):
+            lines.append(board[i])
+            lines.append([board[r][i] for r in range(3)])
+        # Diagonals
+        lines.append([board[i][i] for i in range(3)])
+        lines.append([board[i][2 - i] for i in range(3)])
+
+        for line in lines:
+            if line[0] != "_" and line.count(line[0]) == 3:
+                return line[0]
+        return None
+
+    # ----------------------------------------------------------------
+    # Player Prompt Generator
+    # ----------------------------------------------------------------
+    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
+        """
+        Build instructions for a player based on the current board state.
+        """
+        team_name = "Sun" if player_id == 0 else "Moon"
+        symbol = game_state["player_symbols"][team_name]
+        board_view = self._board_to_str(game_state["board_state"])
+
+        prompt = (
+            f"You are an explorer representing Team {team_name} "
+            f"({symbol}) claiming tiles on the ancient Tic-Tac-Trail.\n"
+            f"Current board state:\n{board_view}\n\n"
+            "You may take one of the following actions:\n"
+            " - [Mark:<row>,<col>] to claim an unmarked tile (rows and cols 0–2)\n"
+            " - [Pass] if no unclaimed tiles remain\n\n"
+            "Victory condition: Align three of your emblems in a straight line.\n"
+            "All actions must be enclosed in \\boxed{{}} at the end of your message.\n\n"
+            "Example valid response:\n"
+            "I should take the center stone before my rival.\n"
+            "\\boxed{{[Mark:1,1]}}\n\n"
+            "Example valid response (no moves left):\n"
+            "No moves left; I will pass.\n"
+            "\\boxed{{[Pass]}}\n"
+        )
+        return prompt
+
+    # ----------------------------------------------------------------
+    # Reset
+    # ----------------------------------------------------------------
+    def reset(self, num_players: int, seed: Optional[int] = None):
+        """
+        Resets the environment to an initial state.
+
+        Args:
+            num_players: must be 2 (Sun, Moon)
+            seed: random seed (stored but unused for determinism)
+        """
+        if num_players != 2:
+            raise ValueError("Tic-Tac-Trail requires exactly 2 players.")
+
+        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
+
+        if seed is not None:
+            random.seed(seed)
+
+        empty_board = [["_"] * 3 for _ in range(3)]
+
+        game_state: Dict[str, Any] = {
+            "seed": seed or 42,
+            "turn_count": 1,
+            "current_player": "Sun",
+            "board_state": empty_board,
+            "player_symbols": {"Sun": "S", "Moon": "M"},
+            "history": [{"player": "System", "message": "The ancient board awaits."}],
+            "winner": None,
+            "status": "ongoing",
+            "available_moves": self._get_available_moves(empty_board),
+            "scores": {"Sun": 0, "Moon": 0},
+        }
+
+        role_mapping = {0: "Sun", 1: "Moon"}
+
+        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt, role_mapping=role_mapping)
+
+        self.state.add_observation("The ancient board awaits.", ta.ObservationType.GAME_MESSAGE, from_id=-1, to_id=-1)
+        self.state.add_observation(self._board_to_str(empty_board), ta.ObservationType.GAME_BOARD)
+        return self.state
+
+    # ----------------------------------------------------------------
+    # Step
+    # ----------------------------------------------------------------
+    def step(self, action: str) -> Tuple[bool, ta.Info]:
+        """
+        Perform a single environment step for the current player.
+
+        Args:
+            action: The action text submitted by the current player.
+
+        Returns:
+            A tuple (done, info)
+        """
+        player_id = self.state.current_player_id
+        role_names = {0: "Sun", 1: "Moon"}
+        current_team = role_names[player_id]
+        other_team = role_names[1 - player_id]
+
+        # Log player action
+        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)
+
+        extracted = self._extract_answer_content(action)
+
+        # ---- Validation ----
+        if not (self.mark_pattern.match(extracted) or self.pass_pattern.match(extracted)):
+            self.state.set_invalid_move("Invalid format — must be [Mark:r,c] or [Pass].")
+            return self.state.step()
+
+        game_state = self.state.game_state
+        board = game_state["board_state"]
+
+        if self.mark_pattern.match(extracted):
+            m = self.mark_pattern.match(extracted)
+            r, c = int(m.group(1)), int(m.group(2))
+            if r not in range(3) or c not in range(3):
+                self.state.set_invalid_move("Row or column index out of range.")
+                return self.state.step()
+            if board[r][c] != "_":
+                self.state.set_invalid_move("Chosen cell already occupied.")
+                return self.state.step()
+
+            # Apply the move
+            board[r][c] = game_state["player_symbols"][current_team]
+            game_state["history"].append({"player": current_team, "message": f"Marked cell ({r},{c})."})
+        else:
+            # [Pass]
+            available = self._get_available_moves(board)
+            if len(available) > 0:
+                self.state.set_invalid_move("Cannot pass while moves still available.")
+                return self.state.step()
+            game_state["history"].append({"player": current_team, "message": "Passed."})
+
+        # Update game_state
+        game_state["available_moves"] = self._get_available_moves(board)
+
+        # ---- Check terminal conditions ----
+        symbol_winner = self._check_winner(board)
+        if symbol_winner:
+            winning_team = "Sun" if symbol_winner == "S" else "Moon"
+            game_state["winner"] = winning_team
+            game_state["status"] = "finished"
+            game_state["scores"][winning_team] = 1
+            game_state["scores"][other_team] = 0
+            self.state.set_winner(player_id if winning_team == current_team else 1 - player_id, reason=f"Team {winning_team} aligned three emblems!")
+            return self.state.step()
+
+        if not game_state["available_moves"]:
+            game_state["winner"] = None
+            game_state["status"] = "draw"
+            game_state["scores"]["Sun"] = 0.5
+            game_state["scores"]["Moon"] = 0.5
+            self.state.set_draw(reason="All tiles filled without a winning alignment.")
+            return self.state.step()
+
+        # If ongoing
+        game_state["turn_count"] += 1
+        game_state["current_player"] = other_team
+        game_state["status"] = "ongoing"
+        self.state.add_observation(self._board_to_str(board), ta.ObservationType.GAME_BOARD)
+
+        return self.state.step()
+
+    # ----------------------------------------------------------------
+    # Observation Retrieval
+    # ----------------------------------------------------------------
+    def get_observation(self) -> Tuple[int, List]:
+        """Return (player_id, observation_list) for current player."""
+        return (self.state.current_player_id, self.state.observations)
+
+    # ----------------------------------------------------------------
+    # Close
+    # ----------------------------------------------------------------
+    def close(self) -> Tuple[Dict, Dict]:
+        """Return final rewards and game info."""
+        return self.state.rewards, self.state.game_info
+```