env.py

```python
import re
import random
from typing import Any, Dict, List, Optional, Tuple

import textarena as ta


class TicTacTrailEnv(ta.Env):
    """
    Tic-Tac-Trail: A deterministic, turn-based tactical puzzle game.

    Two explorers — Team Sun (S) and Team Moon (M) — claim tiles on an ancient 3×3 grid.
    The first team to align three of their emblems horizontally, vertically, or diagonally wins.

    Players answer with ``[Mark:<row>,<col>]`` or ``[Pass]`` wrapped in a
    \\boxed{{...}} (or \\boxed{...}) marker. The board is a 3×3 list of lists
    whose cells hold "_" (empty), "S" or "M".
    """

    # Player id -> team name. Player 0 always plays Sun, player 1 plays Moon.
    _ROLE_NAMES: Dict[int, str] = {0: "Sun", 1: "Moon"}

    # Matches \boxed{{...}} (group 1) or, failing that, \boxed{...} (group 2).
    # The double-brace alternative is listed first so that it wins whenever
    # both forms could match at the same position.
    _BOXED_PATTERN = re.compile(r"\\boxed\{\{(.*?)\}\}|\\boxed\{(.*?)\}", re.DOTALL)

    def __init__(self, max_turns: int = 9):
        """
        Args:
            max_turns: turn limit handed to the TextArena state on reset
                (9 by default, i.e. one turn per tile).
        """
        self.max_turns = max_turns
        # Define regex patterns for allowed actions
        self.mark_pattern = re.compile(r"^\[Mark:(0|1|2),(0|1|2)\]$")
        self.pass_pattern = re.compile(r"^\[Pass\]$")
        self.num_players = 2

    # ----------------------------------------------------------------
    # Helper: Extract boxed content
    # ----------------------------------------------------------------
    def _extract_answer_content(self, action: str) -> str:
        """
        Extract the action text from the last \\boxed marker in ``action``.

        Both \\boxed{{...}} and \\boxed{...} are accepted. The prompt asks
        players to put the action at the end of their message, so when several
        boxes are present the last one is taken as the final answer.

        Args:
            action: raw message submitted by the player.

        Returns:
            The stripped content of the last box, or the stripped message
            itself when it contains no box at all.
        """
        boxes = list(self._BOXED_PATTERN.finditer(action))
        if not boxes:
            return action.strip()

        final_box = boxes[-1]
        # Exactly one of the two groups participates in any given match.
        content = final_box.group(1)
        if content is None:
            content = final_box.group(2)
        return content.strip()

    # ----------------------------------------------------------------
    # Helper: Board display utility
    # ----------------------------------------------------------------
    def _board_to_str(self, board: List[List[str]]) -> str:
        """Convert board to a readable string: one line per row, cells separated by spaces."""
        return "\n".join(" ".join(row) for row in board)

    # ----------------------------------------------------------------
    # Helper: Compute available (empty) cells
    # ----------------------------------------------------------------
    def _get_available_moves(self, board: List[List[str]]) -> List[List[int]]:
        """Return the ``[row, col]`` pairs of all empty ("_") cells in row-major order."""
        return [[r, c] for r in range(3) for c in range(3) if board[r][c] == "_"]

    # ----------------------------------------------------------------
    # Helper: Check for winner
    # ----------------------------------------------------------------
    def _check_winner(self, board: List[List[str]]) -> Optional[str]:
        """Return 'S' or 'M' if a symbol wins, else None."""
        lines: List[List[str]] = []
        # Rows and cols
        for i in range(3):
            lines.append(board[i])
            lines.append([board[r][i] for r in range(3)])
        # Diagonals
        lines.append([board[i][i] for i in range(3)])
        lines.append([board[i][2 - i] for i in range(3)])

        for line in lines:
            # A line wins when it is non-empty and holds three identical symbols.
            if line[0] != "_" and line.count(line[0]) == 3:
                return line[0]
        return None

    # ----------------------------------------------------------------
    # Player Prompt Generator
    # ----------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Build instructions for a player based on the current board state.

        Args:
            player_id: 0 is addressed as Team Sun, any other id as Team Moon.
            game_state: must provide "player_symbols" and "board_state".

        Returns:
            The full instruction text, including the rendered board.
        """
        team_name = "Sun" if player_id == 0 else "Moon"
        symbol = game_state["player_symbols"][team_name]
        board_view = self._board_to_str(game_state["board_state"])

        # Only the first three literals are f-strings; the remaining ones are
        # plain strings, so their doubled braces are emitted verbatim.
        prompt = (
            f"You are an explorer representing Team {team_name} "
            f"({symbol}) claiming tiles on the ancient Tic-Tac-Trail.\n"
            f"Current board state:\n{board_view}\n\n"
            "You may take one of the following actions:\n"
            " - [Mark:<row>,<col>] to claim an unmarked tile (rows and cols 0–2)\n"
            " - [Pass] if no unclaimed tiles remain\n\n"
            "Victory condition: Align three of your emblems in a straight line.\n"
            "All actions must be enclosed in \\boxed{{}} at the end of your message.\n\n"
            "Example valid response:\n"
            "I should take the center stone before my rival.\n"
            "\\boxed{{[Mark:1,1]}}\n\n"
            "Example valid response (no moves left):\n"
            "No moves left; I will pass.\n"
            "\\boxed{{[Pass]}}\n"
        )
        return prompt

    # ----------------------------------------------------------------
    # Reset
    # ----------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: must be 2 (Sun, Moon)
            seed: random seed. When given it also seeds the global ``random``
                module; the game logic in this class draws no random numbers.

        Returns:
            The freshly initialised state object (also stored as ``self.state``).

        Raises:
            ValueError: if ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Tic-Tac-Trail requires exactly 2 players.")

        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

        if seed is not None:
            random.seed(seed)

        empty_board = [["_"] * 3 for _ in range(3)]

        game_state: Dict[str, Any] = {
            # Explicit None check: 0 is a legitimate seed and must not be
            # replaced by the fallback value.
            "seed": seed if seed is not None else 42,
            "turn_count": 1,
            "current_player": "Sun",
            "board_state": empty_board,
            "player_symbols": {"Sun": "S", "Moon": "M"},
            "history": [{"player": "System", "message": "The ancient board awaits."}],
            "winner": None,
            "status": "ongoing",
            "available_moves": self._get_available_moves(empty_board),
            "scores": {"Sun": 0, "Moon": 0},
        }

        # Pass a copy so the state never shares the class-level mapping.
        role_mapping = dict(self._ROLE_NAMES)

        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt, role_mapping=role_mapping)

        self.state.add_observation("The ancient board awaits.", ta.ObservationType.GAME_MESSAGE, from_id=-1, to_id=-1)
        self.state.add_observation(self._board_to_str(empty_board), ta.ObservationType.GAME_BOARD)
        return self.state

    # ----------------------------------------------------------------
    # Step
    # ----------------------------------------------------------------
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The action is logged, parsed and validated. A valid ``[Mark:r,c]``
        claims the cell for the current team; ``[Pass]`` is only accepted when
        no empty cell remains. A win or a full board ends the game; otherwise
        the bookkeeping in ``game_state`` moves on to the other team.

        Args:
            action: The action text submitted by the current player.

        Returns:
            A tuple (done, info), as returned by ``self.state.step()``.
        """
        player_id = self.state.current_player_id
        current_team = self._ROLE_NAMES[player_id]
        other_team = self._ROLE_NAMES[1 - player_id]

        # Log player action
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)

        extracted = self._extract_answer_content(action)
        mark_match = self.mark_pattern.match(extracted)

        # ---- Validation ----
        if mark_match is None and self.pass_pattern.match(extracted) is None:
            self.state.set_invalid_move("Invalid format — must be [Mark:r,c] or [Pass].")
            return self.state.step()

        game_state = self.state.game_state
        board = game_state["board_state"]

        if mark_match is not None:
            # mark_pattern only admits digits 0-2, so both indices are
            # guaranteed to be on the board and need no range check.
            r, c = int(mark_match.group(1)), int(mark_match.group(2))
            if board[r][c] != "_":
                self.state.set_invalid_move("Chosen cell already occupied.")
                return self.state.step()

            # Apply the move
            board[r][c] = game_state["player_symbols"][current_team]
            game_state["history"].append({"player": current_team, "message": f"Marked cell ({r},{c})."})
        else:
            # [Pass]
            if self._get_available_moves(board):
                self.state.set_invalid_move("Cannot pass while moves still available.")
                return self.state.step()
            game_state["history"].append({"player": current_team, "message": "Passed."})

        # Update game_state
        game_state["available_moves"] = self._get_available_moves(board)

        # ---- Check terminal conditions ----
        symbol_winner = self._check_winner(board)
        if symbol_winner:
            winning_team = "Sun" if symbol_winner == "S" else "Moon"
            winner_id = player_id if winning_team == current_team else 1 - player_id
            # Derive the loser from the winner rather than from whoever moved,
            # so the two scores can never both land on the same team.
            losing_team = self._ROLE_NAMES[1 - winner_id]
            game_state["winner"] = winning_team
            game_state["status"] = "finished"
            game_state["scores"][winning_team] = 1
            game_state["scores"][losing_team] = 0
            self.state.set_winner(winner_id, reason=f"Team {winning_team} aligned three emblems!")
            return self.state.step()

        if not game_state["available_moves"]:
            game_state["winner"] = None
            game_state["status"] = "draw"
            game_state["scores"]["Sun"] = 0.5
            game_state["scores"]["Moon"] = 0.5
            self.state.set_draw(reason="All tiles filled without a winning alignment.")
            return self.state.step()

        # If ongoing
        game_state["turn_count"] += 1
        game_state["current_player"] = other_team
        game_state["status"] = "ongoing"
        self.state.add_observation(self._board_to_str(board), ta.ObservationType.GAME_BOARD)

        return self.state.step()

    # ----------------------------------------------------------------
    # Observation Retrieval
    # ----------------------------------------------------------------
    def get_observation(self) -> Tuple[int, List]:
        """Return (player_id, observations) for the current player.

        NOTE(review): this hands back ``self.state.observations`` as-is; whether
        that attribute is already filtered for the current player depends on
        the TextArena state implementation — confirm against the library.
        """
        return (self.state.current_player_id, self.state.observations)

    # ----------------------------------------------------------------
    # Close
    # ----------------------------------------------------------------
    def close(self) -> Tuple[Dict, Dict]:
        """Return the state's ``rewards`` and ``game_info`` attributes."""
        return self.state.rewards, self.state.game_info
```