env.py

```python
import re
import random
from typing import Any, Dict, Optional, Tuple, List

import textarena as ta


class GlyphGridDuelEnv(ta.Env):
    """
    GlyphGrid Duel: a deterministic two-player abstract logic game.

    Players alternate inscribing glyphs ("X" or "O") on a 3×3 grid.
    The first to align three identical glyphs along any row, column, or diagonal wins.
    If the grid fills up without an alignment, the duel is a draw.

    Player 1 always uses "X" and Player 2 always uses "O"; which of them moves
    first is derived from the parity of the episode seed.
    """

    # Exact action grammar: "[Inscribe:<row>,<col>]" with both values in 1..3.
    # The two capture groups are reused by step() to read the coordinates.
    VALID_ACTION_PATTERN = re.compile(r"^\[Inscribe:(1|2|3),(1|2|3)\]$")

    def __init__(self):
        """Initialize reusable attributes; the state is created in reset()."""
        self.state: Optional[ta.TwoPlayerState] = None

    # -------------------------------------------------------------------------
    # Helper: Extract content inside <answer> tags
    # -------------------------------------------------------------------------
    def _extract_answer_content(self, action: str) -> str:
        """
        Extract content from <answer></answer> tags.

        The first (case-insensitive) <answer>...</answer> pair is used and its
        content is stripped of surrounding whitespace. If no such pair exists,
        the whole action string is stripped and returned instead.
        """
        match = re.search(r"<answer>(.*?)</answer>", action, re.DOTALL | re.IGNORECASE)
        if match:
            return match.group(1).strip()
        return action.strip()

    # -------------------------------------------------------------------------
    # Reset environment
    # -------------------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: Number of players (must be 2 for GlyphGrid Duel).
            seed: Optional seed for deterministic behavior. When omitted, a
                random seed in [0, 99999] is drawn.

        Returns:
            The freshly initialised game_state dictionary held by the state object.

        Raises:
            ValueError: If num_players is not 2.
        """
        if num_players != 2:
            raise ValueError("GlyphGrid Duel requires exactly 2 players.")

        if seed is None:
            seed = random.randint(0, 99999)

        # Create a reproducible state manager
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed)

        # Determine which player starts (based on seed parity)
        starting_player_id = 0 if seed % 2 == 0 else 1
        starting_player_name = f"Player {starting_player_id + 1}"

        # Construct initial 3x3 board (each cell empty string)
        board = [["" for _ in range(3)] for _ in range(3)]

        # Define player info
        players = {
            "Player 1": {"symbol": "X", "moves_made": 0},
            "Player 2": {"symbol": "O", "moves_made": 0},
        }

        # Build game_state dictionary
        game_state: Dict[str, Any] = {
            "turn_count": 0,
            "current_player": starting_player_name,
            "seed": seed,
            "board": board,
            "players": players,
            "winner": None,
            "is_terminal": False,
            "last_action": None,
            "observation_log": [f"{starting_player_name} begins the glyph duel."],
        }

        # Initialize internal environment state
        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt)

        # The state object is never told who starts, yet step() identifies the
        # acting player via self.state.current_player_id. Align it with the
        # seed-chosen starter so it agrees with game_state["current_player"];
        # otherwise every move of an episode started by Player 2 would be
        # rejected as "Not your turn.".
        # NOTE(review): if the State class offers a dedicated setter for the
        # current player, prefer it over direct attribute assignment.
        self.state.current_player_id = starting_player_id

        # Initial observations
        self.state.add_observation(
            from_id=-1,
            message=f"{starting_player_name} begins the glyph duel.",
            observation_type=ta.ObservationType.GAME_MESSAGE,
        )
        self.state.add_observation(
            from_id=-1,
            message=self._render_board(board),
            observation_type=ta.ObservationType.GAME_BOARD,
        )

        return self.state.game_state

    # -------------------------------------------------------------------------
    # Board and State Helpers
    # -------------------------------------------------------------------------
    def _render_board(self, board: List[List[str]]) -> str:
        """
        Format the 3×3 board for display.

        Empty cells are shown as "."; the header line carries column numbers
        and each row is prefixed with its 1-based row number.
        """
        header = "   1   2   3"
        rows = []
        for i, row in enumerate(board):
            cells = [cell if cell else "." for cell in row]
            rows.append(f"{i+1}  " + " | ".join(cells))
        return f"{header}\n" + "\n".join(rows)

    def _check_winner(self, symbol: str, board: List[List[str]]) -> bool:
        """Check if the given symbol has three in a row (row, column, diagonal)."""
        # Rows and columns
        for i in range(3):
            if all(board[i][j] == symbol for j in range(3)):
                return True
            if all(board[j][i] == symbol for j in range(3)):
                return True
        # Main diagonal, then anti-diagonal
        if all(board[i][i] == symbol for i in range(3)):
            return True
        if all(board[i][2 - i] == symbol for i in range(3)):
            return True
        return False

    def _is_board_full(self, board: List[List[str]]) -> bool:
        """Return True if no empty cells remain."""
        return all(cell != "" for row in board for cell in row)

    # -------------------------------------------------------------------------
    # Step Action
    # -------------------------------------------------------------------------
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw action is logged, the <answer> content is validated against the
        action grammar, and the move is applied if the target cell is free and
        it is the acting player's turn. Afterwards the win and draw conditions
        are evaluated; otherwise the turn passes to the opponent.

        Args:
            action: The action text submitted by the current player.

        Returns:
            A tuple (done, info) where:
                done: True if the episode has concluded
                info: A ta.Info object with auxiliary details
        """
        player_id = self.state.current_player_id
        player_name = f"Player {player_id + 1}"

        # Log player's raw action
        self.state.add_observation(
            from_id=player_id,
            to_id=-1,
            message=action,
            observation_type=ta.ObservationType.PLAYER_ACTION,
        )

        # Extract meaningful token content
        answer_content = self._extract_answer_content(action)

        # Validate action format
        action_match = self.VALID_ACTION_PATTERN.match(answer_content)
        if not action_match:
            self.state.set_invalid_move(reason="Invalid action format. Must match [Inscribe:x,y].")
            return self.state.step()

        # Read the coordinates from the validated match (first value is the
        # row, second the column) and convert them to 0-based indices.
        row = int(action_match.group(1)) - 1
        col = int(action_match.group(2)) - 1

        # Access current game_state
        g = self.state.game_state
        board = g["board"]

        # Check if cell already occupied
        if board[row][col] != "":
            self.state.set_invalid_move(reason="Cell already occupied.")
            return self.state.step()

        # Defensive check: the game's turn holder must match the acting player
        if g["current_player"] != player_name:
            self.state.set_invalid_move(reason="Not your turn.")
            return self.state.step()

        # Apply move
        symbol = g["players"][player_name]["symbol"]
        board[row][col] = symbol
        g["players"][player_name]["moves_made"] += 1
        g["turn_count"] += 1
        g["last_action"] = answer_content
        g["observation_log"].append(f"{player_name} placed at ({row+1},{col+1})")

        # Add observation for move and board update
        self.state.add_observation(
            from_id=player_id,
            message=f"{player_name} inscribed glyph '{symbol}' at ({row+1},{col+1})",
            observation_type=ta.ObservationType.GAME_MESSAGE,
        )
        self.state.add_observation(
            from_id=-1,
            message=self._render_board(board),
            observation_type=ta.ObservationType.GAME_BOARD,
        )

        # Check win condition
        if self._check_winner(symbol, board):
            g["winner"] = player_name
            g["is_terminal"] = True
            self.state.set_winner(player_id=player_id, reason=f"{player_name} aligned three glyphs and won the duel.")
            return self.state.step()

        # Check draw condition
        if self._is_board_full(board):
            g["winner"] = "Draw"
            g["is_terminal"] = True
            self.state.set_draw(reason="The grid is full. The duel ends in a draw.")
            return self.state.step()

        # Switch turns
        next_player_id = 1 - player_id
        g["current_player"] = f"Player {next_player_id + 1}"

        # End step (non-terminal)
        return self.state.step()

    # -------------------------------------------------------------------------
    # Prompt Generation
    # -------------------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Produce role-appropriate instructions for a player at episode start.

        Args:
            player_id: The integer ID of the player.
            game_state: The shared game state.

        Returns:
            A string prompt naming the player's glyph, showing the current
            board and turn holder, and spelling out the action grammar and
            response format with one valid and one invalid example.
        """
        player_name = f"Player {player_id + 1}"
        symbol = game_state["players"][player_name]["symbol"]
        board_str = self._render_board(game_state["board"])
        current_turn_name = game_state["current_player"]

        prompt = (
            f"You are {player_name}, bearer of the glyph '{symbol}', in the abstract digital arena.\n"
            "Your goal is to align three of your runes (glyphs) in a straight line—row, column, or diagonal—before your opponent does.\n\n"
            f"Current arena state:\n{board_str}\n\n"
            f"It is currently {current_turn_name}'s turn.\n"
            "On your turn, inscribe your glyph in any unoccupied cell.\n\n"
            "Action grammar (must be exact): [Inscribe:x,y]\n"
            "  - x, y ∈ {1, 2, 3}\n"
            "  - Example: [Inscribe:2,3] inscribes at row 2, column 3.\n\n"
            "Formatting rules:\n"
            "  - Put private reasoning inside <think></think>.\n"
            "  - Put your chosen action inside <answer></answer>.\n\n"
            "Example valid response:\n"
            "<think>I will take the center to prepare a diagonal line.</think>\n"
            "<answer>[Inscribe:2,2]</answer>\n\n"
            "Example invalid response:\n"
            "<think>I'll use a lowercase tag.</think>\n"
            "<answer>[inscribe:2,2]</answer>  <-- Invalid keyword\n"
        )

        return prompt
```