env.py

```python
import re
from typing import Any, Dict, List, Optional, Tuple

import textarena as ta


class StellarTriadEnv(ta.Env):
    r"""
    Stellar Triad — a turn-based, two-player alignment game themed around cosmic architecture.

    Two roles, "ArchitectA" (symbol 'A', player id 0) and "ArchitectB" (symbol 'B',
    player id 1), alternately claim empty cells of a 3x3 matrix. A player who
    fills a full row, column or diagonal with their symbol wins; a full matrix
    without such a line is a draw.

    Moves are submitted as ``[Channel:X-Y]`` (X = column, Y = row, both 1..3)
    wrapped in ``\boxed{{...}}`` (``\boxed{...}`` is accepted as well).
    """

    # Side length of the square matrix; the board holds BOARD_SIZE ** 2 cells.
    BOARD_SIZE = 3

    # Patterns locating the boxed command inside a free-form response. The
    # double-brace form is the one shown in the player prompt.
    _BOXED_DOUBLE = re.compile(r'\\boxed\{\{(.*?)\}\}', re.DOTALL)
    _BOXED_SINGLE = re.compile(r'\\boxed\{(.*?)\}', re.DOTALL)

    def __init__(self):
        """Initialize the action regex, the turn limit and an empty state slot."""
        super().__init__()
        # Group 1 is the column, group 2 the row; only digits 1-3 are accepted,
        # so a successful match already guarantees in-range coordinates.
        self.action_pattern = re.compile(r'^\[Channel:([1-3])-([1-3])\]$')
        self.max_turns = self.BOARD_SIZE ** 2  # Maximum number of turns (one per matrix cell)
        self.state: Optional[ta.TwoPlayerState] = None

    @staticmethod
    def _role_for(player_id: int) -> str:
        """Return the role name for a player id (0 -> ArchitectA, otherwise ArchitectB)."""
        return "ArchitectA" if player_id == 0 else "ArchitectB"

    def _extract_answer_content(self, action: str) -> str:
        r"""
        Extract the command wrapped in ``\boxed{{ ... }}`` (or ``\boxed{ ... }``).

        The double-brace form takes precedence over the single-brace form.
        When a form occurs several times, the LAST occurrence is used, because
        the prompt instructs players to put the command at the end of their
        response and earlier occurrences are typically quoted format examples.

        Args:
            action: Raw response text from the agent.

        Returns:
            The stripped content of the selected box, or an empty string if no
            box is present.
        """
        for pattern in (self._BOXED_DOUBLE, self._BOXED_SINGLE):
            contents = pattern.findall(action)
            if contents:
                return contents[-1].strip()
        return ""

    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: Number of players (must be 2).
            seed: Optional seed. Its parity picks the starting player (even =>
                ArchitectA, odd => ArchitectB). ``None`` is treated as 0 for
                that purpose, while the underlying state still receives the
                seed exactly as passed.

        Returns:
            None

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Stellar Triad requires exactly 2 players.")

        # Initialize state
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

        # Determine starting player based on seed parity (even seed => ArchitectA starts)
        if seed is None:
            seed = 0
        starting_player = 0 if seed % 2 == 0 else 1

        size = self.BOARD_SIZE
        game_state = {
            # Row-major grid: matrix_state[row][column]; None marks an empty cell.
            "matrix_state": [[None for _ in range(size)] for _ in range(size)],
            "player_symbols": {"ArchitectA": "A", "ArchitectB": "B"},
            "turn_count": 0,
            "active_player": self._role_for(starting_player),
            "last_action": None,
            "move_history": [],
            "game_result": None,
            "winner": None,
            "draw": False,
            "seed": seed,
        }

        role_mapping = {0: "ArchitectA", 1: "ArchitectB"}

        # Initialize the underlying textarena state
        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt, role_mapping=role_mapping)

        # Manually set starting player based on seed
        self.state.manually_set_current_player_id(starting_player)

        # Add initial observations
        self.state.add_observation("Welcome to Stellar Triad!", ta.ObservationType.GAME_MESSAGE)
        self.state.add_observation("A 3x3 orbital matrix awaits your channeling commands.", ta.ObservationType.GAME_MESSAGE)
        return None

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        r"""
        Perform one environment step for the active player.

        The raw action is logged as an observation, then the boxed command is
        extracted and validated (format, then cell occupancy). A valid command
        places the player's symbol, updates the bookkeeping in ``game_state``
        and checks for a win or a draw. Every exit path returns the result of
        ``self.state.step()``.

        Args:
            action: The response text from the agent containing a
                ``\boxed{{[Channel:X-Y]}}`` token.

        Returns:
            (done, info)
        """
        player_id = self.state.current_player_id
        player_role = self._role_for(player_id)

        # Log raw action as a player action observation
        self.state.add_observation(message=action, observation_type=ta.ObservationType.PLAYER_ACTION,
                                   from_id=player_id, to_id=-1)

        extracted = self._extract_answer_content(action)
        if not extracted:
            self.state.set_invalid_move("Action missing or not boxed")
            return self.state.step()

        # Validate action format; the pattern only admits digits 1-3, so no
        # separate parse-failure or range check is needed afterwards.
        token = self.action_pattern.match(extracted)
        if token is None:
            self.state.set_invalid_move("Malformed token: does not match [Channel:X-Y] pattern")
            return self.state.step()

        x = int(token.group(1)) - 1  # columns 1–3; convert to 0–2
        y = int(token.group(2)) - 1  # rows 1–3; convert to 0–2

        # Validate occupancy
        game_state = self.state.game_state
        matrix_state = game_state["matrix_state"]
        if matrix_state[y][x] is not None:
            self.state.set_invalid_move("Target cell occupied")
            return self.state.step()

        # Apply move (the grid is mutated in place, so game_state sees it directly)
        symbol = game_state["player_symbols"][player_role]
        matrix_state[y][x] = symbol

        # Update state-based info
        game_state["last_action"] = extracted
        game_state["turn_count"] += 1
        game_state["move_history"].append(f"{player_role}:{extracted}")

        # Check for win or draw
        if self._check_alignment(matrix_state, symbol):
            game_state["game_result"] = f"{player_role}_won"
            game_state["winner"] = player_role
            # Mirror completion onto `state_done` only when the state object
            # exposes such an attribute, so no stray attribute is ever created.
            if hasattr(self.state, "state_done"):
                self.state.state_done = True
            # Player ids are 0 (ArchitectA) and 1 (ArchitectB), so the mover's
            # id is the winner's id.
            self.state.set_winner(player_id, reason=f"{player_role} achieved Stellar Alignment.")
            return self.state.step()

        # Each valid move fills exactly one cell, so the grid is full once the
        # number of valid moves equals the number of cells.
        if game_state["turn_count"] >= self.BOARD_SIZE ** 2:
            game_state["draw"] = True
            game_state["game_result"] = "Stellar_Collapse"
            self.state.set_draw(reason="Orbital grid full without alignment (Stellar Collapse).")
            return self.state.step()

        # Switch active player
        game_state["active_player"] = "ArchitectA" if player_role == "ArchitectB" else "ArchitectB"

        return self.state.step()

    def _check_alignment(self, board: List[List[Optional[str]]], symbol: str) -> bool:
        """
        Check if the given symbol has achieved a Stellar Alignment (three-in-a-line).

        Args:
            board: Row-major 3x3 grid of symbols, with None for empty cells.
            symbol: The symbol to test for.

        Returns:
            True if any row, column or diagonal consists solely of ``symbol``.
        """
        size = self.BOARD_SIZE
        indices = range(size)

        # Rows and columns
        for i in indices:
            if all(cell == symbol for cell in board[i]):
                return True
            if all(board[row][i] == symbol for row in indices):
                return True

        # Diagonals: top-left -> bottom-right, then top-right -> bottom-left
        if all(board[i][i] == symbol for i in indices):
            return True
        return all(board[i][size - 1 - i] == symbol for i in indices)

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Generates the textual game prompt for each Architect player.

        Args:
            player_id: Id of the player the prompt is for (0 => ArchitectA).
            game_state: The game-state dict; reads "player_symbols",
                "active_player" and "matrix_state".

        Returns:
            The prompt text, including the player's role and symbol, the
            current matrix, the rules and the required command format.
        """
        role = self._role_for(player_id)
        symbol = game_state["player_symbols"][role]
        active_marker = " (You start.)" if game_state["active_player"] == role else ""
        matrix_str = self._format_matrix(game_state["matrix_state"])

        # NOTE: only the segments prefixed with `f` are f-strings. In the plain
        # segments the doubled braces are emitted literally, so players are
        # shown the `\boxed{{...}}` form that _extract_answer_content looks for first.
        prompt = (
            f"You are {role}, a cosmic architect channeling energy Nodes around a dying star.\n"
            f"Your symbol: '{symbol}'.{active_marker}\n\n"
            "Below is the current 3×3 orbital matrix. Empty slots are shown as '.' :\n"
            f"{matrix_str}\n\n"
            "Your goal: Achieve a *Stellar Alignment* — three of your Nodes in any straight line (horizontal, vertical, or diagonal).\n"
            "If the matrix fills without alignment, the star collapses and both architects fail.\n\n"
            "Each turn, choose one unoccupied cell to channel your energy Node into.\n"
            "Use the exact format: [Channel:X-Y]  (columns and rows from 1 to 3).\n"
            "Example: [Channel:2-3]  → channel into column 2, row 3.\n"
            "Invalid formats include [Deploy:2-3] or [Channel:4-1].\n\n"
            "Place your chosen command within \\boxed{{}} at the end of your response.\n\n"
            "Example valid response:\n"
            "I will project energy into the lower middle conduit.\n"
            "\\boxed{{[Channel:2-3]}}\n\n"
            "Example invalid response:\n"
            "I channel energy south-east.\n"
            "\\boxed{{Channel:SE}}\n"
        )
        return prompt

    def _format_matrix(self, matrix: List[List[Optional[str]]]) -> str:
        """Return the matrix as text: one line per row, cells space-separated, '.' for empty."""
        return "\n".join(
            " ".join("." if cell is None else cell for cell in row)
            for row in matrix
        )
```