blahblahblah/env.py

```python
import re
from typing import Any, Dict, Optional, Tuple, List

import textarena as ta


class StarGridDuelEnv(ta.Env):
    """
    Implementation of the 'StarGrid Duel' game environment.

    Deterministic two-player strategy game where navigators place energy beacons
    on a 3x3 grid aiming to align three of their own beacons in a row, column, or diagonal.

    Player 0 is "Navigator Alpha" (key "A", colour "Blue") and player 1 is
    "Navigator Beta" (key "B", colour "Crimson"). Cells are addressed by a row
    letter A-C followed by a column digit 1-3, e.g. "B2".
    """

    # The eight winning alignments: three rows, three columns, two diagonals.
    _WIN_LINES: Tuple[Tuple[str, str, str], ...] = (
        ("A1", "A2", "A3"),
        ("B1", "B2", "B3"),
        ("C1", "C2", "C3"),
        ("A1", "B1", "C1"),
        ("A2", "B2", "C2"),
        ("A3", "B3", "C3"),
        ("A1", "B2", "C3"),
        ("A3", "B2", "C1"),
    )

    # Matches both \boxed{...} and the doubled-brace variant \boxed{{...}};
    # the captured payload itself may not contain braces.
    _BOXED_PATTERN = re.compile(r"\\boxed\{\{?([^{}]*)\}?\}")

    def __init__(self, max_turns: int = 9):
        """
        Args:
            max_turns: Turn limit forwarded to the underlying state object on reset.
        """
        self.max_turns = max_turns
        # Accept any "<letter><digits>" cell id syntactically so that a
        # well-formed but off-grid request (e.g. "[Place: D4]") reaches the
        # range check in step() instead of being reported as malformed.
        # Two capture groups are kept: row letter, column number.
        self.place_pattern = re.compile(r"^\[Place:\s*([A-Za-z])([0-9]+)\]$")
        # Cell labels in row-major order: A1, A2, A3, B1, ...
        self.all_cells = [f"{r}{c}" for r in "ABC" for c in "123"]

    # ------------------------ Helper Methods ------------------------

    def _extract_answer_content(self, action: str) -> str:
        r"""
        Extract the content of the final \boxed{...} in a player's response.

        The prompt asks players to put their answer in a box at the end of the
        response, so when several boxes are present the last one wins. Both
        single braces and doubled braces are accepted.

        Args:
            action: Raw response text from the player.

        Returns:
            The trimmed content of the last box, or the whole trimmed response
            when no box is found.
        """
        boxed = self._BOXED_PATTERN.findall(action)
        if boxed:
            return boxed[-1].strip()
        return action.strip()

    def _check_victory(self, board: Dict[str, Optional[str]], color: str) -> bool:
        """
        Report whether `color` owns every cell of at least one winning line.

        Args:
            board: Mapping from cell label to owning colour (None when empty).
            color: Colour to test, e.g. "Blue" or "Crimson".

        Returns:
            True if any of the eight lines is fully occupied by `color`.
        """
        return any(
            all(board[cell] == color for cell in line)
            for line in self._WIN_LINES
        )

    def _generate_board_str(self, board: Dict[str, Optional[str]]) -> str:
        """
        Render the 3x3 StarGrid as a simple text table.

        Empty cells show their own label; occupied cells show "B" for Blue and
        "C" for any other colour. Cells are joined with " | " and rows with
        newlines.
        """

        def render(cell: str) -> str:
            owner = board[cell]
            if owner is None:
                return cell
            return "B" if owner == "Blue" else "C"

        return "\n".join(
            " | ".join(render(f"{row}{col}") for col in "123")
            for row in "ABC"
        )

    def _get_active_player_label(self, player_id: int) -> str:
        """Return the display name for a player id (0 is Alpha, anything else Beta)."""
        return "Navigator Alpha" if player_id == 0 else "Navigator Beta"

    def _cell_valid(self, cell: str) -> bool:
        """Return True when `cell` is one of the nine grid labels."""
        return cell in self.all_cells

    # ------------------------ Core Env API ------------------------

    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Builds a fresh state object with an empty board, then publishes the
        welcome message and the empty board as observations.

        Args:
            num_players: Number of players in the game. Must be 2.
            seed: Optional seed, forwarded to the state object and stored in
                the game state.

        Returns:
            The freshly initialised state object (self.state).

        Raises:
            ValueError: If num_players is not 2.
        """
        if num_players != 2:
            raise ValueError("StarGrid Duel requires exactly 2 players.")

        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

        empty_board = {cell: None for cell in self.all_cells}
        game_state: Dict[str, Any] = {
            "turn_index": 0,
            "active_player": "A",
            "board": empty_board,
            "player_symbols": {"A": "Blue", "B": "Crimson"},
            "move_history": [],
            "winner": None,
            "is_draw": False,
            "observations": {"A": "", "B": ""},
            "seed": seed,
        }

        role_mapping = {0: "Navigator Alpha", 1: "Navigator Beta"}

        # Initialize internal game state
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=role_mapping,
        )

        # Onboarding observations
        onboarding_msg = (
            "Welcome to StarGrid Duel!\n"
            "Two navigators will alternately place energy beacons on the 3×3 StarGrid.\n"
            "Your mission is to align three of your beacons in a line before your rival."
        )
        self.state.add_observation(onboarding_msg, ta.ObservationType.GAME_MESSAGE)

        board_msg = self._generate_board_str(empty_board)
        self.state.add_observation(board_msg, ta.ObservationType.GAME_BOARD)

        return self.state

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The action is logged, its boxed content is extracted and validated
        (format, grid range, cell vacancy). A valid move places the player's
        beacon, is appended to the move history, and is then checked for a win
        or a full-board draw. Invalid moves are reported through
        set_invalid_move and leave the board untouched.

        Args:
            action: The action text submitted by the current player.

        Returns:
            The result of self.state.step(); annotated as a (done, info) tuple.
        """
        player_id = self.state.current_player_id
        player_key = "A" if player_id == 0 else "B"
        game_state = self.state.game_state
        player_color = game_state["player_symbols"][player_key]

        # 1. Log the raw player action
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)

        # 2. Extract the boxed content
        extracted = self._extract_answer_content(action)

        # 3. Validate the action: syntax first, then grid range, then vacancy
        match = self.place_pattern.match(extracted)
        if not match:
            self.state.set_invalid_move(reason="MalformedAction: The action must strictly follow '[Place: <cell_id>]' format.")
            return self.state.step()

        cell_id = f"{match.group(1)}{match.group(2)}"

        if not self._cell_valid(cell_id):
            self.state.set_invalid_move(reason=f"CellOutOfRange: {cell_id} is not a valid StarGrid coordinate.")
            return self.state.step()

        board = game_state["board"]
        if board[cell_id] is not None:
            self.state.set_invalid_move(reason=f"CellOccupied: {cell_id} is already occupied.")
            return self.state.step()

        # 4. Execute valid action: place beacon (board is mutated in place)
        board[cell_id] = player_color
        game_state["move_history"].append({"player": player_key, "action": extracted})
        # turn_index counts successfully placed beacons only
        game_state["turn_index"] += 1

        # 5./6. Resolve the outcome: win, draw, or game continues
        game_over = True
        if self._check_victory(board, player_color):
            game_state["winner"] = player_key
            winner_str = self._get_active_player_label(player_id)
            self.state.set_winner(player_id=player_id, reason=f"{winner_str} aligned three energy beacons in a line.")
        elif all(owner is not None for owner in board.values()):
            game_state["is_draw"] = True
            self.state.set_draw(reason="All cells filled with no aligned beacons—a balanced standoff.")
        else:
            game_over = False

        # 7. Publish the updated board (after any winner/draw has been recorded)
        self.state.add_observation(self._generate_board_str(board), ta.ObservationType.GAME_BOARD)

        # 8. Hand the turn to the other navigator unless the game has ended
        if not game_over:
            game_state["active_player"] = "B" if player_key == "A" else "A"

        return self.state.step()

    # -------------------- Player Prompt Generation --------------------

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Build the instruction prompt shown to a player.

        The prompt names the player's role and colour, shows the current board
        and the still-open cells, and explains the expected boxed answer format
        with one valid and one invalid example.

        Args:
            player_id: 0 for Navigator Alpha, otherwise Navigator Beta.
            game_state: Game state dict holding at least "player_symbols",
                "active_player" and "board".

        Returns:
            The full prompt text.
        """
        role = self._get_active_player_label(player_id)
        color = game_state["player_symbols"]["A" if player_id == 0 else "B"]
        active_label = "Navigator Alpha" if game_state["active_player"] == "A" else "Navigator Beta"

        board = game_state["board"]
        board_repr = self._generate_board_str(board)
        open_cells = [cell for cell, owner in board.items() if owner is None]
        available = ", ".join(open_cells) if open_cells else "None"

        # NOTE: the segments containing \boxed are plain (non-f) strings on
        # purpose, so their braces are literal and must not be doubled.
        prompt = (
            f"You are {role}, commanding the {color} energy.\n"
            "Your goal: deploy energy beacons across the StarGrid to align three of your own in a straight line before your opponent.\n\n"
            f"Current Board:\n{board_repr}\n\n"
            f"Your Color: {color}\nActive Navigator: {active_label}\n\n"
            f"Allowed Actions:\nFormat: [Place: <cell_id>]\nAvailable cells: {available}\n\n"
            "Response Format:\n"
            "You may describe your reasoning, then finalize your move as:\n\n"
            "Example valid response:\n"
            "I will claim the center of the grid to control diagonals.\n"
            "\\boxed{[Place: B2]}\n\n"
            "Example invalid response:\n"
            "I think I'll move now.\n"
            "\\boxed{[Move: B2]}\n\n"
            "Put your final answer within \\boxed{} at the end of your response."
        )
        return prompt
```