dummy-test-kw-v2/env.py

```python
import re, random
from typing import Any, Dict, Optional, Tuple
import textarena as ta


class ElementalChampionsEnv(ta.Env):
    """
    Implementation of 'Elemental Champions: The Tournament of Triads'
    as defined in the Stage 1 design document.
    """

    def __init__(self):
        # Game constants
        self.valid_elements = ["Flame", "Tide", "Gale"]
        self.pattern = re.compile(r"^\[Channel:\s*(Flame|Tide|Gale)\]$")
        self.max_rounds = 5
        self.score_to_win = 3
        self.dominance = {
            "Flame": "Gale",  # Flame over Gale
            "Gale": "Tide",   # Gale over Tide
            "Tide": "Flame",  # Tide over Flame
        }
        self.state = None

    def _extract_answer_content(self, action: str) -> str:
        """Extract content from \\boxed{} for parsing."""
        match = re.search(r'\\boxed\{\{([^}]*)\}\}', action, re.DOTALL)
        if match:
            return match.group(1).strip()
        # Fallback single bracket variant
        match2 = re.search(r'\\boxed\{([^}]*)\}', action, re.DOTALL)
        if match2:
            return match2.group(1).strip()
        return action.strip()

    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: Number of players (must be 2)
            seed: Optional seed for deterministic behavior
        """
        if num_players != 2:
            raise ValueError("Elemental Champions is strictly a 2-player game.")

        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_rounds)

        game_state: Dict[str, Any] = {
            "seed": seed or random.randint(1, 9999999),
            "current_round": 0,
            "max_rounds": self.max_rounds,
            "score_to_win": self.score_to_win,
            "duelist_A": {
                "name": "duelist_A",
                "essence_points": 0,
                "last_action": None
            },
            "duelist_B": {
                "name": "duelist_B",
                "essence_points": 0,
                "last_action": None
            },
            "transcript": [],
            "winner": None,
            "is_terminal": False,
            "invalid_reason": None
        }

        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt)

        welcome_message = (
            "Welcome to the Tournament of Triads! First to 3 Essence Points wins.\n"
            "Choose your elemental channel each round: Flame, Tide, or Gale."
        )
        self.state.add_observation(
            welcome_message, ta.ObservationType.GAME_MESSAGE, from_id=-1, to_id=-1
        )

        # Prepare round buffer for simultaneous actions
        self._round_actions = {0: None, 1: None}

        return self.state

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """Generates per-player prompts consistent with Stage 1 document."""
        opponent_id = 1 - player_id
        player_key = "duelist_A" if player_id == 0 else "duelist_B"
        opponent_key = "duelist_B" if player_id == 0 else "duelist_A"

        info_section = (
            f"Current round: {game_state['current_round']} / {game_state['max_rounds']}\n"
            f"Your Essence Points: {game_state[player_key]['essence_points']}\n"
            f"Opponent Essence Points: {game_state[opponent_key]['essence_points']}\n"
        )
        if game_state[player_key]["last_action"]:
            info_section += f"Your last channel: {game_state[player_key]['last_action']}\n"
        if game_state[opponent_key]["last_action"]:
            info_section += f"Opponent's last channel was: {game_state[opponent_key]['last_action']}\n"

        rules_section = (
            "You are a mystical duelist in the Tournament of Triads, channeling elemental forces "
            "of Flame, Tide, and Gale. Each round, select one element to channel.\n\n"
            "Rules of Elemental Dominance:\n"
            " - Flame defeats Gale\n"
            " - Gale defeats Tide\n"
            " - Tide defeats Flame\n\n"
            "Use the following exact tokens (placed inside \\boxed{{}}):\n"
            "[Channel: Flame]\n[Channel: Tide]\n[Channel: Gale]\n\n"
            "Put your final answer within \\boxed{{}} at the end of your response.\n\n"
            "Example valid response:\n"
            "Flame is powerful this turn; I trust its strength.\n"
            "\\boxed{{[Channel: Flame]}}\n\n"
            "Example invalid response:\n"
            "I summon Fire!\n"
            "\\boxed{{[Cast: Fire]}}\n(reason: invalid token or element)\n"
        )

        transcript_section = ""
        if game_state["transcript"]:
            transcript_lines = []
            for t in game_state["transcript"]:
                transcript_lines.append(
                    f"Round {t['round']}: Duelist A={t['A']} Duelist B={t['B']} → {t['outcome']}"
                )
            transcript_section = "Battle Log:\n" + "\n".join(transcript_lines) + "\n\n"

        return info_section + transcript_section + rules_section

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Process one action from the player whose turn it is.

        A round needs one valid action from each player. The first valid
        action of a round is only buffered; the second one triggers
        resolution: the round is scored, appended to the transcript,
        announced to both players, and the end-of-match conditions are
        checked (target score reached, or final round played).

        Args:
            action: Raw response text of the current player. The token is
                taken from its \\boxed{} content and must match
                ``self.pattern``; otherwise the move is reported as invalid
                and nothing is buffered.

        Returns:
            The result of ``self.state.step()``.
        """
        player_id = self.state.current_player_id
        game_state = self.state.game_state

        # Moves are simultaneous, so the raw action is echoed to its author
        # only. Broadcasting it would let the second player of a round read
        # the first player's choice before committing to their own.
        self.state.add_observation(
            action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=player_id
        )
        action_token = self._extract_answer_content(action)

        # Validate format
        if not self.pattern.match(action_token):
            reason = "Malformed or unsupported action format."
            self.state.set_invalid_move(reason=reason)
            game_state["invalid_reason"] = reason
            return self.state.step()

        # Buffer the move and wait until both actions have been received.
        self._round_actions[player_id] = action_token
        if None in self._round_actions.values():
            return self.state.step()

        # Both actions are in: take them out of the buffer and clear it for
        # the next round.
        a_action = self._round_actions[0]
        b_action = self._round_actions[1]
        self._round_actions = {0: None, 1: None}

        # Both tokens already passed self.pattern when they were buffered,
        # so these matches cannot fail; group 1 is the element name.
        element_A = self.pattern.match(a_action).group(1)
        element_B = self.pattern.match(b_action).group(1)

        # Publish the moves in the shared state only now that the round is
        # resolved, so a pending choice is never exposed through it.
        game_state["duelist_A"]["last_action"] = a_action
        game_state["duelist_B"]["last_action"] = b_action

        # Determine winner for the round
        if element_A == element_B:
            outcome_text = "Draw"
        elif self.dominance[element_A] == element_B:
            outcome_text = "A wins"
            game_state["duelist_A"]["essence_points"] += 1
        else:
            # Elements differ and A does not dominate B, so B dominates A.
            outcome_text = "B wins"
            game_state["duelist_B"]["essence_points"] += 1

        # Record transcript
        game_state["current_round"] += 1
        game_state["transcript"].append(
            {
                "round": game_state["current_round"],
                "A": a_action,
                "B": b_action,
                "outcome": outcome_text,
            }
        )

        a_points = game_state["duelist_A"]["essence_points"]
        b_points = game_state["duelist_B"]["essence_points"]

        # Reveal the resolved round to both players; this replaces the
        # information they previously got from seeing each other's raw action.
        self.state.add_observation(
            f"Round {game_state['current_round']} result: "
            f"Duelist A channeled {element_A}, Duelist B channeled {element_B} "
            f"-> {outcome_text}. Essence Points: A {a_points}, B {b_points}.",
            ta.ObservationType.GAME_MESSAGE, from_id=-1, to_id=-1
        )

        # Check for tournament winner
        winner_id = None
        win_reason = None
        if a_points >= self.score_to_win:
            winner_id = 0
            win_reason = f"Duelist A reached {self.score_to_win} Essence Points."
        elif b_points >= self.score_to_win:
            winner_id = 1
            win_reason = f"Duelist B reached {self.score_to_win} Essence Points."
        elif game_state["current_round"] >= game_state["max_rounds"]:
            # Final round played without anyone reaching the target score:
            # decide on points, or declare a draw when they are level.
            if a_points > b_points:
                winner_id = 0
                win_reason = "Duelist A has higher Essence Points."
            elif b_points > a_points:
                winner_id = 1
                win_reason = "Duelist B has higher Essence Points."
            else:
                game_state["winner"] = "Draw"
                game_state["is_terminal"] = True
                self.state.set_draw(reason="Both duelists have equal points after final round.")

        if winner_id is not None:
            game_state["winner"] = "duelist_A" if winner_id == 0 else "duelist_B"
            game_state["is_terminal"] = True
            self.state.set_winner(player_id=winner_id, reason=win_reason)

        # Proceed to next turn
        return self.state.step()
```