# env.py

import re
import random
from typing import Any, Dict, Optional, Tuple

import textarena as ta


class DuelOfSignsEnv(ta.Env):
    """Two-player 'Duel of Signs' Rock-Paper-Scissors tournament.

    Player id 0 is "PlayerA" and player id 1 is "PlayerB". Each round both
    players commit one ``[Play:<Sign>]``; before playing, a player may also
    announce a single ``[Predict:<Sign>]`` about the opponent's play for that
    round. ``[Concede]`` ends the tournament immediately in the opponent's
    favour. Once ``max_rounds`` rounds have been resolved, the higher score
    wins, with the number of round wins as tie-breaker.
    """

    # Maps each sign to the sign it defeats.
    _BEATS = {"Rock": "Scissors", "Scissors": "Paper", "Paper": "Rock"}
    # Role names indexed by player id.
    _ROLES = ("PlayerA", "PlayerB")

    def __init__(self, max_rounds: int = 5):
        """Environment implementing 'Duel of Signs: A Rock–Paper–Scissors Tournament'.

        Args:
            max_rounds: Number of rounds played before final scoring.
        """
        self.max_rounds = max_rounds
        self.play_pattern = re.compile(r'^\[Play:(Rock|Paper|Scissors)\]$')
        self.predict_pattern = re.compile(r'^\[Predict:(Rock|Paper|Scissors)\]$')
        self.concede_pattern = re.compile(r'^\[Concede\]$')
        self.signs = ["Rock", "Paper", "Scissors"]

    def _extract_answer_content(self, action: str) -> str:
        r"""Return the action token carried by a raw player response.

        The prompt asks players to put their final answer in ``\boxed{}`` at
        the end of the response, so the LAST boxed span wins; earlier mentions
        of ``\boxed{}`` in the reasoning text are ignored. If no boxed span is
        present, the whole response (stripped) is treated as the token.
        """
        boxed_spans = re.findall(r'\\boxed\{([^}]*)\}', action)
        if boxed_spans:
            return boxed_spans[-1].strip()
        return action.strip()

    def reset(self, num_players: int, seed: Optional[int] = None):
        """Initialize a new tournament.

        Args:
            num_players: Must be exactly 2.
            seed: Optional seed. Its parity picks the starting player
                (even or ``None`` -> PlayerA, odd -> PlayerB).

        Returns:
            The freshly initialised state object.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Duel of Signs requires exactly 2 players.")
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed)

        # Deterministic starting player derived from the seed's parity.
        start_player = 0 if (seed is None or seed % 2 == 0) else 1

        # Initialize game state as per schema
        self.state.game_state = {
            "tournament_name": "Grand Duels 2077",
            "seed": seed,
            "round_index": 1,
            "max_rounds": self.max_rounds,
            "turn_order": ["PlayerA", "PlayerB"],
            "players": {
                role: {
                    "score": 0,
                    "last_action": None,
                    "predicted_action": None,
                    "round_wins": 0,
                }
                for role in self._ROLES
            },
            "round_history": [],
            "current_turn": self._ROLES[start_player],
            "status": "active",
            "winner": None,
            "observation_log": [],
        }

        # Reset framework state
        self.state.reset(
            game_state=self.state.game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping={0: "PlayerA", 1: "PlayerB"},
        )

        # Announce tournament to all players
        intro_message = (
            f"Welcome to {self.state.game_state['tournament_name']}!\n"
            f"This is a {self.state.game_state['max_rounds']}-round duel between two Signmasters.\n"
            "Each round: choose [Play:Rock], [Play:Paper], or [Play:Scissors], or attempt [Predict:<Sign>].\n"
            "Win: +2 points, Draw: +1, Loss: 0, Correct Prediction: +1 bonus, Incorrect: -1.\n"
            "Concede anytime with [Concede]."
        )
        self.state.add_observation(intro_message, ta.ObservationType.GAME_MESSAGE)
        self.state.manually_set_current_player_id(start_player)

        return self.state

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """Build the instruction prompt shown to ``player_id``.

        The prompt reports the round counter and both scores as found in
        ``game_state`` at the time of the call, followed by the list of valid
        action tokens and two example responses.
        """
        role = "PlayerA" if player_id == 0 else "PlayerB"
        opponent = "PlayerB" if player_id == 0 else "PlayerA"
        player_data = game_state["players"][role]
        opp_data = game_state["players"][opponent]
        return (
            f"You are {role}, a Signmaster in the grand arena of Duel of Signs.\n"
            f"Round {game_state['round_index']} of {game_state['max_rounds']}.\n"
            f"Your score: {player_data['score']} | Opponent's score: {opp_data['score']}\n"
            "Choose your action token correctly:\n"
            "  [Play:Rock] | [Play:Paper] | [Play:Scissors]\n"
            "  [Predict:Rock] | [Predict:Paper] | [Predict:Scissors]\n"
            "  [Concede]\n"
            "Scoring: Win=+2, Draw=+1, Predicted correctly=+1, Incorrect prediction=−1.\n"
            "Put your final answer within \\boxed{} at the end of your response.\n\n"
            "Example valid response:\n"
            "I think Paper will protect me.\n"
            "\\boxed{[Play:Paper]}\n\n"
            "Example valid response with prediction:\n"
            "I foresee my foe choosing Scissors.\n"
            "\\boxed{[Predict:Scissors]}"
        )

    def _beats(self, sign1: str, sign2: str) -> bool:
        """Return True if sign1 beats sign2 according to RPS rule."""
        return sign1 in self._BEATS and self._BEATS[sign1] == sign2

    def _resolve_round(self, game_state: Dict[str, Any]) -> None:
        """Settle a concession or a completed round, if there is one.

        A concession is honoured as soon as it is recorded, without waiting
        for the other player. Otherwise nothing happens until both players
        have a ``[Play:*]`` recorded in ``last_action``; then pending
        predictions are scored, the duel is scored, the round is appended to
        the history, per-round fields are cleared and the round counter is
        advanced. When the counter passes ``max_rounds`` the final winner is
        determined.
        """
        players = game_state["players"]
        a_action = players["PlayerA"]["last_action"]
        b_action = players["PlayerB"]["last_action"]

        # Concessions end the tournament immediately ("Concede anytime").
        for loser, loser_action, winner_id, winner in (
            ("PlayerA", a_action, 1, "PlayerB"),
            ("PlayerB", b_action, 0, "PlayerA"),
        ):
            if loser_action and self.concede_pattern.match(loser_action):
                self.state.set_winner(player_id=winner_id, reason=f"{loser} conceded.")
                game_state["status"] = "concluded"
                game_state["winner"] = winner
                return

        if not (a_action and b_action):
            return  # round not complete yet

        a_match = self.play_pattern.match(a_action)
        b_match = self.play_pattern.match(b_action)
        if not (a_match and b_match):
            # Defensive: step() only ever records plays or concessions here.
            return
        a_play = a_match.group(1)
        b_play = b_match.group(1)
        round_no = game_state["round_index"]

        # Score predictions against the opponent's play, now that both are known.
        for role, opp_play in (("PlayerA", b_play), ("PlayerB", a_play)):
            pdata = players[role]
            prediction = pdata["predicted_action"]
            if not prediction:
                continue
            pred_match = self.predict_pattern.match(prediction)
            if pred_match:
                predicted_sign = pred_match.group(1)
                if predicted_sign == opp_play:
                    pdata["score"] += 1
                    self.state.add_observation(
                        f"{role} correctly predicted {predicted_sign} (+1 bonus).",
                        ta.ObservationType.GAME_MESSAGE,
                    )
                else:
                    pdata["score"] -= 1
                    self.state.add_observation(
                        f"{role} wrongly predicted {predicted_sign} (-1 penalty).",
                        ta.ObservationType.GAME_MESSAGE,
                    )
            pdata["predicted_action"] = None

        # Score the duel itself.
        if a_play == b_play:
            players["PlayerA"]["score"] += 1
            players["PlayerB"]["score"] += 1
            round_winner = "Draw"
            outcome_text = f"Round {round_no}: Draw ({a_play} vs {b_play})."
        elif self._beats(a_play, b_play):
            players["PlayerA"]["score"] += 2
            players["PlayerA"]["round_wins"] += 1
            round_winner = "PlayerA"
            outcome_text = f"Round {round_no}: PlayerA's {a_play} beats {b_play}."
        else:
            players["PlayerB"]["score"] += 2
            players["PlayerB"]["round_wins"] += 1
            round_winner = "PlayerB"
            outcome_text = f"Round {round_no}: PlayerB's {b_play} beats {a_play}."

        self.state.add_observation(outcome_text, ta.ObservationType.GAME_MESSAGE)
        game_state["observation_log"].append(outcome_text)
        game_state["round_history"].append(
            {
                "round": round_no,
                "PlayerA_action": a_action,
                "PlayerB_action": b_action,
                "winner": round_winner,
            }
        )

        # Prepare the next round.
        game_state["round_index"] = round_no + 1
        players["PlayerA"]["last_action"] = None
        players["PlayerB"]["last_action"] = None

        # End tournament if exceeded rounds
        if game_state["round_index"] > game_state["max_rounds"]:
            self._determine_final_winner(game_state)

    def _determine_final_winner(self, game_state: Dict[str, Any]):
        """Declare the tournament result from total score, then round wins.

        Sets the framework outcome (winner or draw), records the winner's role
        (or "Draw") in ``game_state["winner"]`` and marks the game concluded.
        """
        players = game_state["players"]
        score_gap = players["PlayerA"]["score"] - players["PlayerB"]["score"]
        wins_gap = players["PlayerA"]["round_wins"] - players["PlayerB"]["round_wins"]

        if score_gap != 0:
            winner_id = 0 if score_gap > 0 else 1
            self.state.set_winner(player_id=winner_id, reason="Higher total score.")
            game_state["winner"] = self._ROLES[winner_id]
        elif wins_gap != 0:
            # Tie-breaker: round wins
            winner_id = 0 if wins_gap > 0 else 1
            self.state.set_winner(player_id=winner_id, reason="Tiebreaker by round wins.")
            game_state["winner"] = self._ROLES[winner_id]
        else:
            self.state.set_draw(reason="Scores and round wins drawn.")
            game_state["winner"] = "Draw"
        game_state["status"] = "concluded"

    def _reject(self, reason: str) -> Tuple[bool, ta.Info]:
        """Flag the current action as invalid and keep the turn with the same player."""
        self.state.set_invalid_move(reason=reason)
        return self.state.step(rotate_player=False)

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        r"""Process a single player action.

        The token is taken from the last ``\boxed{...}`` span of ``action``
        and must be one of ``[Play:<Sign>]``, ``[Predict:<Sign>]`` or
        ``[Concede]``. Invalid moves are: acting after the game has concluded,
        an unrecognised token, acting again after already committing a play
        this round, and predicting more than once in a round.

        Args:
            action: Raw text response of the current player.

        Returns:
            The ``(done, info)`` tuple produced by the framework state.
        """
        player_id = self.state.current_player_id
        opp_id = 1 - player_id
        role = self._ROLES[player_id]
        opp_role = self._ROLES[opp_id]
        game_state = self.state.game_state

        # Echo the raw action to the acting player only. Plays are meant to be
        # simultaneous; broadcasting them would let the second mover counter
        # the first mover's sign every round. Both signs are revealed in the
        # round outcome message.
        self.state.add_observation(
            message=action,
            observation_type=ta.ObservationType.PLAYER_ACTION,
            from_id=player_id,
            to_id=player_id,
        )

        # Check if game already over
        if game_state["status"] != "active":
            return self._reject("Game already concluded.")

        # Extract boxed token for logic and classify it once
        token = self._extract_answer_content(action)
        play_match = self.play_pattern.match(token)
        predict_match = self.predict_pattern.match(token)
        concede_match = self.concede_pattern.match(token)
        if not (play_match or predict_match or concede_match):
            return self._reject("Unrecognized token format.")

        pstate = game_state["players"][role]

        # A committed play cannot be changed or followed by another action.
        if pstate["last_action"] is not None:
            return self._reject("Duplicate action this round.")

        # One prediction per round: without this cap two players could trade
        # predictions forever and the round would never terminate.
        if predict_match and pstate["predicted_action"] is not None:
            return self._reject("Prediction already made this round.")

        if predict_match:
            pstate["predicted_action"] = token
            self.state.add_observation(
                f"{role} predicts their opponent will play {predict_match.group(1)}.",
                ta.ObservationType.GAME_MESSAGE,
            )
        else:
            # Plays and concessions are both recorded as the round's action.
            pstate["last_action"] = token

        # Settle a concession or a completed round, if any.
        self._resolve_round(game_state)

        if not self.state.done and game_state["status"] == "active":
            # Hand the turn to the opponent while they still owe a play this
            # round (also true right after a round resolves). If the opponent
            # has already committed and this player only predicted, the turn
            # stays here; otherwise the opponent could never act validly.
            opponent_pending = game_state["players"][opp_role]["last_action"] is None
            next_player = opp_id if opponent_pending else player_id
            game_state["current_turn"] = self._ROLES[next_player]
            self.state.manually_set_current_player_id(next_player)

        # The next player was chosen explicitly above, so the framework must
        # not rotate again on top of it.
        return self.state.step(rotate_player=False)