import re
import random
from typing import Any, Dict, Optional, Tuple

import textarena as ta


class DuelOfSignsEnv(ta.Env):
    """Two-player Rock-Paper-Scissors tournament with an optional prediction bet.

    Each round both players commit a ``[Play:<Sign>]``. Before committing, a
    player may also lodge a ``[Predict:<Sign>]`` about the opponent's sign for
    the round; it is scored when the round is resolved. ``[Concede]`` ends the
    tournament immediately in the opponent's favour.

    Scoring per round: win +2, draw +1 each, loss 0, correct prediction +1,
    incorrect prediction -1. After ``max_rounds`` resolved rounds the higher
    total score wins; ties are broken by number of rounds won.
    """

    # Player-id -> role name; index with 0 or 1.
    _ROLES = ("PlayerA", "PlayerB")

    # (winner_sign, loser_sign) pairs of the classic RPS cycle.
    _WINNING_PAIRS = frozenset(
        {
            ("Rock", "Scissors"),
            ("Scissors", "Paper"),
            ("Paper", "Rock"),
        }
    )

    def __init__(self, max_rounds: int = 5):
        """Environment implementing 'Duel of Signs: A Rock–Paper–Scissors Tournament'.

        Args:
            max_rounds: Number of rounds played before the final winner is
                determined.
        """
        self.max_rounds = max_rounds
        self.play_pattern = re.compile(r'^\[Play:(Rock|Paper|Scissors)\]$')
        self.predict_pattern = re.compile(r'^\[Predict:(Rock|Paper|Scissors)\]$')
        self.concede_pattern = re.compile(r'^\[Concede\]$')
        self.signs = ["Rock", "Paper", "Scissors"]

    def _extract_answer_content(self, action: str) -> str:
        r"""Return the action token carried by a raw player response.

        If the response contains ``\boxed{...}``, the stripped content of the
        first such box is returned; otherwise the whole response, stripped.

        Args:
            action: Raw text submitted by the player.

        Returns:
            The candidate token to validate against the action patterns.
        """
        match = re.search(r'\\boxed\{([^}]*)\}', action, re.DOTALL)
        if match:
            return match.group(1).strip()
        return action.strip()

    def reset(self, num_players: int, seed: Optional[int] = None):
        """Initialize a new tournament.

        Args:
            num_players: Must be exactly 2.
            seed: Optional seed. Its parity picks the starting player
                (``None`` or even -> PlayerA, odd -> PlayerB).

        Returns:
            The freshly initialised framework state object.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Duel of Signs requires exactly 2 players.")

        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed)

        # Deterministic start-player selection derived from the seed parity.
        start_player = 0 if (seed is None or seed % 2 == 0) else 1

        def fresh_player() -> Dict[str, Any]:
            # One independent record per player (never share the dict).
            return {
                "score": 0,
                "last_action": None,
                "predicted_action": None,
                "round_wins": 0,
            }

        # Initialize game state as per schema.
        game_state: Dict[str, Any] = {
            "tournament_name": "Grand Duels 2077",
            "seed": seed,
            "round_index": 1,
            "max_rounds": self.max_rounds,
            "turn_order": ["PlayerA", "PlayerB"],
            "players": {
                "PlayerA": fresh_player(),
                "PlayerB": fresh_player(),
            },
            "round_history": [],
            "current_turn": self._ROLES[start_player],
            "status": "active",
            "winner": None,
            "observation_log": [],
        }
        self.state.game_state = game_state

        # Reset framework state.
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping={0: "PlayerA", 1: "PlayerB"},
        )

        # Announce tournament to all players.
        intro_message = (
            f"Welcome to {self.state.game_state['tournament_name']}!\n"
            f"This is a {self.state.game_state['max_rounds']}-round duel between two Signmasters.\n"
            "Each round: choose [Play:Rock], [Play:Paper], or [Play:Scissors], or attempt [Predict:].\n"
            "Win: +2 points, Draw: +1, Loss: 0, Correct Prediction: +1 bonus, Incorrect: -1.\n"
            "Concede anytime with [Concede]."
        )
        self.state.add_observation(intro_message, ta.ObservationType.GAME_MESSAGE)
        self.state.manually_set_current_player_id(start_player)
        return self.state

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """Generate prompt text specific to each player.

        Args:
            player_id: 0 for PlayerA, anything else is treated as PlayerB.
            game_state: The tournament state dict built in ``reset``.

        Returns:
            The instruction prompt, including current round and scores.
        """
        role = "PlayerA" if player_id == 0 else "PlayerB"
        opponent = "PlayerB" if player_id == 0 else "PlayerA"
        player_data = game_state["players"][role]
        opp_data = game_state["players"][opponent]
        return (
            f"You are {role}, a Signmaster in the grand arena of Duel of Signs.\n"
            f"Round {game_state['round_index']} of {game_state['max_rounds']}.\n"
            f"Your score: {player_data['score']} | Opponent's score: {opp_data['score']}\n"
            "Choose your action token correctly:\n"
            " [Play:Rock] | [Play:Paper] | [Play:Scissors]\n"
            " [Predict:Rock] | [Predict:Paper] | [Predict:Scissors]\n"
            " [Concede]\n"
            "Scoring: Win=+2, Draw=+1, Predicted correctly=+1, Incorrect prediction=−1.\n"
            "Put your final answer within \\boxed{} at the end of your response.\n\n"
            "Example valid response:\n"
            "I think Paper will protect me.\n"
            "\\boxed{[Play:Paper]}\n\n"
            "Example valid response with prediction:\n"
            "I foresee my foe choosing Scissors.\n"
            "\\boxed{[Predict:Scissors]}"
        )

    def _beats(self, sign1: str, sign2: str) -> bool:
        """Return True if sign1 beats sign2 according to RPS rule."""
        return (sign1, sign2) in self._WINNING_PAIRS

    def _resolve_round(self, game_state: Dict[str, Any]) -> None:
        """Apply concessions and, once both players have played, score the round.

        Concessions are honoured as soon as they are recorded, even if the
        opponent has not acted yet. Otherwise nothing happens until both
        players have a recorded ``[Play:*]``; then predictions are scored, the
        duel is scored, the round is appended to ``round_history``, per-round
        fields are cleared, and the tournament is finalised if ``max_rounds``
        has been exceeded.

        Args:
            game_state: The tournament state dict; mutated in place.
        """
        players = game_state["players"]
        a_action = players["PlayerA"]["last_action"]
        b_action = players["PlayerB"]["last_action"]

        # A concession ends the game immediately; it must not wait for the
        # opponent's move (PlayerA is checked first, as before).
        for conceder_id, act in ((0, a_action), (1, b_action)):
            if act and self.concede_pattern.match(act):
                winner_id = 1 - conceder_id
                self.state.set_winner(
                    player_id=winner_id,
                    reason=f"{self._ROLES[conceder_id]} conceded.",
                )
                game_state["status"] = "concluded"
                game_state["winner"] = self._ROLES[winner_id]
                return

        if not (a_action and b_action):
            return  # not yet ready

        a_match = self.play_pattern.match(a_action)
        b_match = self.play_pattern.match(b_action)
        if not (a_match and b_match):
            # Defensive: ``step`` only stores play/concede tokens, so this is
            # not expected; bail out rather than score with undefined data.
            return
        a_play = a_match.group(1)
        b_play = b_match.group(1)
        round_no = game_state["round_index"]

        # Predictions target the opponent's play for this round; score them
        # now that both plays are known.
        for role, opp_play in (("PlayerA", b_play), ("PlayerB", a_play)):
            prediction = players[role]["predicted_action"]
            if not prediction:
                continue
            pred_match = self.predict_pattern.match(prediction)
            if pred_match:
                predicted_sign = pred_match.group(1)
                if predicted_sign == opp_play:
                    players[role]["score"] += 1
                    self.state.add_observation(
                        f"{role} correctly predicted {predicted_sign} (+1 bonus).",
                        ta.ObservationType.GAME_MESSAGE,
                    )
                else:
                    players[role]["score"] -= 1
                    self.state.add_observation(
                        f"{role} wrongly predicted {predicted_sign} (-1 penalty).",
                        ta.ObservationType.GAME_MESSAGE,
                    )
            players[role]["predicted_action"] = None

        # Score the duel itself.
        if a_play == b_play:
            players["PlayerA"]["score"] += 1
            players["PlayerB"]["score"] += 1
            round_winner = "Draw"
            outcome_text = f"Round {round_no}: Draw ({a_play} vs {b_play})."
        elif self._beats(a_play, b_play):
            players["PlayerA"]["score"] += 2
            players["PlayerA"]["round_wins"] += 1
            round_winner = "PlayerA"
            outcome_text = f"Round {round_no}: PlayerA's {a_play} beats {b_play}."
        else:
            players["PlayerB"]["score"] += 2
            players["PlayerB"]["round_wins"] += 1
            round_winner = "PlayerB"
            outcome_text = f"Round {round_no}: PlayerB's {b_play} beats {a_play}."

        self.state.add_observation(outcome_text, ta.ObservationType.GAME_MESSAGE)
        game_state["observation_log"].append(outcome_text)
        game_state["round_history"].append(
            {
                "round": round_no,
                "PlayerA_action": a_action,
                "PlayerB_action": b_action,
                "winner": round_winner,
            }
        )

        # Advance to the next round and clear the per-round commitments.
        game_state["round_index"] += 1
        players["PlayerA"]["last_action"] = None
        players["PlayerB"]["last_action"] = None

        # End tournament if exceeded rounds.
        if game_state["round_index"] > game_state["max_rounds"]:
            self._determine_final_winner(game_state)

    def _determine_final_winner(self, game_state: Dict[str, Any]):
        """Apply endgame scoring to determine winner.

        Higher total score wins; on equal scores, more round wins decides;
        if both are equal the tournament is a draw. Marks the game concluded.

        Args:
            game_state: The tournament state dict; mutated in place.
        """
        player_a = game_state["players"]["PlayerA"]
        player_b = game_state["players"]["PlayerB"]

        criteria = (
            ("score", "Higher total score."),
            ("round_wins", "Tiebreaker by round wins."),
        )
        for key, reason in criteria:
            if player_a[key] != player_b[key]:
                winner_id = 0 if player_a[key] > player_b[key] else 1
                self.state.set_winner(player_id=winner_id, reason=reason)
                game_state["winner"] = self._ROLES[winner_id]
                break
        else:
            self.state.set_draw(reason="Scores and round wins drawn.")
            game_state["winner"] = "Draw"

        game_state["status"] = "concluded"

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """Process a single player action.

        Validates the token, records it, resolves the round when possible,
        and hands the turn to whichever player still has to commit a play.

        Args:
            action: Raw response text of the current player.

        Returns:
            The ``(done, info)`` tuple produced by the framework state.
        """
        player_id = self.state.current_player_id
        role = "PlayerA" if player_id == 0 else "PlayerB"
        game_state = self.state.game_state

        # Log the raw action for the acting player only: RPS is a
        # simultaneous-choice game, so broadcasting it would reveal the
        # first mover's sign to the opponent before they choose.
        self.state.add_observation(
            message=action,
            observation_type=ta.ObservationType.PLAYER_ACTION,
            from_id=player_id,
            to_id=player_id,
        )

        # Extract boxed token for logic.
        token = self._extract_answer_content(action)
        play_match = self.play_pattern.match(token)
        predict_match = self.predict_pattern.match(token)
        concede_match = self.concede_pattern.match(token)

        # Validate format.
        if not (play_match or predict_match or concede_match):
            self.state.set_invalid_move(reason="Unrecognized token format.")
            return self.state.step()

        # Check if game already over.
        if game_state["status"] != "active":
            self.state.set_invalid_move(reason="Game already concluded.")
            return self.state.step()

        pstate = game_state["players"][role]

        # A player may commit only one play/concede per round.
        if pstate["last_action"] is not None:
            self.state.set_invalid_move(reason="Duplicate action this round.")
            return self.state.step()

        if predict_match:
            # A prediction is a side bet; the player still owes a play.
            pstate["predicted_action"] = token
            self.state.add_observation(
                f"{role} predicts their opponent will play {predict_match.group(1)}.",
                ta.ObservationType.GAME_MESSAGE,
            )
        else:
            # [Play:*] or [Concede] is the player's commitment for the round.
            pstate["last_action"] = token

        # Handles immediate concessions and full-round resolution.
        self._resolve_round(game_state)

        if self.state.done or game_state["status"] != "active":
            return self.state.step()

        # Pick the next actor explicitly: the opponent if they still owe a
        # play this round (also true right after a round was resolved),
        # otherwise the current player, who has only predicted so far.
        opponent_id = 1 - player_id
        opponent_pending = (
            game_state["players"][self._ROLES[opponent_id]]["last_action"] is None
        )
        next_player = opponent_id if opponent_pending else player_id
        game_state["current_turn"] = self._ROLES[next_player]
        self.state.manually_set_current_player_id(next_player)

        # NOTE(review): per the TextArena State API, ``step()`` rotates the
        # active player by default, which would undo the manual assignment
        # above; rotation is therefore disabled here. Confirm against the
        # installed textarena version.
        return self.state.step(rotate_player=False)