import re
import random
from typing import Any, Dict, Optional, Tuple, List

import textarena as ta


class CrownOfFoolsEnv(ta.Env):
    """
    Turn-based TextArena environment for 'Crown of Fools'.

    Two jesters (player 0 = "A" / Jester Red, player 1 = "B" / Jester Blue)
    draw, play, and discard cards. When the round ends, the higher hand total
    wins; on equal totals the holder of the Crown Joker wins, otherwise the
    round is a draw.

    The round ends when the deck runs out, when the number of legal actions
    reaches ``max_turns``, or when a player declares ``[Crown]``.
    """

    # Number of legal actions that must have been taken before [Crown] is allowed.
    CROWN_MIN_TURN = 5
    # Flat bonus added to a hand that contains the Crown Joker.
    JOKER_BONUS = 5
    # Captures the content of a \boxed{...} answer.
    _BOXED_PATTERN = re.compile(r"\\boxed\{([^}]*)\}")

    def __init__(self, max_turns: Optional[int] = 30):
        """
        Args:
            max_turns: Number of legal actions after which the round is
                scored. ``None`` disables this environment's own turn limit.
        """
        self.max_turns = max_turns
        self.draw_pattern = re.compile(r"^\[Draw\]$")
        self.play_pattern = re.compile(r"^\[Play:[A-Za-z0-9_]+\]$")
        self.discard_pattern = re.compile(r"^\[Discard:[A-Za-z0-9_]+\]$")
        self.pass_pattern = re.compile(r"^\[Pass\]$")
        self.crown_pattern = re.compile(r"^\[Crown\]$")

    def _extract_answer_content(self, action: str) -> str:
        """
        Extract the action token from a raw player response.

        Returns the stripped content of the LAST ``\\boxed{...}`` in
        ``action``, because the prompt asks for the final answer at the end
        of the response. If there is no boxed answer, the whole response is
        returned stripped.
        """
        boxed = self._BOXED_PATTERN.findall(action)
        if boxed:
            return boxed[-1].strip()
        return action.strip()

    # ---------------------- Helper Methods -----------------------

    def _init_deck(self, seed: int) -> List[str]:
        """
        Build the 26-card deck and shuffle it deterministically.

        The deck holds two copies of ``Num_1`` .. ``Num_10``, one each of
        ``Trick_1`` .. ``Trick_5``, and a single ``Crown_Joker``.

        A private ``random.Random`` is used so that shuffling does not reseed
        the process-wide generator.
        """
        rng = random.Random(seed)
        deck = [f"Num_{value}" for value in range(1, 11) for _ in range(2)]
        deck.extend(f"Trick_{value}" for value in range(1, 6))
        deck.append("Crown_Joker")
        rng.shuffle(deck)
        return deck

    def _calc_hand_score(self, hand: List[str]) -> Tuple[int, bool]:
        """
        Score a hand.

        The score is the sum of all ``Num_x`` values, plus ``x`` once more for
        every ``Trick_x`` whose value also appears among the number cards,
        plus ``JOKER_BONUS`` if the hand contains ``Crown_Joker``.

        Returns:
            ``(score, has_joker)``.
        """
        nums: List[int] = []
        tricks: List[int] = []
        has_joker = False
        for card in hand:
            if card.startswith("Num_"):
                nums.append(int(card.split("_")[1]))
            elif card.startswith("Trick_"):
                tricks.append(int(card.split("_")[1]))
            elif card == "Crown_Joker":
                has_joker = True

        score = sum(nums)
        # A Trick_x counts one matching number card a second time.
        score += sum(t for t in tricks if t in nums)
        if has_joker:
            score += self.JOKER_BONUS
        return score, has_joker

    def _determine_winner(self):
        """
        Score both hands, store the results, and report the outcome.

        Writes ``score`` / ``has_joker`` for both players and
        ``game_state["winner"]`` ("A", "B", or ``None`` for a draw), and calls
        ``set_winner`` / ``set_draw`` on the state.
        """
        game_state = self.state.game_state
        players = game_state["players"]

        a_score, a_joker = self._calc_hand_score(players["A"]["hand"])
        b_score, b_joker = self._calc_hand_score(players["B"]["hand"])
        players["A"]["score"] = a_score
        players["B"]["score"] = b_score
        players["A"]["has_joker"] = a_joker
        players["B"]["has_joker"] = b_joker

        if a_score > b_score:
            self.state.set_winner(player_id=0, reason="Jester Red has the higher total hand value.")
            game_state["winner"] = "A"
        elif b_score > a_score:
            self.state.set_winner(player_id=1, reason="Jester Blue has the higher total hand value.")
            game_state["winner"] = "B"
        elif a_joker and not b_joker:
            # Tie on value: Joker possession breaks it.
            self.state.set_winner(player_id=0, reason="Tie on value, but Jester Red holds the Crown Joker.")
            game_state["winner"] = "A"
        elif b_joker and not a_joker:
            self.state.set_winner(player_id=1, reason="Tie on value, but Jester Blue holds the Crown Joker.")
            game_state["winner"] = "B"
        else:
            self.state.set_draw(reason="Equal hand values and no Joker advantage.")
            game_state["winner"] = None

    def _finish_round(self):
        """Score the round and mark the game state as terminal/revealed."""
        self._determine_winner()
        game_state = self.state.game_state
        game_state["terminal"] = True
        game_state["phase"] = "revealed"

    def _record_action(self, p_id: str, action: str):
        """Log a legal action: last_action, turn counter, and history."""
        game_state = self.state.game_state
        game_state["players"][p_id]["last_action"] = action
        game_state["turn_index"] += 1
        game_state["history"].append({"player": p_id, "action": action})

    # ---------------------------------------------------------------

    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Reset the Crown of Fools environment.

        Builds a fresh shuffled deck, deals three cards to each player
        (alternating A, B), and initialises the game state.

        Args:
            num_players: Must be 2.
            seed: Deck shuffle seed. When ``None`` a random seed in
                ``[0, 999999]`` is chosen and announced in an observation.

        Returns:
            The freshly initialised state object.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Crown of Fools is a two-player environment.")

        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
        seed = seed if seed is not None else random.randint(0, 999999)
        deck = self._init_deck(seed)

        players = {
            "A": {"hand": [], "score": 0, "has_joker": False, "last_action": None},
            "B": {"hand": [], "score": 0, "has_joker": False, "last_action": None},
        }

        # Initial draw: three cards each, dealt alternately from the end of the deck.
        for _ in range(3):
            for key in ("A", "B"):
                if deck:
                    players[key]["hand"].append(deck.pop())

        game_state: Dict[str, Any] = {
            "phase": "active",
            "turn_index": 0,
            "current_player": "A",
            "deck_order": deck,
            "discard_pile": [],
            "players": players,
            "history": [],
            "seed": seed,
            "terminal": False,
            "winner": None,
        }

        role_map = {0: "Jester Red (Player A)", 1: "Jester Blue (Player B)"}
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=role_map,
        )
        self.state.add_observation(
            "Welcome to the royal court! The Crown of Fools challenge begins.",
            ta.ObservationType.GAME_MESSAGE,
        )
        self.state.add_observation(f"Seed used: {seed}", ta.ObservationType.GAME_MESSAGE)
        return self.state

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Generate the contextual prompt for one jester.

        Shows the player's own hand, the top of the discard pile, and the
        action grammar. The card-taking actions are shown with a placeholder
        and an example so that the documented tokens actually satisfy
        ``play_pattern`` / ``discard_pattern``.
        """
        role = "Jester Red" if player_id == 0 else "Jester Blue"
        player_key = "A" if player_id == 0 else "B"
        hand = game_state["players"][player_key]["hand"]
        discard_pile = game_state["discard_pile"]
        top_discard = discard_pile[-1] if discard_pile else "None"
        return (
            f"You are {role}, performing in the royal card duel 'Crown of Fools'.\n"
            "Your rival seeks to claim the court's laughter by building a mightier hand.\n"
            f"Your current hand: {', '.join(hand)}\n"
            f"Top of discard pile: {top_discard}\n\n"
            "Available actions (exact tokens required):\n"
            " • [Draw] – Take top card from the deck\n"
            " • [Play:CARD] – Apply a card’s effect from your hand (e.g. [Play:Trick_3])\n"
            " • [Discard:CARD] – Remove an unwanted card (except Crown_Joker), e.g. [Discard:Num_2]\n"
            " • [Pass] – Skip your turn\n"
            f" • [Crown] – Declare your confidence and end the round (after turn {self.CROWN_MIN_TURN})\n\n"
            "Put your final answer inside \\boxed{} at the end of your response.\n\n"
            "Example valid response:\n"
            "The deck still hides potential.\n\\boxed{[Draw]}\n\n"
            "Example invalid response:\n"
            "I choose DrawCard.\n\\boxed{[DrawCard]}"
        )

    def _validate_and_execute_action(self, p_id: str, action: str):
        """
        Check grammar and legality of ``action``, then apply its effects.

        Args:
            p_id: "A" or "B", the acting player's key in ``game_state["players"]``.
            action: The already-extracted action token, e.g. ``[Draw]``.

        An ungrammatical or illegal action is reported through
        ``set_invalid_move`` and leaves the game state untouched. A legal
        action is logged, both players' scores are refreshed, and the round
        is scored if the deck is empty or the turn limit has been reached.
        ``[Crown]`` is logged and scores the round immediately.
        """
        game_state = self.state.game_state
        players = game_state["players"]
        player = players[p_id]

        # Grammar validation
        grammar = (
            self.draw_pattern,
            self.play_pattern,
            self.discard_pattern,
            self.pass_pattern,
            self.crown_pattern,
        )
        if not any(pattern.fullmatch(action) for pattern in grammar):
            self.state.set_invalid_move("Unrecognized action format")
            return

        # Action semantics
        if self.draw_pattern.fullmatch(action):
            if not game_state["deck_order"]:
                self.state.set_invalid_move("Deck empty; cannot draw")
                return
            # NOTE(review): the drawn card is not announced to the player here;
            # confirm whether hands are surfaced elsewhere after the initial prompt.
            player["hand"].append(game_state["deck_order"].pop())

        elif self.pass_pattern.fullmatch(action):
            pass  # No effect

        elif self.crown_pattern.fullmatch(action):
            if game_state["turn_index"] < self.CROWN_MIN_TURN:
                self.state.set_invalid_move("Crown can only be declared after turn 5")
                return
            # Log the declaration so the history includes the game-ending move,
            # then end the round immediately.
            self._record_action(p_id, action)
            self._finish_round()
            return

        elif self.play_pattern.fullmatch(action):
            card_id = action.split(":", 1)[1][:-1]  # drop trailing "]"
            if card_id not in player["hand"]:
                self.state.set_invalid_move("Card not in hand")
                return
            # Playing currently has no card-specific effect: the card simply
            # moves from the hand to the discard pile.
            player["hand"].remove(card_id)
            game_state["discard_pile"].append(card_id)

        else:  # discard
            card_id = action.split(":", 1)[1][:-1]  # drop trailing "]"
            if card_id == "Crown_Joker":
                self.state.set_invalid_move("Cannot discard the Crown Joker")
                return
            if card_id not in player["hand"]:
                self.state.set_invalid_move("Card not in hand")
                return
            player["hand"].remove(card_id)
            game_state["discard_pile"].append(card_id)

        # After a legal action, update bookkeeping.
        self._record_action(p_id, action)

        # Recalculate score and Joker flags after every turn.
        for key in ("A", "B"):
            score, has_joker = self._calc_hand_score(players[key]["hand"])
            players[key]["score"] = score
            players[key]["has_joker"] = has_joker

        # Terminal checkpoints: deck exhausted, or the configured turn limit hit.
        turn_limit_reached = (
            self.max_turns is not None and game_state["turn_index"] >= self.max_turns
        )
        if not game_state["deck_order"] or turn_limit_reached:
            self._finish_round()

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw response is broadcast as a player-action observation, the
        action token is extracted and executed, and the state is advanced.

        Returns:
            ``(done, info)`` as returned by ``self.state.step()``.
        """
        self.state.add_observation(
            message=action,
            observation_type=ta.ObservationType.PLAYER_ACTION,
            from_id=self.state.current_player_id,
            to_id=-1,
        )

        extracted = self._extract_answer_content(action)
        p_id = "A" if self.state.current_player_id == 0 else "B"

        if not self.state.done:
            self._validate_and_execute_action(p_id, extracted)

        if self.state.done:
            self.state.game_state["terminal"] = True

        done, info = self.state.step()
        return done, info