"""TextArena environment for the two-player card game 'Crown of Fools'."""
# Source: env.py as introduced by the patch "Add env.py from Openverse builder".

import re
import random
from typing import Any, Dict, Optional, Tuple, List

import textarena as ta


class CrownOfFoolsEnv(ta.Env):
    """
    Turn-based TextArena environment for 'Crown of Fools'.
    Two jesters draw, play, and discard cards to achieve the highest hand total.

    Players are keyed "A" (player id 0, "Jester Red") and "B" (player id 1,
    "Jester Blue") inside ``game_state["players"]``.
    """

    # Turn cap used for the terminal check when ``max_turns`` is None.
    _DEFAULT_TURN_CAP = 30

    def __init__(self, max_turns: Optional[int] = 30):
        """
        Store the turn limit and compile the action-grammar patterns.

        Args:
            max_turns: Number of legal actions after which the game is
                resolved. ``None`` falls back to ``_DEFAULT_TURN_CAP``.
        """
        self.max_turns = max_turns
        # Each pattern must match the whole (already extracted) action token.
        self.draw_pattern = re.compile(r"^\[Draw\]$")
        self.play_pattern = re.compile(r"^\[Play:[A-Za-z0-9_]+\]$")
        self.discard_pattern = re.compile(r"^\[Discard:[A-Za-z0-9_]+\]$")
        self.pass_pattern = re.compile(r"^\[Pass\]$")
        self.crown_pattern = re.compile(r"^\[Crown\]$")

    def _extract_answer_content(self, action: str) -> str:
        """
        Extract content from \\boxed{} for action parsing.

        Returns the stripped content of the first ``\\boxed{...}`` found in
        ``action``; if there is none, returns ``action`` stripped.
        """
        match = re.search(r"\\boxed\{([^}]*)\}", action, re.DOTALL)
        if match:
            return match.group(1).strip()
        return action.strip()

    # ---------------------- Helper Methods -----------------------
    def _init_deck(self, seed: int) -> List[str]:
        """
        Build the 26-card deck and shuffle it deterministically.

        The deck holds two copies of ``Num_1``..``Num_10``, one each of
        ``Trick_1``..``Trick_5`` and a single ``Crown_Joker``.

        Args:
            seed: Seed for the shuffle; equal seeds give equal deck orders.

        Returns:
            The shuffled deck (cards are later taken from the end via ``pop``).
        """
        deck: List[str] = []
        # Numbers 1-10, each present twice
        for value in range(1, 11):
            deck.extend([f"Num_{value}", f"Num_{value}"])
        # Trick cards 1-5
        deck.extend(f"Trick_{value}" for value in range(1, 6))
        # The single Crown Joker
        deck.append("Crown_Joker")
        # A private Random instance yields the same order as seeding the
        # module-level generator, without resetting global RNG state for
        # every other user of ``random`` in the process.
        random.Random(seed).shuffle(deck)
        return deck

    def _calc_hand_score(self, hand: List[str]) -> Tuple[int, bool]:
        """
        Calculate hand score applying Trick card doubling and Joker value.

        Args:
            hand: Card ids such as ``"Num_7"``, ``"Trick_3"``, ``"Crown_Joker"``.

        Returns:
            ``(score, has_joker)`` where ``score`` is the sum of Num values,
            plus ``t`` for every ``Trick_t`` whose value also appears among
            the Num cards, plus 5 if the Crown Joker is held.
        """
        nums: List[int] = []
        tricks: List[int] = []
        has_joker = False
        for card in hand:
            if card.startswith("Num_"):
                nums.append(int(card.split("_")[1]))
            elif card.startswith("Trick_"):
                tricks.append(int(card.split("_")[1]))
            elif card == "Crown_Joker":
                has_joker = True
        score = sum(nums)
        # Each Trick_x doubles one card of the same value if present:
        # adding the value once more is equivalent to doubling that card.
        score += sum(t for t in tricks if t in nums)
        if has_joker:
            score += 5
        return score, has_joker

    def _determine_winner(self) -> None:
        """
        Decide winner based on hand values and Joker possession.

        Recomputes both players' ``score`` / ``has_joker``, then reports the
        outcome through ``self.state`` and mirrors it in
        ``game_state["winner"]`` ("A", "B" or ``None`` for a draw). A tie on
        score is broken in favour of the sole holder of the Crown Joker.
        """
        players = self.state.game_state["players"]
        a_score, a_joker = self._calc_hand_score(players["A"]["hand"])
        b_score, b_joker = self._calc_hand_score(players["B"]["hand"])
        players["A"]["score"] = a_score
        players["B"]["score"] = b_score
        players["A"]["has_joker"] = a_joker
        players["B"]["has_joker"] = b_joker

        if a_score > b_score:
            self.state.set_winner(player_id=0, reason="Jester Red has the higher total hand value.")
            self.state.game_state["winner"] = "A"
        elif b_score > a_score:
            self.state.set_winner(player_id=1, reason="Jester Blue has the higher total hand value.")
            self.state.game_state["winner"] = "B"
        elif a_joker and not b_joker:
            # Tie on value: Joker possession breaks it.
            self.state.set_winner(player_id=0, reason="Tie on value, but Jester Red holds the Crown Joker.")
            self.state.game_state["winner"] = "A"
        elif b_joker and not a_joker:
            self.state.set_winner(player_id=1, reason="Tie on value, but Jester Blue holds the Crown Joker.")
            self.state.game_state["winner"] = "B"
        else:
            self.state.set_draw(reason="Equal hand values and no Joker advantage.")
            self.state.game_state["winner"] = None

    # ---------------------------------------------------------------

    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the Crown of Fools environment.

        Builds a fresh shuffled deck, deals three cards to each player
        (alternating A, B) and initialises the TextArena state.

        Args:
            num_players: Must be 2.
            seed: Shuffle seed; when ``None`` a random one in
                ``[0, 999999]`` is drawn and announced to the players.

        Returns:
            The initialised state object.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Crown of Fools is a two-player environment.")
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

        seed = seed if seed is not None else random.randint(0, 999999)
        deck = self._init_deck(seed)
        players = {
            "A": {"hand": [], "score": 0, "has_joker": False, "last_action": None},
            "B": {"hand": [], "score": 0, "has_joker": False, "last_action": None},
        }
        # Initial draw: three cards each, dealt alternately from the deck end.
        for _ in range(3):
            if deck:
                players["A"]["hand"].append(deck.pop())
            if deck:
                players["B"]["hand"].append(deck.pop())

        game_state: Dict[str, Any] = {
            "phase": "active",
            "turn_index": 0,
            "current_player": "A",
            "deck_order": deck,
            "discard_pile": [],
            "players": players,
            "history": [],
            "seed": seed,
            "terminal": False,
            "winner": None,
        }

        role_map = {0: "Jester Red (Player A)", 1: "Jester Blue (Player B)"}
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=role_map,
        )
        self.state.add_observation(
            "Welcome to the royal court! The Crown of Fools challenge begins.",
            ta.ObservationType.GAME_MESSAGE,
        )
        self.state.add_observation(f"Seed used: {seed}", ta.ObservationType.GAME_MESSAGE)
        return self.state

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Generate a contextual prompt for each jester.

        Args:
            player_id: 0 for Jester Red (key "A"), anything else for Jester
                Blue (key "B").
            game_state: Current game state; the player's hand and the top of
                the discard pile are read from it.

        Returns:
            The instruction text listing the hand, the discard top and the
            accepted action tokens.
        """
        role = "Jester Red" if player_id == 0 else "Jester Blue"
        player_key = "A" if player_id == 0 else "B"
        hand = game_state["players"][player_key]["hand"]
        top_discard = game_state["discard_pile"][-1] if game_state["discard_pile"] else "None"
        return (
            f"You are {role}, performing in the royal card duel 'Crown of Fools'.\n"
            "Your rival seeks to claim the court's laughter by building a mightier hand.\n"
            f"Your current hand: {', '.join(hand)}\n"
            f"Top of discard pile: {top_discard}\n\n"
            "Available actions (exact tokens required):\n"
            " • [Draw] – Take top card from the deck\n"
            # The grammar requires a card id after the colon; a bare
            # "[Play:]" / "[Discard:]" is rejected, so show the placeholder.
            " • [Play:<card_id>] – Apply a card’s effect from your hand\n"
            " • [Discard:<card_id>] – Remove an unwanted card (except Crown_Joker)\n"
            " • [Pass] – Skip your turn\n"
            " • [Crown] – Declare your confidence and end the round (after turn 5)\n\n"
            "Put your final answer inside \\boxed{} at the end of your response.\n\n"
            "Example valid response:\n"
            "The deck still hides potential.\n\\boxed{[Draw]}\n\n"
            "Example invalid response:\n"
            "I choose DrawCard.\n\\boxed{[DrawCard]}"
        )

    def _validate_and_execute_action(self, p_id: str, action: str) -> None:
        """
        Check grammar and legality, then apply side effects.

        Illegal or malformed actions are reported via
        ``self.state.set_invalid_move`` and leave the game state untouched.
        Legal actions are recorded in ``last_action`` / ``history``. Every
        legal action except ``[Crown]`` also advances ``turn_index``; the game
        is then resolved once the deck is empty or the turn cap is reached.
        ``[Crown]`` resolves the game immediately.

        Args:
            p_id: Player key, "A" or "B".
            action: The extracted action token, e.g. ``"[Play:Num_3]"``.
        """
        game_state = self.state.game_state
        players = game_state["players"]
        player = players[p_id]
        turn = game_state["turn_index"]

        # Grammar validation
        grammar = (
            self.draw_pattern,
            self.play_pattern,
            self.discard_pattern,
            self.pass_pattern,
            self.crown_pattern,
        )
        if not any(pattern.match(action) for pattern in grammar):
            self.state.set_invalid_move("Unrecognized action format")
            return

        # Action semantics
        if action == "[Draw]":
            if not game_state["deck_order"]:
                self.state.set_invalid_move("Deck empty; cannot draw")
                return
            player["hand"].append(game_state["deck_order"].pop())

        elif self.pass_pattern.match(action):
            pass  # No effect

        elif self.play_pattern.match(action):
            card_id = action.split(":", 1)[1][:-1]  # remove trailing ]
            if card_id not in player["hand"]:
                self.state.set_invalid_move("Card not in hand")
                return
            # Playing currently has no effect beyond moving the card from
            # the hand to the discard pile (Trick effects are not modelled).
            player["hand"].remove(card_id)
            game_state["discard_pile"].append(card_id)

        elif self.discard_pattern.match(action):
            card_id = action.split(":", 1)[1][:-1]  # remove trailing ]
            if card_id == "Crown_Joker":
                self.state.set_invalid_move("Cannot discard the Crown Joker")
                return
            if card_id not in player["hand"]:
                self.state.set_invalid_move("Card not in hand")
                return
            player["hand"].remove(card_id)
            game_state["discard_pile"].append(card_id)

        elif action == "[Crown]":
            if turn < 5:
                self.state.set_invalid_move("Crown can only be declared after turn 5")
                return
            # Record the declaration so the history shows how the game
            # ended; the turn counter is deliberately left unchanged.
            player["last_action"] = action
            game_state["history"].append({"player": p_id, "action": action})
            # Immediately end sequence
            self._determine_winner()
            game_state["terminal"] = True
            game_state["phase"] = "revealed"
            return

        # After legal action update info
        player["last_action"] = action
        game_state["turn_index"] += 1
        game_state["history"].append({"player": p_id, "action": action})
        # Recalculate score, Joker flags after every turn
        for key in ("A", "B"):
            score, has_joker = self._calc_hand_score(players[key]["hand"])
            players[key]["score"] = score
            players[key]["has_joker"] = has_joker

        # Terminal checkpoints: honour the configured turn limit rather than
        # a hard-coded constant.
        turn_cap = self.max_turns if self.max_turns is not None else self._DEFAULT_TURN_CAP
        if not game_state["deck_order"] or game_state["turn_index"] >= turn_cap:
            self._determine_winner()
            game_state["terminal"] = True
            game_state["phase"] = "revealed"

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        Logs the raw action as an observation, extracts the boxed token,
        validates/executes it (unless the state is already done) and then
        advances the underlying state.

        Args:
            action: The player's raw response text.

        Returns:
            The ``(done, info)`` pair returned by ``self.state.step()``.
        """
        self.state.add_observation(
            message=action,
            observation_type=ta.ObservationType.PLAYER_ACTION,
            from_id=self.state.current_player_id,
            to_id=-1,
        )

        extracted = self._extract_answer_content(action)
        p_id = "A" if self.state.current_player_id == 0 else "B"
        if not self.state.done:
            self._validate_and_execute_action(p_id, extracted)
        if self.state.done:
            self.state.game_state["terminal"] = True
        done, info = self.state.step()
        return done, info