Files

254 lines
11 KiB
Python
Raw Permalink Normal View History

2001-01-01 00:00:00 +00:00
```python
import re
import random
from typing import Any, Dict, Optional, Tuple, List
import textarena as ta
class CrownOfFoolsEnv(ta.Env):
    """
    Turn-based TextArena environment for 'Crown of Fools'.

    Two jesters draw, play, and discard cards to achieve the highest hand
    total. Player id 0 is stored under key ``"A"`` (Jester Red) and player
    id 1 under key ``"B"`` (Jester Blue).

    Deck (26 cards): ``Num_1``..``Num_10`` twice each, ``Trick_1``..``Trick_5``
    once each, and a single ``Crown_Joker``. Each player starts with three
    cards.

    The round is scored and revealed when a player successfully declares
    ``[Crown]``, when the deck becomes empty after a legal action, or when
    ``max_turns`` legal actions have been taken.
    """

    def __init__(self, max_turns: Optional[int] = 30):
        """
        Create the environment and compile the action-token patterns.

        Args:
            max_turns: Number of legal actions (across both players) after
                which the round is scored. ``None`` disables the limit that
                this class enforces itself; the value is also forwarded to
                ``ta.TwoPlayerState`` in ``reset``.
        """
        self.max_turns = max_turns
        self.draw_pattern = re.compile(r"^\[Draw\]$")
        self.play_pattern = re.compile(r"^\[Play:[A-Za-z0-9_]+\]$")
        self.discard_pattern = re.compile(r"^\[Discard:[A-Za-z0-9_]+\]$")
        self.pass_pattern = re.compile(r"^\[Pass\]$")
        self.crown_pattern = re.compile(r"^\[Crown\]$")

    def _extract_answer_content(self, action: str) -> str:
        """
        Extract the action token from a raw model response.

        The prompt asks for the final answer inside ``\\boxed{}`` at the end
        of the response, so the LAST ``\\boxed{...}`` span is used; earlier
        spans (e.g. ones quoted while reasoning) are ignored.

        Args:
            action: Raw text submitted by the player.

        Returns:
            The stripped content of the last ``\\boxed{...}`` span, or the
            stripped raw text when no such span exists.
        """
        boxed = re.findall(r"\\boxed\{([^}]*)\}", action)
        if boxed:
            return boxed[-1].strip()
        return action.strip()

    # ---------------------- Helper Methods -----------------------
    def _init_deck(self, seed: int) -> List[str]:
        """
        Build the deck and shuffle it deterministically for ``seed``.

        A private ``random.Random`` instance is used so that building a deck
        does not reseed the process-wide generator.

        Args:
            seed: Seed for the shuffle.

        Returns:
            The shuffled list of card ids. Callers draw with ``list.pop()``,
            i.e. from the end of the list.
        """
        rng = random.Random(seed)
        deck: List[str] = []
        # Numbers 1-10, two copies of each
        for value in range(1, 11):
            deck.extend([f"Num_{value}"] * 2)
        # Trick cards 1-5, one copy of each
        deck.extend(f"Trick_{value}" for value in range(1, 6))
        # The single Crown Joker
        deck.append("Crown_Joker")
        rng.shuffle(deck)
        return deck

    def _calc_hand_score(self, hand: List[str]) -> Tuple[int, bool]:
        """
        Calculate a hand's score.

        The score is the sum of all ``Num_x`` values, plus ``x`` once more for
        every ``Trick_x`` whose value also appears among the number cards,
        plus 5 if the hand holds the ``Crown_Joker``. Cards with any other id
        are ignored.

        Args:
            hand: Card ids held by one player.

        Returns:
            ``(score, has_joker)``.
        """
        nums: List[int] = []
        tricks: List[int] = []
        has_joker = False
        for card in hand:
            if card.startswith("Num_"):
                nums.append(int(card.split("_")[1]))
            elif card.startswith("Trick_"):
                tricks.append(int(card.split("_")[1]))
            elif card == "Crown_Joker":
                has_joker = True

        score = sum(nums)
        # Each Trick_x counts one matching number card a second time.
        score += sum(t for t in tricks if t in nums)
        if has_joker:
            score += 5
        return score, has_joker

    def _determine_winner(self):
        """
        Score both hands, store the results, and report the outcome.

        Writes ``score`` and ``has_joker`` for both players and ``winner``
        (``"A"``, ``"B"`` or ``None``) into the game state. The higher score
        wins; on equal scores the player holding the Crown Joker wins; if
        that does not separate them either the game is declared a draw.
        """
        game_state = self.state.game_state
        players = game_state["players"]
        a_score, a_joker = self._calc_hand_score(players["A"]["hand"])
        b_score, b_joker = self._calc_hand_score(players["B"]["hand"])
        players["A"]["score"] = a_score
        players["B"]["score"] = b_score
        players["A"]["has_joker"] = a_joker
        players["B"]["has_joker"] = b_joker

        if a_score > b_score:
            self.state.set_winner(player_id=0, reason="Jester Red has the higher total hand value.")
            game_state["winner"] = "A"
        elif b_score > a_score:
            self.state.set_winner(player_id=1, reason="Jester Blue has the higher total hand value.")
            game_state["winner"] = "B"
        elif a_joker and not b_joker:
            # Tie on value: Joker possession breaks it.
            self.state.set_winner(player_id=0, reason="Tie on value, but Jester Red holds the Crown Joker.")
            game_state["winner"] = "A"
        elif b_joker and not a_joker:
            self.state.set_winner(player_id=1, reason="Tie on value, but Jester Blue holds the Crown Joker.")
            game_state["winner"] = "B"
        else:
            self.state.set_draw(reason="Equal hand values and no Joker advantage.")
            game_state["winner"] = None

    def _finish_round(self) -> None:
        """Decide the winner and mark the game state as terminal/revealed."""
        self._determine_winner()
        game_state = self.state.game_state
        game_state["terminal"] = True
        game_state["phase"] = "revealed"

    @staticmethod
    def _card_id_from(action: str) -> str:
        """Return ``<card_id>`` from a ``[Play:<card_id>]``/``[Discard:<card_id>]`` token."""
        # Everything after the first ':' minus the trailing ']'.
        return action.split(":", 1)[1][:-1]

    # ---------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the Crown of Fools environment.

        Builds a freshly shuffled deck, deals three cards to each player
        (alternating A, B), and installs the initial game state.

        Args:
            num_players: Must be 2.
            seed: Seed for the deck shuffle. When ``None`` a random seed in
                ``[0, 999999]`` is picked and announced in an observation.

        Returns:
            The newly created state object.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Crown of Fools is a two-player environment.")
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
        seed = seed if seed is not None else random.randint(0, 999999)
        deck = self._init_deck(seed)

        players = {
            key: {"hand": [], "score": 0, "has_joker": False, "last_action": None}
            for key in ("A", "B")
        }
        # Initial draw: three cards each, alternating A then B.
        for _ in range(3):
            for key in ("A", "B"):
                if deck:
                    players[key]["hand"].append(deck.pop())

        game_state: Dict[str, Any] = {
            "phase": "active",
            "turn_index": 0,
            "current_player": "A",
            "deck_order": deck,
            "discard_pile": [],
            "players": players,
            "history": [],
            "seed": seed,
            "terminal": False,
            "winner": None,
        }
        role_map = {0: "Jester Red (Player A)", 1: "Jester Blue (Player B)"}
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=role_map,
        )
        self.state.add_observation(
            "Welcome to the royal court! The Crown of Fools challenge begins.",
            ta.ObservationType.GAME_MESSAGE,
        )
        self.state.add_observation(f"Seed used: {seed}", ta.ObservationType.GAME_MESSAGE)
        return self.state

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Generate a contextual prompt for each jester.

        Args:
            player_id: 0 for Jester Red (key ``"A"``); any other value is
                treated as Jester Blue (key ``"B"``).
            game_state: The game-state dict created in ``reset``.

        Returns:
            Prompt text showing the player's hand, the top of the discard
            pile, the available action tokens, and the answer format.
        """
        role = "Jester Red" if player_id == 0 else "Jester Blue"
        player_key = "A" if player_id == 0 else "B"
        hand = game_state["players"][player_key]["hand"]
        discard_pile = game_state["discard_pile"]
        top_discard = discard_pile[-1] if discard_pile else "None"
        return (
            f"You are {role}, performing in the royal card duel 'Crown of Fools'.\n"
            "Your rival seeks to claim the court's laughter by building a mightier hand.\n"
            f"Your current hand: {', '.join(hand)}\n"
            f"Top of discard pile: {top_discard}\n\n"
            "Available actions (exact tokens required):\n"
            " • [Draw] Take top card from the deck\n"
            " • [Play:<card_id>] Apply a cards effect from your hand\n"
            " • [Discard:<card_id>] Remove an unwanted card (except Crown_Joker)\n"
            " • [Pass] Skip your turn\n"
            " • [Crown] Declare your confidence and end the round (after turn 5)\n\n"
            "Put your final answer inside \\boxed{} at the end of your response.\n\n"
            "Example valid response:\n"
            "The deck still hides potential.\n\\boxed{[Draw]}\n\n"
            "Example invalid response:\n"
            "I choose DrawCard.\n\\boxed{[DrawCard]}"
        )

    def _validate_and_execute_action(self, p_id: str, action: str):
        """
        Check grammar and legality, then apply side effects.

        An action that is malformed or illegal is reported through
        ``self.state.set_invalid_move`` and leaves the game state untouched.
        A legal action is applied, logged in ``last_action``/``history``, and
        (except for ``[Crown]``, which ends the round at once) advances
        ``turn_index`` and refreshes both players' scores. The round is then
        finished if the deck is empty or the turn limit has been reached.

        Args:
            p_id: ``"A"`` or ``"B"`` -- key of the acting player.
            action: Extracted action token, e.g. ``"[Draw]"``.
        """
        game_state = self.state.game_state
        players = game_state["players"]
        player = players[p_id]

        # ``fullmatch`` rejects anything but the exact token (including a
        # trailing newline), so grammar check and dispatch cannot disagree.
        if self.draw_pattern.fullmatch(action):
            if not game_state["deck_order"]:
                self.state.set_invalid_move("Deck empty; cannot draw")
                return
            player["hand"].append(game_state["deck_order"].pop())
        elif self.pass_pattern.fullmatch(action):
            pass  # No effect
        elif self.play_pattern.fullmatch(action):
            card_id = self._card_id_from(action)
            if card_id not in player["hand"]:
                self.state.set_invalid_move("Card not in hand")
                return
            # Playing currently has no card-specific effect: the card simply
            # moves to the discard pile and stops counting toward the hand.
            # NOTE(review): this also lets Crown_Joker leave the hand via
            # [Play:...] even though [Discard:...] forbids it -- confirm intent.
            player["hand"].remove(card_id)
            game_state["discard_pile"].append(card_id)
        elif self.discard_pattern.fullmatch(action):
            card_id = self._card_id_from(action)
            if card_id == "Crown_Joker":
                self.state.set_invalid_move("Cannot discard the Crown Joker")
                return
            if card_id not in player["hand"]:
                self.state.set_invalid_move("Card not in hand")
                return
            player["hand"].remove(card_id)
            game_state["discard_pile"].append(card_id)
        elif self.crown_pattern.fullmatch(action):
            # turn_index counts legal actions taken so far by both players.
            if game_state["turn_index"] < 5:
                self.state.set_invalid_move("Crown can only be declared after turn 5")
                return
            # Log the declaration like any other legal action, then end the
            # round immediately (turn_index is intentionally not advanced).
            player["last_action"] = action
            game_state["history"].append({"player": p_id, "action": action})
            self._finish_round()
            return
        else:
            self.state.set_invalid_move("Unrecognized action format")
            return

        # After legal action update info
        player["last_action"] = action
        game_state["turn_index"] += 1
        game_state["history"].append({"player": p_id, "action": action})

        # Recalculate score, Joker flags after every turn
        for key in ("A", "B"):
            score, has_joker = self._calc_hand_score(players[key]["hand"])
            players[key]["score"] = score
            players[key]["has_joker"] = has_joker

        # Terminal checkpoints: deck exhausted or configured turn limit hit.
        turn_limit_reached = (
            self.max_turns is not None and game_state["turn_index"] >= self.max_turns
        )
        if not game_state["deck_order"] or turn_limit_reached:
            self._finish_round()

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw response is recorded as a player-action observation, the
        action token is extracted from it, and -- unless the state is already
        done -- validated and executed.

        Args:
            action: Raw text submitted by the current player.

        Returns:
            The ``(done, info)`` pair returned by ``self.state.step()``.
        """
        self.state.add_observation(
            message=action,
            observation_type=ta.ObservationType.PLAYER_ACTION,
            from_id=self.state.current_player_id,
            to_id=-1,
        )
        extracted = self._extract_answer_content(action)
        p_id = "A" if self.state.current_player_id == 0 else "B"
        if not self.state.done:
            self._validate_and_execute_action(p_id, extracted)
        if self.state.done:
            # Keep the game-state flag in sync however the state became done.
            self.state.game_state["terminal"] = True
        done, info = self.state.step()
        return done, info
```