254 lines
11 KiB
Python
254 lines
11 KiB
Python
```python
|
||
import re
|
||
import random
|
||
from typing import Any, Dict, Optional, Tuple, List
|
||
|
||
import textarena as ta
|
||
|
||
|
||
class CrownOfFoolsEnv(ta.Env):
    """
    Turn-based TextArena environment for 'Crown of Fools'.

    Two jesters draw, play, and discard cards to achieve the highest hand total.

    Deck (26 cards): two copies each of ``Num_1`` .. ``Num_10``, one each of
    ``Trick_1`` .. ``Trick_5`` and a single ``Crown_Joker``.

    Scoring (see ``_calc_hand_score``): the sum of all ``Num_x`` values, plus
    ``x`` once more for every ``Trick_x`` whose value matches a ``Num_x`` in
    the same hand, plus a flat bonus for holding the ``Crown_Joker``.

    The round ends when the deck runs out, when the turn limit is reached,
    or when a player successfully declares ``[Crown]``.
    """

    # Turn limit applied by the environment when ``max_turns`` is None.
    DEFAULT_MAX_TURNS = 30
    # ``[Crown]`` is rejected while ``turn_index`` is below this value.
    CROWN_MIN_TURN = 5
    # Number of cards dealt to each player at reset.
    INITIAL_HAND_SIZE = 3
    # Flat score bonus for holding the Crown Joker.
    JOKER_BONUS = 5

    def __init__(self, max_turns: Optional[int] = 30):
        """
        Store the turn limit and pre-compile the action grammar.

        Args:
            max_turns: Number of legal actions after which the round is
                scored. ``None`` falls back to ``DEFAULT_MAX_TURNS``.
        """
        self.max_turns = max_turns
        self.draw_pattern = re.compile(r"^\[Draw\]$")
        self.play_pattern = re.compile(r"^\[Play:[A-Za-z0-9_]+\]$")
        self.discard_pattern = re.compile(r"^\[Discard:[A-Za-z0-9_]+\]$")
        self.pass_pattern = re.compile(r"^\[Pass\]$")
        self.crown_pattern = re.compile(r"^\[Crown\]$")

    def _extract_answer_content(self, action: str) -> str:
        r"""
        Extract the action token from a raw player response.

        The prompt asks players to put their final answer inside ``\boxed{}``
        at the end of the response, so when several boxed spans are present
        the LAST one is taken; earlier ones may merely be part of the player's
        reasoning.

        Args:
            action: Raw text submitted by the player.

        Returns:
            The stripped content of the last ``\boxed{...}`` span, or the
            stripped input unchanged when no boxed span is found.
        """
        boxed = re.findall(r"\\boxed\{([^}]*)\}", action, re.DOTALL)
        if boxed:
            return boxed[-1].strip()
        return action.strip()

    # ---------------------- Helper Methods -----------------------
    def _init_deck(self, seed: int) -> List[str]:
        """
        Build the 26-card deck and shuffle it deterministically.

        A private ``random.Random`` instance is used so that shuffling does
        not reseed the process-wide RNG; for a given seed the resulting order
        is the same as seeding the global generator and shuffling with it.

        Args:
            seed: Seed for the shuffle.

        Returns:
            The shuffled deck; callers draw from the end with ``pop()``.
        """
        # Numbers 1-10, two copies each, kept adjacent (Num_1, Num_1, Num_2, ...)
        deck = [f"Num_{value}" for value in range(1, 11) for _ in range(2)]
        # Trick cards 1-5
        deck.extend(f"Trick_{value}" for value in range(1, 6))
        # The single Crown Joker
        deck.append("Crown_Joker")
        random.Random(seed).shuffle(deck)
        return deck

    def _calc_hand_score(self, hand: List[str]) -> Tuple[int, bool]:
        """
        Score a hand.

        Args:
            hand: Card ids (``Num_<n>``, ``Trick_<n>`` or ``Crown_Joker``).
                Ids matching none of these are ignored.

        Returns:
            ``(score, has_joker)`` where ``score`` is the sum of the number
            cards, plus ``n`` for every ``Trick_n`` that has at least one
            ``Num_n`` in the hand, plus ``JOKER_BONUS`` if the Joker is held.
        """
        numbers: List[int] = []
        tricks: List[int] = []
        has_joker = False
        for card in hand:
            if card.startswith("Num_"):
                numbers.append(int(card.split("_")[1]))
            elif card.startswith("Trick_"):
                tricks.append(int(card.split("_")[1]))
            elif card == "Crown_Joker":
                has_joker = True

        score = sum(numbers)
        # A Trick_n counts one matching Num_n a second time (i.e. doubles it).
        score += sum(trick for trick in tricks if trick in numbers)
        if has_joker:
            score += self.JOKER_BONUS
        return score, has_joker

    def _determine_winner(self):
        """
        Score both hands, store the results, and report the outcome.

        Writes ``score`` and ``has_joker`` for both players and the top-level
        ``winner`` key (``"A"``, ``"B"`` or ``None``) into the game state, and
        calls ``set_winner`` / ``set_draw`` on the TextArena state. The higher
        score wins; on equal scores the sole Joker holder wins, otherwise the
        round is a draw.
        """
        game_state = self.state.game_state
        players = game_state["players"]

        results = {}
        for key in ("A", "B"):
            score, has_joker = self._calc_hand_score(players[key]["hand"])
            players[key]["score"] = score
            players[key]["has_joker"] = has_joker
            results[key] = (score, has_joker)
        a_score, a_joker = results["A"]
        b_score, b_joker = results["B"]

        if a_score > b_score:
            self.state.set_winner(player_id=0, reason="Jester Red has the higher total hand value.")
            game_state["winner"] = "A"
        elif b_score > a_score:
            self.state.set_winner(player_id=1, reason="Jester Blue has the higher total hand value.")
            game_state["winner"] = "B"
        elif a_joker and not b_joker:
            # Tie on value: Joker possession breaks it.
            self.state.set_winner(player_id=0, reason="Tie on value, but Jester Red holds the Crown Joker.")
            game_state["winner"] = "A"
        elif b_joker and not a_joker:
            self.state.set_winner(player_id=1, reason="Tie on value, but Jester Blue holds the Crown Joker.")
            game_state["winner"] = "B"
        else:
            self.state.set_draw(reason="Equal hand values and no Joker advantage.")
            game_state["winner"] = None

    def _end_round(self) -> None:
        """Declare the result and mark the game state as finished and revealed."""
        self._determine_winner()
        game_state = self.state.game_state
        game_state["terminal"] = True
        game_state["phase"] = "revealed"

    # ---------------------------------------------------------------

    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the Crown of Fools environment.

        Creates a fresh TextArena state, shuffles a new deck, deals
        ``INITIAL_HAND_SIZE`` cards to each player (alternating A, B) and
        announces the seed that was used.

        Args:
            num_players: Must be 2.
            seed: Shuffle seed; a random one in ``[0, 999999]`` is chosen
                when omitted.

        Returns:
            The freshly initialised state object.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Crown of Fools is a two-player environment.")
        # The state receives the caller's seed as given (possibly None).
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

        seed = seed if seed is not None else random.randint(0, 999999)
        deck = self._init_deck(seed)
        players = {
            "A": {"hand": [], "score": 0, "has_joker": False, "last_action": None},
            "B": {"hand": [], "score": 0, "has_joker": False, "last_action": None},
        }
        # Initial draw, alternating between the two players.
        for _ in range(self.INITIAL_HAND_SIZE):
            for key in ("A", "B"):
                if deck:
                    players[key]["hand"].append(deck.pop())

        game_state: Dict[str, Any] = {
            "phase": "active",
            "turn_index": 0,
            "current_player": "A",
            "deck_order": deck,
            "discard_pile": [],
            "players": players,
            "history": [],
            "seed": seed,
            "terminal": False,
            "winner": None,
        }

        role_map = {0: "Jester Red (Player A)", 1: "Jester Blue (Player B)"}
        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt, role_mapping=role_map)
        self.state.add_observation("Welcome to the royal court! The Crown of Fools challenge begins.", ta.ObservationType.GAME_MESSAGE)
        self.state.add_observation(f"Seed used: {seed}", ta.ObservationType.GAME_MESSAGE)
        return self.state

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Generate a contextual prompt for each jester.

        Args:
            player_id: 0 for Jester Red (player "A"), anything else for
                Jester Blue (player "B").
            game_state: Game-state dict as built in ``reset``.

        Returns:
            The prompt text: role, current hand, top of the discard pile, the
            action grammar, and one valid and one invalid example response.
        """
        role = "Jester Red" if player_id == 0 else "Jester Blue"
        player_key = "A" if player_id == 0 else "B"
        hand = game_state["players"][player_key]["hand"]
        top_discard = game_state["discard_pile"][-1] if game_state["discard_pile"] else "None"
        return (
            f"You are {role}, performing in the royal card duel 'Crown of Fools'.\n"
            "Your rival seeks to claim the court's laughter by building a mightier hand.\n"
            f"Your current hand: {', '.join(hand)}\n"
            f"Top of discard pile: {top_discard}\n\n"
            "Available actions (exact tokens required):\n"
            " • [Draw] – Take top card from the deck\n"
            " • [Play:<card_id>] – Apply a card’s effect from your hand\n"
            " • [Discard:<card_id>] – Remove an unwanted card (except Crown_Joker)\n"
            " • [Pass] – Skip your turn\n"
            f" • [Crown] – Declare your confidence and end the round (after turn {self.CROWN_MIN_TURN})\n\n"
            "Put your final answer inside \\boxed{} at the end of your response.\n\n"
            "Example valid response:\n"
            "The deck still hides potential.\n\\boxed{[Draw]}\n\n"
            "Example invalid response:\n"
            "I choose DrawCard.\n\\boxed{[DrawCard]}"
        )

    def _validate_and_execute_action(self, p_id: str, action: str):
        """
        Check grammar and legality, then apply side effects.

        Illegal actions are reported through ``self.state.set_invalid_move``
        and leave the game state untouched. Legal actions update the hand,
        deck and discard pile, are recorded in ``last_action`` and
        ``history``, and may end the round.

        Args:
            p_id: Player key, ``"A"`` or ``"B"``.
            action: Already-extracted action token, e.g. ``"[Draw]"`` or
                ``"[Play:Num_3]"``.
        """
        game_state = self.state.game_state
        players = game_state["players"]
        player = players[p_id]

        # Grammar validation
        grammar = (
            self.draw_pattern,
            self.play_pattern,
            self.discard_pattern,
            self.pass_pattern,
            self.crown_pattern,
        )
        if not any(pattern.match(action) for pattern in grammar):
            self.state.set_invalid_move("Unrecognized action format")
            return

        # Action semantics
        if self.draw_pattern.match(action):
            if not game_state["deck_order"]:
                self.state.set_invalid_move("Deck empty; cannot draw")
                return
            player["hand"].append(game_state["deck_order"].pop())

        elif self.pass_pattern.match(action):
            pass  # No effect

        elif self.play_pattern.match(action):
            card_id = action.split(":", 1)[1][:-1]  # drop the trailing ']'
            if card_id not in player["hand"]:
                self.state.set_invalid_move("Card not in hand")
                return
            # Played cards (Trick cards included) currently have no effect
            # beyond moving to the discard pile.
            # NOTE(review): this also lets a player shed the Crown_Joker via
            # [Play:Crown_Joker] although [Discard] forbids it - confirm intent.
            player["hand"].remove(card_id)
            game_state["discard_pile"].append(card_id)

        elif self.discard_pattern.match(action):
            card_id = action.split(":", 1)[1][:-1]
            if card_id == "Crown_Joker":
                self.state.set_invalid_move("Cannot discard the Crown Joker")
                return
            if card_id not in player["hand"]:
                self.state.set_invalid_move("Card not in hand")
                return
            player["hand"].remove(card_id)
            game_state["discard_pile"].append(card_id)

        elif self.crown_pattern.match(action):
            if game_state["turn_index"] < self.CROWN_MIN_TURN:
                self.state.set_invalid_move(f"Crown can only be declared after turn {self.CROWN_MIN_TURN}")
                return
            # Log the declaration so the round-ending move is visible in the
            # history, then end the round immediately.
            player["last_action"] = action
            game_state["history"].append({"player": p_id, "action": action})
            self._end_round()
            return

        # After a legal action, update bookkeeping.
        player["last_action"] = action
        game_state["turn_index"] += 1
        game_state["history"].append({"player": p_id, "action": action})
        # Recalculate score and Joker flags after every turn.
        for key in ("A", "B"):
            score, has_joker = self._calc_hand_score(players[key]["hand"])
            players[key]["score"] = score
            players[key]["has_joker"] = has_joker

        # Terminal checkpoints: deck exhausted or configured turn limit reached.
        turn_limit = self.max_turns if self.max_turns is not None else self.DEFAULT_MAX_TURNS
        if not game_state["deck_order"] or game_state["turn_index"] >= turn_limit:
            self._end_round()

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw response is logged as a player-action observation, the action
        token is extracted from it, and (unless the game is already done) it
        is validated and executed.

        Args:
            action: Raw response text from the current player.

        Returns:
            The ``(done, info)`` pair returned by ``self.state.step()``.
        """
        self.state.add_observation(
            message=action,
            observation_type=ta.ObservationType.PLAYER_ACTION,
            from_id=self.state.current_player_id,
            to_id=-1,
        )

        extracted = self._extract_answer_content(action)
        p_id = "A" if self.state.current_player_id == 0 else "B"
        if not self.state.done:
            self._validate_and_execute_action(p_id, extracted)
        if self.state.done:
            # Keep the game-state flag in sync with the TextArena state.
            self.state.game_state["terminal"] = True
        done, info = self.state.step()
        return done, info
|
||
``` |