Files
dummy-env/env.py
2001-01-01 00:00:00 +00:00

254 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
```python
import re
import random
from typing import Any, Dict, Optional, Tuple, List
import textarena as ta
class CrownOfFoolsEnv(ta.Env):
    """
    Turn-based TextArena environment for 'Crown of Fools'.

    Two jesters draw, play, and discard cards to achieve the highest hand total.

    The deck holds 26 cards: ``Num_1``..``Num_10`` twice each, ``Trick_1``..
    ``Trick_5`` once each, and a single ``Crown_Joker``. Each player is dealt
    three cards on reset. The round ends when a player declares ``[Crown]``,
    when the deck is empty after a legal action, or when the turn limit is
    reached; the winner is then decided by ``_determine_winner``.
    """

    # Turn cap applied by the environment when ``max_turns`` is None.
    DEFAULT_MAX_TURNS = 30

    # Captures the payload of a ``\boxed{...}`` answer wrapper.
    _BOXED_PATTERN = re.compile(r"\\boxed\{([^}]*)\}")

    def __init__(self, max_turns: Optional[int] = DEFAULT_MAX_TURNS):
        """
        Create the environment and pre-compile the action grammar.

        Args:
            max_turns: Number of legal actions after which the round is scored.
                Also forwarded to the TextArena state on ``reset``. ``None``
                makes the environment fall back to ``DEFAULT_MAX_TURNS``.
        """
        self.max_turns = max_turns
        self.draw_pattern = re.compile(r"^\[Draw\]$")
        self.play_pattern = re.compile(r"^\[Play:[A-Za-z0-9_]+\]$")
        self.discard_pattern = re.compile(r"^\[Discard:[A-Za-z0-9_]+\]$")
        self.pass_pattern = re.compile(r"^\[Pass\]$")
        self.crown_pattern = re.compile(r"^\[Crown\]$")

    def _extract_answer_content(self, action: str) -> str:
        """
        Extract the action token from a raw player response.

        The prompt asks players to put their final answer in ``\\boxed{}`` at
        the end of the response, so when several boxed spans are present the
        last one is used.

        Args:
            action: Raw text produced by the player.

        Returns:
            The stripped content of the last ``\\boxed{...}`` span, or the
            stripped raw text when no boxed span is present.
        """
        boxed_spans = self._BOXED_PATTERN.findall(action)
        if boxed_spans:
            return boxed_spans[-1].strip()
        return action.strip()

    # ---------------------- Helper Methods -----------------------
    def _init_deck(self, seed: int) -> List[str]:
        """
        Build the 26-card deck and shuffle it deterministically.

        A private ``random.Random`` instance is used so that building a deck
        does not reseed the process-wide random generator; the resulting order
        for a given seed is the same as seeding the global generator.

        Args:
            seed: Seed that fully determines the shuffled order.

        Returns:
            The shuffled deck; cards are later drawn from the end of the list.
        """
        rng = random.Random(seed)
        # Numbers 1-10, each present twice
        deck = [f"Num_{value}" for value in range(1, 11) for _ in range(2)]
        # Trick cards 1-5
        deck.extend(f"Trick_{value}" for value in range(1, 6))
        # Add the single Crown Joker
        deck.append("Crown_Joker")
        rng.shuffle(deck)
        return deck

    def _calc_hand_score(self, hand: List[str]) -> Tuple[int, bool]:
        """
        Calculate a hand's score.

        The score is the sum of all ``Num_x`` values, plus ``x`` once for every
        ``Trick_x`` whose value also appears among the number cards, plus 5 if
        the hand contains ``Crown_Joker``. Cards with any other name are
        ignored.

        Args:
            hand: Card identifiers held by one player.

        Returns:
            ``(score, has_joker)``.
        """
        nums: List[int] = []
        tricks: List[int] = []
        has_joker = False
        for card in hand:
            if card.startswith("Num_"):
                nums.append(int(card.split("_")[1]))
            elif card.startswith("Trick_"):
                tricks.append(int(card.split("_")[1]))
            elif card == "Crown_Joker":
                has_joker = True

        score = sum(nums)
        # Each Trick_x doubles one number card of the same value, if present.
        score += sum(t for t in tricks if t in nums)
        if has_joker:
            score += 5
        return score, has_joker

    def _refresh_scores(self) -> Dict[str, Tuple[int, bool]]:
        """
        Recompute and store ``score`` / ``has_joker`` for both players.

        Returns:
            Mapping of player key (``"A"``/``"B"``) to ``(score, has_joker)``.
        """
        players = self.state.game_state["players"]
        results: Dict[str, Tuple[int, bool]] = {}
        for key in ("A", "B"):
            score, has_joker = self._calc_hand_score(players[key]["hand"])
            players[key]["score"] = score
            players[key]["has_joker"] = has_joker
            results[key] = (score, has_joker)
        return results

    def _determine_winner(self) -> None:
        """
        Decide the outcome from the current hands and report it to the state.

        The higher hand score wins. On equal scores, the player holding the
        Crown Joker wins; otherwise the round is declared a draw. The stored
        per-player scores and ``game_state["winner"]`` (``"A"``, ``"B"`` or
        ``None``) are updated as well.
        """
        game_state = self.state.game_state
        results = self._refresh_scores()
        a_score, a_joker = results["A"]
        b_score, b_joker = results["B"]

        if a_score > b_score:
            winner, reason = "A", "Jester Red has the higher total hand value."
        elif b_score > a_score:
            winner, reason = "B", "Jester Blue has the higher total hand value."
        elif a_joker and not b_joker:
            # tie: Joker possession breaks it
            winner, reason = "A", "Tie on value, but Jester Red holds the Crown Joker."
        elif b_joker and not a_joker:
            winner, reason = "B", "Tie on value, but Jester Blue holds the Crown Joker."
        else:
            winner, reason = None, "Equal hand values and no Joker advantage."

        if winner is None:
            self.state.set_draw(reason=reason)
        else:
            self.state.set_winner(player_id=0 if winner == "A" else 1, reason=reason)
        game_state["winner"] = winner

    # ---------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the Crown of Fools environment.

        Creates a fresh two-player state, builds and shuffles the deck, deals
        three cards to each player (alternating A, B) and announces the seed.

        Args:
            num_players: Must be 2.
            seed: Deck seed. When ``None``, a seed in ``[0, 999999]`` is drawn
                from the global random generator and stored in the game state.

        Returns:
            The newly initialised state object.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Crown of Fools is a two-player environment.")
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
        seed = seed if seed is not None else random.randint(0, 999999)
        deck = self._init_deck(seed)
        players = {
            "A": {"hand": [], "score": 0, "has_joker": False, "last_action": None},
            "B": {"hand": [], "score": 0, "has_joker": False, "last_action": None},
        }
        # Initial draw
        for _ in range(3):
            for key in ("A", "B"):
                if deck:
                    players[key]["hand"].append(deck.pop())
        game_state: Dict[str, Any] = {
            "phase": "active",
            "turn_index": 0,
            "current_player": "A",
            "deck_order": deck,
            "discard_pile": [],
            "players": players,
            "history": [],
            "seed": seed,
            "terminal": False,
            "winner": None,
        }
        role_map = {0: "Jester Red (Player A)", 1: "Jester Blue (Player B)"}
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=role_map,
        )
        self.state.add_observation(
            "Welcome to the royal court! The Crown of Fools challenge begins.",
            ta.ObservationType.GAME_MESSAGE,
        )
        self.state.add_observation(f"Seed used: {seed}", ta.ObservationType.GAME_MESSAGE)
        return self.state

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Generate a contextual prompt for each jester.

        Args:
            player_id: 0 for Jester Red (player "A"); any other value is
                treated as Jester Blue (player "B").
            game_state: Game-state dict as built in ``reset``.

        Returns:
            Prompt text listing the player's hand, the top of the discard pile
            (``"None"`` when empty) and the accepted action tokens.
        """
        role = "Jester Red" if player_id == 0 else "Jester Blue"
        player_key = "A" if player_id == 0 else "B"
        hand = game_state["players"][player_key]["hand"]
        discard_pile = game_state["discard_pile"]
        top_discard = discard_pile[-1] if discard_pile else "None"
        return (
            f"You are {role}, performing in the royal card duel 'Crown of Fools'.\n"
            "Your rival seeks to claim the court's laughter by building a mightier hand.\n"
            f"Your current hand: {', '.join(hand)}\n"
            f"Top of discard pile: {top_discard}\n\n"
            "Available actions (exact tokens required):\n"
            " • [Draw] Take top card from the deck\n"
            " • [Play:<card_id>] Apply a cards effect from your hand\n"
            " • [Discard:<card_id>] Remove an unwanted card (except Crown_Joker)\n"
            " • [Pass] Skip your turn\n"
            " • [Crown] Declare your confidence and end the round (after turn 5)\n\n"
            "Put your final answer inside \\boxed{} at the end of your response.\n\n"
            "Example valid response:\n"
            "The deck still hides potential.\n\\boxed{[Draw]}\n\n"
            "Example invalid response:\n"
            "I choose DrawCard.\n\\boxed{[DrawCard]}"
        )

    def _validate_and_execute_action(self, p_id: str, action: str):
        """
        Check grammar and legality, then apply side effects.

        Illegal actions are reported through ``self.state.set_invalid_move``
        and leave the game state untouched. Every legal action, including
        ``[Crown]``, is recorded in ``last_action`` and ``history`` and
        advances ``turn_index``. The round is scored when ``[Crown]`` is
        declared, when the deck is empty, or when ``turn_index`` reaches the
        turn limit (``self.max_turns``, or ``DEFAULT_MAX_TURNS`` if that is
        ``None``).

        Args:
            p_id: Player key, ``"A"`` or ``"B"``.
            action: Already-extracted action token, e.g. ``"[Play:Num_3]"``.
        """
        game_state = self.state.game_state
        players = game_state["players"]
        player = players[p_id]
        turn = game_state["turn_index"]
        crown_declared = False

        # Action semantics; anything matching no pattern fails the grammar.
        if self.draw_pattern.match(action):
            if not game_state["deck_order"]:
                self.state.set_invalid_move("Deck empty; cannot draw")
                return
            # The top of the deck is the end of the list.
            player["hand"].append(game_state["deck_order"].pop())
        elif self.pass_pattern.match(action):
            pass  # No effect
        elif self.play_pattern.match(action):
            card_id = action.split(":", 1)[1][:-1]  # remove trailing ]
            if card_id not in player["hand"]:
                self.state.set_invalid_move("Card not in hand")
                return
            # Playing currently has no effect beyond moving the card to the
            # discard pile, for Trick cards as for any other card.
            # NOTE(review): this lets a player shed Crown_Joker via [Play:...]
            # even though [Discard:Crown_Joker] is rejected - confirm intended.
            player["hand"].remove(card_id)
            game_state["discard_pile"].append(card_id)
        elif self.discard_pattern.match(action):
            card_id = action.split(":", 1)[1][:-1]
            if card_id == "Crown_Joker":
                self.state.set_invalid_move("Cannot discard the Crown Joker")
                return
            if card_id not in player["hand"]:
                self.state.set_invalid_move("Card not in hand")
                return
            player["hand"].remove(card_id)
            game_state["discard_pile"].append(card_id)
        elif self.crown_pattern.match(action):
            if turn < 5:
                self.state.set_invalid_move("Crown can only be declared after turn 5")
                return
            crown_declared = True
        else:
            self.state.set_invalid_move("Unrecognized action format")
            return

        # After legal action update info
        player["last_action"] = action
        game_state["turn_index"] += 1
        game_state["history"].append({"player": p_id, "action": action})

        # Terminal checkpoints
        turn_limit = self.max_turns if self.max_turns is not None else self.DEFAULT_MAX_TURNS
        round_over = (
            crown_declared
            or not game_state["deck_order"]
            or game_state["turn_index"] >= turn_limit
        )
        if round_over:
            # Scoring also refreshes the stored scores and Joker flags.
            self._determine_winner()
            game_state["terminal"] = True
            game_state["phase"] = "revealed"
        else:
            # Recalculate score, Joker flags after every turn
            self._refresh_scores()

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw response is logged as a player-action observation, the action
        token is extracted and, unless the state is already done, validated
        and executed. The state's own ``step`` result is returned.

        Args:
            action: Raw response text from the current player.

        Returns:
            ``(done, info)`` as produced by ``self.state.step()``.
        """
        self.state.add_observation(
            message=action,
            observation_type=ta.ObservationType.PLAYER_ACTION,
            from_id=self.state.current_player_id,
            to_id=-1,
        )
        extracted = self._extract_answer_content(action)
        p_id = "A" if self.state.current_player_id == 0 else "B"
        if not self.state.done:
            self._validate_and_execute_action(p_id, extracted)
        if self.state.done:
            # Keep the game-state flag in sync with the state's done status.
            self.state.game_state["terminal"] = True
        done, info = self.state.step()
        return done, info
```