223 lines
9.1 KiB
Python
223 lines
9.1 KiB
Python
```python
|
|
import re, random
|
|
from typing import Any, Dict, Optional, Tuple
|
|
import textarena as ta
|
|
|
|
|
|
class ElementalChampionsEnv(ta.Env):
    """
    Implementation of 'Elemental Champions: The Tournament of Triads'
    as defined in the Stage 1 design document.

    Two duelists each channel one of three elements per round. Flame beats
    Gale, Gale beats Tide and Tide beats Flame; identical elements draw.
    A round win is worth one Essence Point. The tournament ends as soon as a
    duelist reaches ``score_to_win`` points, or after ``max_rounds`` rounds,
    in which case the higher score wins (equal scores are a draw).

    Choices are buffered per round and are only revealed to both players
    once both have been submitted.
    """

    def __init__(self):
        """Set up the game constants; per-game state is created in ``reset``."""
        super().__init__()

        # Game constants
        self.valid_elements = ["Flame", "Tide", "Gale"]
        self.pattern = re.compile(r"^\[Channel:\s*(Flame|Tide|Gale)\]$")
        self.max_rounds = 5
        self.score_to_win = 3
        # Maps each element to the element it defeats.
        self.dominance = {
            "Flame": "Gale",  # Flame over Gale
            "Gale": "Tide",  # Gale over Tide
            "Tide": "Flame",  # Tide over Flame
        }
        self.state = None
        # Per-round buffer of submitted tokens, keyed by player id.
        self._round_actions = {0: None, 1: None}

    def _extract_answer_content(self, action: str) -> str:
        """Extract content from \\boxed{} for parsing.

        The double-brace form ``\\boxed{{...}}`` is tried first (the prompt
        text shows that form literally), then the single-brace form. If
        neither is present the whole action, stripped, is returned.

        Args:
            action: Raw text submitted by the player.

        Returns:
            The stripped content of the first matching box, or the stripped
            action when no box is found.
        """
        match = re.search(r'\\boxed\{\{([^}]*)\}\}', action, re.DOTALL)
        if match:
            return match.group(1).strip()

        # Fallback single bracket variant
        match = re.search(r'\\boxed\{([^}]*)\}', action, re.DOTALL)
        if match:
            return match.group(1).strip()

        return action.strip()

    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: Number of players (must be 2)
            seed: Optional seed for deterministic behavior

        Returns:
            The freshly initialised state object.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Elemental Champions is strictly a 2-player game.")

        # NOTE(review): every round consumes two step() calls (one per
        # player). If TwoPlayerState counts a turn per step, max_turns should
        # probably be 2 * max_rounds -- confirm against the textarena State
        # implementation before changing.
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_rounds)

        game_state: Dict[str, Any] = {
            # `is not None` so that an explicit seed of 0 is kept.
            "seed": seed if seed is not None else random.randint(1, 9999999),
            "current_round": 0,
            "max_rounds": self.max_rounds,
            "score_to_win": self.score_to_win,
            "duelist_A": {
                "name": "duelist_A",
                "essence_points": 0,
                "last_action": None,
            },
            "duelist_B": {
                "name": "duelist_B",
                "essence_points": 0,
                "last_action": None,
            },
            "transcript": [],
            "winner": None,
            "is_terminal": False,
            "invalid_reason": None,
        }

        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt)

        welcome_message = (
            f"Welcome to the Tournament of Triads! First to {self.score_to_win} Essence Points wins.\n"
            "Choose your elemental channel each round: Flame, Tide, or Gale."
        )
        self.state.add_observation(
            welcome_message, ta.ObservationType.GAME_MESSAGE, from_id=-1, to_id=-1
        )

        # Prepare round buffer for simultaneous actions
        self._round_actions = {0: None, 1: None}

        return self.state

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """Generates per-player prompts consistent with Stage 1 document.

        Args:
            player_id: 0 for duelist A, any other value for duelist B.
            game_state: The game-state dictionary built in ``reset``.

        Returns:
            The status lines, the battle log (when any round has been
            played) and the rules text, concatenated in that order.
        """
        if player_id == 0:
            player_key, opponent_key = "duelist_A", "duelist_B"
        else:
            player_key, opponent_key = "duelist_B", "duelist_A"

        own = game_state[player_key]
        opponent = game_state[opponent_key]

        info_section = (
            f"Current round: {game_state['current_round']} / {game_state['max_rounds']}\n"
            f"Your Essence Points: {own['essence_points']}\n"
            f"Opponent Essence Points: {opponent['essence_points']}\n"
        )
        if own["last_action"]:
            info_section += f"Your last channel: {own['last_action']}\n"
        if opponent["last_action"]:
            info_section += f"Opponent's last channel was: {opponent['last_action']}\n"

        # NOTE(review): these are plain (non-f) strings, so "{{" and "}}" are
        # shown to the player literally as double braces. The answer parser
        # accepts both the double- and single-brace forms, so the text is
        # left unchanged here.
        rules_section = (
            "You are a mystical duelist in the Tournament of Triads, channeling elemental forces "
            "of Flame, Tide, and Gale. Each round, select one element to channel.\n\n"
            "Rules of Elemental Dominance:\n"
            " - Flame defeats Gale\n"
            " - Gale defeats Tide\n"
            " - Tide defeats Flame\n\n"
            "Use the following exact tokens (placed inside \\boxed{{}}):\n"
            "[Channel: Flame]\n[Channel: Tide]\n[Channel: Gale]\n\n"
            "Put your final answer within \\boxed{{}} at the end of your response.\n\n"
            "Example valid response:\n"
            "Flame is powerful this turn; I trust its strength.\n"
            "\\boxed{{[Channel: Flame]}}\n\n"
            "Example invalid response:\n"
            "I summon Fire!\n"
            "\\boxed{{[Cast: Fire]}}\n(reason: invalid token or element)\n"
        )

        transcript_section = ""
        if game_state["transcript"]:
            transcript_lines = [
                f"Round {t['round']}: Duelist A={t['A']} Duelist B={t['B']} → {t['outcome']}"
                for t in game_state["transcript"]
            ]
            transcript_section = "Battle Log:\n" + "\n".join(transcript_lines) + "\n\n"

        return info_section + transcript_section + rules_section

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        Handles simultaneous element selection, outcome computation,
        scoring, and termination. The first valid choice of a round is only
        buffered; the round is resolved when the second one arrives.

        Args:
            action: Raw text submitted by the current player.

        Returns:
            Whatever ``self.state.step()`` returns.
        """
        player_id = self.state.current_player_id

        # Echo the raw action to the acting player only. The original sent it
        # with to_id=-1 (the target used for the welcome message to everyone),
        # which would let the second mover read the first mover's choice
        # before committing to their own.
        self.state.add_observation(
            action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=player_id
        )
        action_token = self._extract_answer_content(action)

        # Validate format
        if not self.pattern.match(action_token):
            reason = "Malformed or unsupported action format."
            self.state.set_invalid_move(reason=reason)
            self.state.game_state["invalid_reason"] = reason
            return self.state.step()

        game_state = self.state.game_state
        game_state["invalid_reason"] = None  # a valid move clears any stale error

        # Store move (kept private in the buffer until the round resolves)
        self._round_actions[player_id] = action_token

        # Wait until both actions received
        if None in self._round_actions.values():
            # Not both actions received yet, just advance turn but not round
            return self.state.step()

        # Both actions are in, resolve the round
        self._resolve_round(game_state)
        self._check_tournament_end(game_state)

        # Reset simultaneous actions buffer for next round
        self._round_actions = {0: None, 1: None}

        # Proceed to next turn
        return self.state.step()

    def _resolve_round(self, game_state: Dict[str, Any]) -> None:
        """Score the buffered round, log it and announce the result to both players."""
        a_action = self._round_actions[0]
        b_action = self._round_actions[1]
        # Both tokens already passed self.pattern in step(), so match() succeeds.
        element_a = self.pattern.match(a_action).group(1)
        element_b = self.pattern.match(b_action).group(1)

        # Determine winner for the round
        if element_a == element_b:
            outcome_text = "Draw"
        elif self.dominance[element_a] == element_b:
            outcome_text = "A wins"
            game_state["duelist_A"]["essence_points"] += 1
        else:
            # With three elements in a cycle, the only remaining case is
            # that B's element dominates A's.
            outcome_text = "B wins"
            game_state["duelist_B"]["essence_points"] += 1

        # Choices become visible in the shared state only now.
        game_state["duelist_A"]["last_action"] = a_action
        game_state["duelist_B"]["last_action"] = b_action

        # Record transcript
        game_state["current_round"] += 1
        game_state["transcript"].append(
            {
                "round": game_state["current_round"],
                "A": a_action,
                "B": b_action,
                "outcome": outcome_text,
            }
        )

        # Tell both players what happened; without this neither player
        # receives any feedback about the round or the running score.
        round_summary = (
            f"Round {game_state['current_round']}: Duelist A channeled {element_a}, "
            f"Duelist B channeled {element_b}. Result: {outcome_text}. "
            f"Score: Duelist A {game_state['duelist_A']['essence_points']} - "
            f"Duelist B {game_state['duelist_B']['essence_points']}."
        )
        self.state.add_observation(
            round_summary, ta.ObservationType.GAME_MESSAGE, from_id=-1, to_id=-1
        )

    def _check_tournament_end(self, game_state: Dict[str, Any]) -> None:
        """Declare a winner or a draw if the score or round limit has been reached."""
        a_points = game_state["duelist_A"]["essence_points"]
        b_points = game_state["duelist_B"]["essence_points"]

        # Check for tournament winner
        if a_points >= self.score_to_win:
            self._declare_winner(
                game_state, 0, f"Duelist A reached {self.score_to_win} Essence Points."
            )
        elif b_points >= self.score_to_win:
            self._declare_winner(
                game_state, 1, f"Duelist B reached {self.score_to_win} Essence Points."
            )
        elif game_state["current_round"] >= game_state["max_rounds"]:
            # Check for draw or winner by points
            if a_points > b_points:
                self._declare_winner(game_state, 0, "Duelist A has higher Essence Points.")
            elif b_points > a_points:
                self._declare_winner(game_state, 1, "Duelist B has higher Essence Points.")
            else:
                game_state["winner"] = "Draw"
                game_state["is_terminal"] = True
                self.state.set_draw(reason="Both duelists have equal points after final round.")

    def _declare_winner(self, game_state: Dict[str, Any], player_id: int, reason: str) -> None:
        """Mark the game as finished in ``game_state`` and report the winner to the state."""
        game_state["winner"] = "duelist_A" if player_id == 0 else "duelist_B"
        game_state["is_terminal"] = True
        self.state.set_winner(player_id=player_id, reason=reason)
|
|
``` |