Add env.py from Openverse builder
This commit is contained in:
223
env.py
Normal file
223
env.py
Normal file
@@ -0,0 +1,223 @@
|
||||
```python
|
||||
import re, random
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
import textarena as ta
|
||||
|
||||
|
||||
class ElementalChampionsEnv(ta.Env):
    """Two-player environment for 'Elemental Champions: The Tournament of Triads'.

    Every round both duelists channel one of Flame, Tide or Gale.  Flame
    defeats Gale, Gale defeats Tide and Tide defeats Flame; the winner of a
    round gains one Essence Point and identical elements are a draw.  The
    first duelist to reach ``score_to_win`` points wins the tournament;
    otherwise, after ``max_rounds`` rounds, the higher score wins and equal
    scores end in a draw.

    Moves are collected one player at a time and only resolved once both
    players have submitted, so a round spans two calls to :meth:`step`.
    """

    def __init__(self):
        """Set up the game constants; call :meth:`reset` before :meth:`step`."""
        self.valid_elements = ["Flame", "Tide", "Gale"]
        # A move is the exact token "[Channel: <Element>]"; group 1 captures
        # the element name.
        self.pattern = re.compile(r"^\[Channel:\s*(Flame|Tide|Gale)\]$")
        self.max_rounds = 5
        self.score_to_win = 3
        # Maps each element to the element it defeats.
        self.dominance = {
            "Flame": "Gale",
            "Gale": "Tide",
            "Tide": "Flame",
        }
        self.state = None
        # Pending move tokens of the round in progress, keyed by player id.
        # Created here as well as in reset() so the attribute always exists.
        self._round_actions = {0: None, 1: None}

    def _extract_answer_content(self, action: str) -> str:
        """Return the text inside ``\\boxed{...}``, or the stripped action.

        The double-brace form ``\\boxed{{...}}`` (the form shown in the
        player prompt) is tried first, then the single-brace form.  When
        neither is present the whole action, stripped of surrounding
        whitespace, is returned.
        """
        for boxed in (r"\\boxed\{\{([^}]*)\}\}", r"\\boxed\{([^}]*)\}"):
            found = re.search(boxed, action, re.DOTALL)
            if found:
                return found.group(1).strip()
        return action.strip()

    def reset(self, num_players: int, seed: Optional[int] = None):
        """Reset the environment to the start of a new tournament.

        Args:
            num_players: Number of players (must be 2).
            seed: Optional seed for deterministic behavior.  When omitted a
                random seed is recorded in the game state.

        Returns:
            The freshly initialised state object.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Elemental Champions is strictly a 2-player game.")

        # Every round consumes one turn per player, so the turn budget must be
        # twice the round budget; passing max_rounds alone would allow only
        # half of the rounds.
        self.state = ta.TwoPlayerState(
            num_players=num_players, seed=seed, max_turns=self.max_rounds * 2
        )

        game_state: Dict[str, Any] = {
            # `seed or ...` would throw away a legitimate seed of 0.
            "seed": seed if seed is not None else random.randint(1, 9999999),
            "current_round": 0,
            "max_rounds": self.max_rounds,
            "score_to_win": self.score_to_win,
            "duelist_A": {
                "name": "duelist_A",
                "essence_points": 0,
                "last_action": None,
            },
            "duelist_B": {
                "name": "duelist_B",
                "essence_points": 0,
                "last_action": None,
            },
            "transcript": [],
            "winner": None,
            "is_terminal": False,
            "invalid_reason": None,
        }

        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
        )

        welcome_message = (
            f"Welcome to the Tournament of Triads! First to {self.score_to_win} Essence Points wins.\n"
            "Choose your elemental channel each round: Flame, Tide, or Gale."
        )
        self.state.add_observation(
            welcome_message, ta.ObservationType.GAME_MESSAGE, from_id=-1, to_id=-1
        )

        # Fresh buffer for the simultaneous moves of the first round.
        self._round_actions = {0: None, 1: None}

        return self.state

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """Build the prompt shown to ``player_id``.

        The prompt consists of the score summary (plus both duelists' last
        channels when known), the battle log of resolved rounds, and the
        rules together with the required answer format.

        Args:
            player_id: 0 for duelist A, 1 for duelist B.
            game_state: The game-state dictionary created in :meth:`reset`.

        Returns:
            The complete prompt text.
        """
        player_key = "duelist_A" if player_id == 0 else "duelist_B"
        opponent_key = "duelist_B" if player_id == 0 else "duelist_A"
        me = game_state[player_key]
        opponent = game_state[opponent_key]

        info_section = (
            f"Current round: {game_state['current_round']} / {game_state['max_rounds']}\n"
            f"Your Essence Points: {me['essence_points']}\n"
            f"Opponent Essence Points: {opponent['essence_points']}\n"
        )
        if me["last_action"]:
            info_section += f"Your last channel: {me['last_action']}\n"
        if opponent["last_action"]:
            info_section += f"Opponent's last channel was: {opponent['last_action']}\n"

        # These are plain (non-f) strings on purpose: the doubled braces are
        # emitted literally and the answer extractor accepts that form.
        rules_section = (
            "You are a mystical duelist in the Tournament of Triads, channeling elemental forces "
            "of Flame, Tide, and Gale. Each round, select one element to channel.\n\n"
            "Rules of Elemental Dominance:\n"
            " - Flame defeats Gale\n"
            " - Gale defeats Tide\n"
            " - Tide defeats Flame\n\n"
            "Use the following exact tokens (placed inside \\boxed{{}}):\n"
            "[Channel: Flame]\n[Channel: Tide]\n[Channel: Gale]\n\n"
            "Put your final answer within \\boxed{{}} at the end of your response.\n\n"
            "Example valid response:\n"
            "Flame is powerful this turn; I trust its strength.\n"
            "\\boxed{{[Channel: Flame]}}\n\n"
            "Example invalid response:\n"
            "I summon Fire!\n"
            "\\boxed{{[Cast: Fire]}}\n(reason: invalid token or element)\n"
        )

        transcript_section = ""
        if game_state["transcript"]:
            transcript_lines = [
                f"Round {t['round']}: Duelist A={t['A']} Duelist B={t['B']} → {t['outcome']}"
                for t in game_state["transcript"]
            ]
            transcript_section = "Battle Log:\n" + "\n".join(transcript_lines) + "\n\n"

        return info_section + transcript_section + rules_section

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """Process the current player's move.

        The move is validated and buffered.  When it completes the round
        (both players have submitted) the round is resolved, scored and
        checked for the end of the tournament.

        Args:
            action: Raw response of the current player; the move token is
                expected inside ``\\boxed{...}``.

        Returns:
            Whatever ``self.state.step()`` returns.
        """
        player_id = self.state.current_player_id
        game_state = self.state.game_state

        # Echo the move only to its author.  Sending it to everyone would let
        # the second player read the first player's element before choosing,
        # which defeats the simultaneous selection.
        # NOTE(review): assumes to_id=<player id> delivers privately while
        # to_id=-1 addresses all players - confirm against textarena.
        self.state.add_observation(
            action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=player_id
        )

        action_token = self._extract_answer_content(action)
        if not self.pattern.match(action_token):
            reason = "Malformed or unsupported action format."
            self.state.set_invalid_move(reason=reason)
            game_state["invalid_reason"] = reason
            return self.state.step()

        self._round_actions[player_id] = action_token

        # First move of the round: hand the turn over without resolving.
        if None in self._round_actions.values():
            return self.state.step()

        self._resolve_round(game_state)

        # Empty the buffer for the next round.
        self._round_actions = {0: None, 1: None}
        return self.state.step()

    def _resolve_round(self, game_state: Dict[str, Any]) -> None:
        """Score the buffered pair of moves, log the round and announce it."""
        a_action = self._round_actions[0]
        b_action = self._round_actions[1]
        # Both tokens already passed validation, so the match cannot fail.
        element_a = self.pattern.match(a_action).group(1)
        element_b = self.pattern.match(b_action).group(1)

        # The moves become public only now that the round is complete.
        game_state["duelist_A"]["last_action"] = a_action
        game_state["duelist_B"]["last_action"] = b_action

        if element_a == element_b:
            outcome_text = "Draw"
        elif self.dominance[element_a] == element_b:
            outcome_text = "A wins"
            game_state["duelist_A"]["essence_points"] += 1
        else:
            # With three elements the only remaining case is B beating A.
            outcome_text = "B wins"
            game_state["duelist_B"]["essence_points"] += 1

        game_state["current_round"] += 1
        game_state["transcript"].append(
            {
                "round": game_state["current_round"],
                "A": a_action,
                "B": b_action,
                "outcome": outcome_text,
            }
        )

        a_points = game_state["duelist_A"]["essence_points"]
        b_points = game_state["duelist_B"]["essence_points"]

        # Tell both players what happened; individual moves are no longer
        # broadcast, so this is how each side learns the opponent's choice.
        round_summary = (
            f"Round {game_state['current_round']} result: "
            f"Duelist A channeled {element_a}, Duelist B channeled {element_b}. "
            f"Outcome: {outcome_text}. "
            f"Score: Duelist A {a_points} - Duelist B {b_points}."
        )
        self.state.add_observation(
            round_summary, ta.ObservationType.GAME_MESSAGE, from_id=-1, to_id=-1
        )

        self._check_tournament_end(game_state, a_points, b_points)

    def _check_tournament_end(
        self, game_state: Dict[str, Any], a_points: int, b_points: int
    ) -> None:
        """Declare a winner or a draw once a score or round limit is reached."""
        if a_points >= self.score_to_win:
            winner_id = 0
            reason = f"Duelist A reached {self.score_to_win} Essence Points."
        elif b_points >= self.score_to_win:
            winner_id = 1
            reason = f"Duelist B reached {self.score_to_win} Essence Points."
        elif game_state["current_round"] < game_state["max_rounds"]:
            # Nobody has enough points and rounds remain: keep playing.
            return
        elif a_points > b_points:
            winner_id = 0
            reason = "Duelist A has higher Essence Points."
        elif b_points > a_points:
            winner_id = 1
            reason = "Duelist B has higher Essence Points."
        else:
            game_state["winner"] = "Draw"
            game_state["is_terminal"] = True
            self.state.set_draw(reason="Both duelists have equal points after final round.")
            return

        game_state["winner"] = "duelist_A" if winner_id == 0 else "duelist_B"
        game_state["is_terminal"] = True
        self.state.set_winner(player_id=winner_id, reason=reason)
|
||||
```
|
||||
Reference in New Issue
Block a user