# File: testtest9/env.py (300 lines, 13 KiB, Python)
#
# NOTE: The code-hosting page this file was copied from displayed the warning
# "This file contains ambiguous Unicode characters": the file contains Unicode
# characters that might be confused with other characters. If that is
# intentional the warning can be ignored; otherwise review the string literals
# (hyphens, minus signs, apostrophes) for look-alike or missing characters.
import re
import random
from typing import Any, Dict, Optional, Tuple
import textarena as ta
class DuelOfSignsEnv(ta.Env):
    """Two-player, multi-round Rock-Paper-Scissors tournament with predictions.

    Each player submits one bracketed token per turn, optionally wrapped in
    ``\\boxed{...}``:

    * ``[Play:<Sign>]``    - commit a sign for the current round.
    * ``[Predict:<Sign>]`` - register a guess about the opponent's play for
      the current round; the guess is scored when the round resolves and the
      predicting player still has to play a sign.
    * ``[Concede]``        - forfeit the tournament immediately.

    A round resolves once both players have played. Scoring per round:
    win +2, draw +1 each, correct prediction +1, incorrect prediction -1.
    After ``max_rounds`` rounds the higher score wins, ties are broken by the
    number of rounds won, and a remaining tie is declared a draw.
    """

    def __init__(self, max_rounds: int = 5):
        """Create the environment.

        Args:
            max_rounds: Number of rounds played before the final winner is
                determined.
        """
        self.max_rounds = max_rounds
        # Tokens must match exactly (anchored on both ends, case-sensitive).
        self.play_pattern = re.compile(r'^\[Play:(Rock|Paper|Scissors)\]$')
        self.predict_pattern = re.compile(r'^\[Predict:(Rock|Paper|Scissors)\]$')
        self.concede_pattern = re.compile(r'^\[Concede\]$')
        self.signs = ["Rock", "Paper", "Scissors"]

    def _extract_answer_content(self, action: str) -> str:
        r"""Return the token to validate from a raw player response.

        If the response contains ``\boxed{...}``, the stripped content of the
        first such box is returned; otherwise the whole response, stripped.
        """
        match = re.search(r'\\boxed\{([^}]*)\}', action, re.DOTALL)
        if match:
            return match.group(1).strip()
        return action.strip()

    def reset(self, num_players: int, seed: Optional[int] = None):
        """Start a new tournament.

        Args:
            num_players: Must be exactly 2.
            seed: Optional seed. It is forwarded to the framework state and
                also decides who moves first: PlayerA when the seed is
                ``None`` or even, PlayerB when it is odd.

        Returns:
            The freshly initialised framework state object.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Duel of Signs requires exactly 2 players.")
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed)

        # Deterministic starting player derived from the seed parity.
        start_player = 0 if (seed is None or seed % 2 == 0) else 1

        # Initialize game state as per schema
        self.state.game_state = {
            "tournament_name": "Grand Duels 2077",
            "seed": seed,
            "round_index": 1,
            "max_rounds": self.max_rounds,
            "turn_order": ["PlayerA", "PlayerB"],
            "players": {
                "PlayerA": {
                    "score": 0,
                    "last_action": None,
                    "predicted_action": None,
                    "round_wins": 0,
                },
                "PlayerB": {
                    "score": 0,
                    "last_action": None,
                    "predicted_action": None,
                    "round_wins": 0,
                },
            },
            "round_history": [],
            "current_turn": "PlayerA" if start_player == 0 else "PlayerB",
            "status": "active",
            "winner": None,
            "observation_log": [],
        }

        # Reset framework state
        self.state.reset(
            game_state=self.state.game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping={0: "PlayerA", 1: "PlayerB"},
        )

        # Announce tournament to all players
        intro_message = (
            f"Welcome to {self.state.game_state['tournament_name']}!\n"
            f"This is a {self.state.game_state['max_rounds']}-round duel between two Signmasters.\n"
            "Each round: choose [Play:Rock], [Play:Paper], or [Play:Scissors], or attempt [Predict:<Sign>].\n"
            "Win: +2 points, Draw: +1, Loss: 0, Correct Prediction: +1 bonus, Incorrect: -1.\n"
            "Concede anytime with [Concede]."
        )
        self.state.add_observation(intro_message, ta.ObservationType.GAME_MESSAGE)
        self.state.manually_set_current_player_id(start_player)
        return self.state

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """Build the instruction prompt shown to one player.

        Args:
            player_id: 0 for PlayerA, anything else for PlayerB.
            game_state: The tournament state dictionary created in ``reset``.

        Returns:
            The prompt text, including the round number, both scores, the
            accepted tokens, the scoring rules and two example responses.
        """
        role = "PlayerA" if player_id == 0 else "PlayerB"
        opponent = "PlayerB" if player_id == 0 else "PlayerA"
        player_data = game_state["players"][role]
        opp_data = game_state["players"][opponent]
        return (
            f"You are {role}, a Signmaster in the grand arena of Duel of Signs.\n"
            f"Round {game_state['round_index']} of {game_state['max_rounds']}.\n"
            f"Your score: {player_data['score']} | Opponent's score: {opp_data['score']}\n"
            "Choose your action token correctly:\n"
            " [Play:Rock] | [Play:Paper] | [Play:Scissors]\n"
            " [Predict:Rock] | [Predict:Paper] | [Predict:Scissors]\n"
            " [Concede]\n"
            # The penalty is -1 (see _resolve_round); the sign was missing here.
            "Scoring: Win=+2, Draw=+1, Predicted correctly=+1, Incorrect prediction=-1.\n"
            "Put your final answer within \\boxed{} at the end of your response.\n\n"
            "Example valid response:\n"
            "I think Paper will protect me.\n"
            "\\boxed{[Play:Paper]}\n\n"
            "Example valid response with prediction:\n"
            "I foresee my foe choosing Scissors.\n"
            "\\boxed{[Predict:Scissors]}"
        )

    def _beats(self, sign1: str, sign2: str) -> bool:
        """Return True if ``sign1`` beats ``sign2`` under Rock-Paper-Scissors rules."""
        winning_pairs = {
            ("Rock", "Scissors"),
            ("Scissors", "Paper"),
            ("Paper", "Rock"),
        }
        return (sign1, sign2) in winning_pairs

    def _resolve_round(self, game_state: Dict[str, Any]) -> None:
        """Apply a concession, or score the round once both players have played.

        A concession ends the tournament immediately in favour of the other
        player, without waiting for the opponent's move. Otherwise nothing
        happens until both players have a recorded ``last_action``; then
        pending predictions are scored, the duel is scored, the round is
        appended to ``round_history``, the round counter advances and, if the
        last round was just played, the final winner is determined.

        Args:
            game_state: The tournament state dictionary; mutated in place.
        """
        players = game_state["players"]
        a_action = players["PlayerA"]["last_action"]
        b_action = players["PlayerB"]["last_action"]

        # Concessions are checked before the "both acted" guard so that
        # "[Concede]" takes effect on the turn it is submitted.
        concessions = (
            ("PlayerA", a_action, 1, "PlayerB"),
            ("PlayerB", b_action, 0, "PlayerA"),
        )
        for conceder, recorded, winner_id, winner_role in concessions:
            if recorded and self.concede_pattern.match(recorded):
                self.state.set_winner(player_id=winner_id, reason=f"{conceder} conceded.")
                game_state["status"] = "concluded"
                game_state["winner"] = winner_role
                return

        if not (a_action and b_action):
            return  # not yet ready

        # step() only ever stores validated [Play:*] or [Concede] tokens in
        # last_action, and concessions were handled above, so both match here.
        a_play = self.play_pattern.match(a_action).group(1)
        b_play = self.play_pattern.match(b_action).group(1)
        round_number = game_state["round_index"]

        # A prediction (it's not a play) is scored against the opponent's
        # sign now that both plays are known.
        for role, opp_play in (("PlayerA", b_play), ("PlayerB", a_play)):
            predict = players[role]["predicted_action"]
            if not predict:
                continue
            pred_match = self.predict_pattern.match(predict)
            if pred_match:
                predicted_sign = pred_match.group(1)
                if predicted_sign == opp_play:
                    players[role]["score"] += 1
                    self.state.add_observation(
                        f"{role} correctly predicted {predicted_sign} (+1 bonus).",
                        ta.ObservationType.GAME_MESSAGE,
                    )
                else:
                    players[role]["score"] -= 1
                    self.state.add_observation(
                        f"{role} wrongly predicted {predicted_sign} (-1 penalty).",
                        ta.ObservationType.GAME_MESSAGE,
                    )
            players[role]["predicted_action"] = None

        # Score the duel itself.
        if a_play == b_play:
            players["PlayerA"]["score"] += 1
            players["PlayerB"]["score"] += 1
            round_winner = "Draw"
            outcome_text = f"Round {round_number}: Draw ({a_play} vs {b_play})."
        elif self._beats(a_play, b_play):
            players["PlayerA"]["score"] += 2
            players["PlayerA"]["round_wins"] += 1
            round_winner = "PlayerA"
            outcome_text = f"Round {round_number}: PlayerA's {a_play} beats {b_play}."
        else:
            players["PlayerB"]["score"] += 2
            players["PlayerB"]["round_wins"] += 1
            round_winner = "PlayerB"
            outcome_text = f"Round {round_number}: PlayerB's {b_play} beats {a_play}."

        self.state.add_observation(outcome_text, ta.ObservationType.GAME_MESSAGE)
        game_state["observation_log"].append(outcome_text)
        game_state["round_history"].append(
            {
                "round": round_number,
                "PlayerA_action": a_action,
                "PlayerB_action": b_action,
                "winner": round_winner,
            }
        )

        # Prepare the next round.
        game_state["round_index"] += 1
        players["PlayerA"]["last_action"] = None
        players["PlayerB"]["last_action"] = None

        # End tournament if exceeded rounds
        if game_state["round_index"] > game_state["max_rounds"]:
            self._determine_final_winner(game_state)

    def _determine_final_winner(self, game_state: Dict[str, Any]):
        """Declare the tournament result and mark the game as concluded.

        The higher total score wins. Equal scores are broken by the number of
        rounds won; if those are equal too, the tournament is a draw.

        Args:
            game_state: The tournament state dictionary; ``winner`` and
                ``status`` are updated in place.
        """
        player_a = game_state["players"]["PlayerA"]
        player_b = game_state["players"]["PlayerB"]

        if player_a["score"] != player_b["score"]:
            a_ahead = player_a["score"] > player_b["score"]
            reason = "Higher total score."
        elif player_a["round_wins"] != player_b["round_wins"]:
            # Tie-breaker: round wins
            a_ahead = player_a["round_wins"] > player_b["round_wins"]
            reason = "Tiebreaker by round wins."
        else:
            a_ahead = None
            reason = "Scores and round wins drawn."

        if a_ahead is None:
            self.state.set_draw(reason=reason)
            game_state["winner"] = "Draw"
        elif a_ahead:
            self.state.set_winner(player_id=0, reason=reason)
            game_state["winner"] = "PlayerA"
        else:
            self.state.set_winner(player_id=1, reason=reason)
            game_state["winner"] = "PlayerB"
        game_state["status"] = "concluded"

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """Process the current player's response.

        The raw response is logged as a player action, the token is extracted
        and validated, and the action is recorded. A play or concession is
        stored as the player's ``last_action``; a prediction is stored as
        ``predicted_action`` and announced. The round is then resolved if
        possible and the turn is handed to the other player.

        Invalid moves (unrecognised token, game already concluded, or a
        second play in the same round) are reported to the framework via
        ``set_invalid_move``.

        Args:
            action: The raw text submitted by the current player.

        Returns:
            The ``(done, info)`` tuple produced by the framework state.
        """
        player_id = self.state.current_player_id
        role = "PlayerA" if player_id == 0 else "PlayerB"
        game_state = self.state.game_state

        # Log player action text
        self.state.add_observation(
            message=action,
            observation_type=ta.ObservationType.PLAYER_ACTION,
            from_id=player_id,
            to_id=-1,
        )

        # Extract boxed token for logic and classify it once.
        token = self._extract_answer_content(action)
        is_play = self.play_pattern.match(token)
        predict_match = self.predict_pattern.match(token)
        is_concede = self.concede_pattern.match(token)

        # Validate format
        if not (is_play or predict_match or is_concede):
            self.state.set_invalid_move(reason="Unrecognized token format.")
            return self.state.step()

        # Check if game already over
        if game_state["status"] != "active":
            self.state.set_invalid_move(reason="Game already concluded.")
            return self.state.step()

        pstate = game_state["players"][role]

        # Check duplicate action this round
        if pstate["last_action"] is not None:
            self.state.set_invalid_move(reason="Duplicate action this round.")
            return self.state.step()

        if predict_match:
            # A prediction does not count as the player's play for the round.
            pstate["predicted_action"] = token
            self.state.add_observation(
                f"{role} predicts their opponent will play {predict_match.group(1)}.",
                ta.ObservationType.GAME_MESSAGE,
            )
        else:
            # [Play:*] or [Concede]
            pstate["last_action"] = token

        # Check if both players have acts making round resolvable
        self._resolve_round(game_state)

        # Hand the turn to the other player while the game is still running.
        if not self.state.done and game_state["status"] == "active":
            self.state.manually_set_current_player_id(1 - player_id)

        # The next player was chosen explicitly above, so the framework's
        # automatic rotation in step() is disabled; rotating again would undo
        # the hand-off and give the same player another turn.
        return self.state.step(rotate_player=False)