Initial commit from Openverse UI
This commit is contained in:
300
env.py
Normal file
300
env.py
Normal file
@@ -0,0 +1,300 @@
|
||||
import re
|
||||
import random
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
|
||||
import textarena as ta
|
||||
|
||||
|
||||
class DuelOfSignsEnv(ta.Env):
    """Two-player Rock-Paper-Scissors tournament with optional predictions.

    Each round both players commit a sign with ``[Play:<Sign>]``. Before
    committing, a player may spend a turn on ``[Predict:<Sign>]`` to guess the
    opponent's sign for the round (+1 if right, -1 if wrong). ``[Concede]``
    ends the tournament immediately in the opponent's favour.

    Scoring per round: win = +2, draw = +1 each, loss = 0. After
    ``max_rounds`` rounds the higher score wins; ties are broken by the number
    of rounds won, and a full tie is declared a draw.
    """

    # (winner, loser) pairs of the classic Rock-Paper-Scissors cycle.
    _WINNING_PAIRS = frozenset(
        {
            ("Rock", "Scissors"),
            ("Scissors", "Paper"),
            ("Paper", "Rock"),
        }
    )

    def __init__(self, max_rounds: int = 5):
        """Environment implementing 'Duel of Signs: A Rock–Paper–Scissors Tournament'.

        Args:
            max_rounds: Number of rounds played before final scoring.

        Raises:
            ValueError: If ``max_rounds`` is smaller than 1.
        """
        if max_rounds < 1:
            raise ValueError("max_rounds must be at least 1.")
        self.max_rounds = max_rounds
        # Tokens are matched against the stripped answer, so the patterns are
        # anchored at both ends and tolerate no surrounding text.
        self.play_pattern = re.compile(r'^\[Play:(Rock|Paper|Scissors)\]$')
        self.predict_pattern = re.compile(r'^\[Predict:(Rock|Paper|Scissors)\]$')
        self.concede_pattern = re.compile(r'^\[Concede\]$')
        self.signs = ["Rock", "Paper", "Scissors"]

    def _extract_answer_content(self, action: str) -> str:
        r"""Return the action token carried by a raw player response.

        The prompt asks players to put their *final* answer in ``\boxed{}`` at
        the end of the response, so when several boxes are present the last
        one wins. Without any box the whole (stripped) response is returned.

        Args:
            action: Raw text produced by the player.

        Returns:
            The stripped content of the last ``\boxed{...}``, or the stripped
            response when no box is found.
        """
        boxed = re.findall(r'\\boxed\{([^}]*)\}', action, re.DOTALL)
        if boxed:
            return boxed[-1].strip()
        return action.strip()

    def reset(self, num_players: int, seed: Optional[int] = None):
        """Initialize a new tournament.

        Args:
            num_players: Must be exactly 2.
            seed: Optional seed. Its parity picks the opening player: PlayerA
                for ``None`` or an even seed, PlayerB for an odd seed.

        Returns:
            The freshly initialised framework state object.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Duel of Signs requires exactly 2 players.")
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed)

        # Deterministic choice of the opening player from the seed parity.
        start_player = 0 if (seed is None or seed % 2 == 0) else 1

        def fresh_player() -> Dict[str, Any]:
            # Separate dict per player so the two records never alias.
            return {
                "score": 0,
                "last_action": None,
                "predicted_action": None,
                "round_wins": 0,
            }

        # Initialize game state as per schema.
        self.state.game_state = {
            "tournament_name": "Grand Duels 2077",
            "seed": seed,
            "round_index": 1,
            "max_rounds": self.max_rounds,
            "turn_order": ["PlayerA", "PlayerB"],
            "players": {
                "PlayerA": fresh_player(),
                "PlayerB": fresh_player(),
            },
            "round_history": [],
            "current_turn": "PlayerA" if start_player == 0 else "PlayerB",
            "status": "active",
            "winner": None,
            "observation_log": [],
        }

        # Reset framework state.
        self.state.reset(
            game_state=self.state.game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping={0: "PlayerA", 1: "PlayerB"},
        )

        # Announce tournament to all players.
        intro_message = (
            f"Welcome to {self.state.game_state['tournament_name']}!\n"
            f"This is a {self.state.game_state['max_rounds']}-round duel between two Signmasters.\n"
            "Each round: choose [Play:Rock], [Play:Paper], or [Play:Scissors], or attempt [Predict:<Sign>].\n"
            "Win: +2 points, Draw: +1, Loss: 0, Correct Prediction: +1 bonus, Incorrect: -1.\n"
            "Concede anytime with [Concede]."
        )
        self.state.add_observation(intro_message, ta.ObservationType.GAME_MESSAGE)
        self.state.manually_set_current_player_id(start_player)

        return self.state

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """Build the instruction prompt shown to one player.

        Args:
            player_id: 0 for PlayerA, anything else for PlayerB.
            game_state: Game-state dict holding ``players``, ``round_index``
                and ``max_rounds``.

        Returns:
            The prompt text with role, round counter, scores, the list of
            valid tokens and two example responses.
        """
        role = "PlayerA" if player_id == 0 else "PlayerB"
        opponent = "PlayerB" if player_id == 0 else "PlayerA"
        player_data = game_state["players"][role]
        opp_data = game_state["players"][opponent]
        return (
            f"You are {role}, a Signmaster in the grand arena of Duel of Signs.\n"
            f"Round {game_state['round_index']} of {game_state['max_rounds']}.\n"
            f"Your score: {player_data['score']} | Opponent's score: {opp_data['score']}\n"
            "Choose your action token correctly:\n"
            "  [Play:Rock] | [Play:Paper] | [Play:Scissors]\n"
            "  [Predict:Rock] | [Predict:Paper] | [Predict:Scissors]\n"
            "  [Concede]\n"
            "Scoring: Win=+2, Draw=+1, Predicted correctly=+1, Incorrect prediction=−1.\n"
            "Put your final answer within \\boxed{} at the end of your response.\n\n"
            "Example valid response:\n"
            "I think Paper will protect me.\n"
            "\\boxed{[Play:Paper]}\n\n"
            "Example valid response with prediction:\n"
            "I foresee my foe choosing Scissors.\n"
            "\\boxed{[Predict:Scissors]}"
        )

    def _beats(self, sign1: str, sign2: str) -> bool:
        """Return True if ``sign1`` beats ``sign2`` under the RPS rule."""
        return (sign1, sign2) in self._WINNING_PAIRS

    def _resolve_round(self, game_state: Dict[str, Any]) -> None:
        """Settle a concession, or score the round once both signs are in.

        A concession is honoured as soon as it is recorded, even if the
        opponent has not acted yet. Otherwise nothing happens until both
        players have a recorded ``last_action``; then pending predictions are
        scored, the duel is scored, the round is appended to the history, the
        round counter advances and both actions are cleared. When the counter
        passes ``max_rounds`` the final winner is determined.

        Args:
            game_state: The mutable game-state dict (updated in place).
        """
        players = game_state["players"]
        a_action = players["PlayerA"]["last_action"]
        b_action = players["PlayerB"]["last_action"]

        # Concession must be checked BEFORE the readiness guard: waiting for
        # the opponent's move would contradict "Concede anytime".
        for conceder, conceded_action, winner_id, winner_role in (
            ("PlayerA", a_action, 1, "PlayerB"),
            ("PlayerB", b_action, 0, "PlayerA"),
        ):
            if conceded_action and self.concede_pattern.match(conceded_action):
                self.state.set_winner(player_id=winner_id, reason=f"{conceder} conceded.")
                game_state["status"] = "concluded"
                game_state["winner"] = winner_role
                return

        if not (a_action and b_action):
            return  # not yet ready

        # Extract the committed signs (None when the action is not a play).
        a_match = self.play_pattern.match(a_action)
        b_match = self.play_pattern.match(b_action)
        a_play = a_match.group(1) if a_match else None
        b_play = b_match.group(1) if b_match else None

        # Predictions target the opponent's sign of this round; score them now
        # that both signs are known, then clear them.
        for role, opp_play in (("PlayerA", b_play), ("PlayerB", a_play)):
            prediction = players[role]["predicted_action"]
            if not prediction:
                continue
            pred_match = self.predict_pattern.match(prediction)
            if pred_match:
                predicted_sign = pred_match.group(1)
                if predicted_sign == opp_play:
                    players[role]["score"] += 1
                    self.state.add_observation(
                        f"{role} correctly predicted {predicted_sign} (+1 bonus).",
                        ta.ObservationType.GAME_MESSAGE,
                    )
                else:
                    players[role]["score"] -= 1
                    self.state.add_observation(
                        f"{role} wrongly predicted {predicted_sign} (-1 penalty).",
                        ta.ObservationType.GAME_MESSAGE,
                    )
            players[role]["predicted_action"] = None

        # Score the duel itself.
        round_no = game_state["round_index"]
        round_winner = None
        if a_play and b_play:
            if a_play == b_play:
                players["PlayerA"]["score"] += 1
                players["PlayerB"]["score"] += 1
                round_winner = "Draw"
                outcome_text = f"Round {round_no}: Draw ({a_play} vs {b_play})."
            elif self._beats(a_play, b_play):
                players["PlayerA"]["score"] += 2
                players["PlayerA"]["round_wins"] += 1
                round_winner = "PlayerA"
                outcome_text = f"Round {round_no}: PlayerA's {a_play} beats {b_play}."
            else:
                players["PlayerB"]["score"] += 2
                players["PlayerB"]["round_wins"] += 1
                round_winner = "PlayerB"
                outcome_text = f"Round {round_no}: PlayerB's {b_play} beats {a_play}."

            self.state.add_observation(outcome_text, ta.ObservationType.GAME_MESSAGE)
            game_state["observation_log"].append(outcome_text)

        game_state["round_history"].append(
            {
                "round": round_no,
                "PlayerA_action": a_action,
                "PlayerB_action": b_action,
                "winner": round_winner,
            }
        )
        game_state["round_index"] += 1
        players["PlayerA"]["last_action"] = None
        players["PlayerB"]["last_action"] = None

        # End tournament if exceeded rounds.
        if game_state["round_index"] > game_state["max_rounds"]:
            self._determine_final_winner(game_state)

    def _determine_final_winner(self, game_state: Dict[str, Any]):
        """Apply endgame scoring and mark the tournament as concluded.

        The higher total score wins; equal scores fall back to the number of
        rounds won; if that is also equal the game is a draw.

        Args:
            game_state: The mutable game-state dict (updated in place).
        """
        player_a = game_state["players"]["PlayerA"]
        player_b = game_state["players"]["PlayerB"]

        if player_a["score"] != player_b["score"]:
            a_ahead = player_a["score"] > player_b["score"]
            reason = "Higher total score."
        elif player_a["round_wins"] != player_b["round_wins"]:
            # Tie-breaker: round wins.
            a_ahead = player_a["round_wins"] > player_b["round_wins"]
            reason = "Tiebreaker by round wins."
        else:
            self.state.set_draw(reason="Scores and round wins drawn.")
            game_state["winner"] = "Draw"
            game_state["status"] = "concluded"
            return

        self.state.set_winner(player_id=0 if a_ahead else 1, reason=reason)
        game_state["winner"] = "PlayerA" if a_ahead else "PlayerB"
        game_state["status"] = "concluded"

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """Process a single player action.

        Args:
            action: Raw response of the current player; the token is read from
                its last ``\\boxed{}`` (or the whole text if there is none).

        Returns:
            The ``(done, info)`` tuple produced by the framework state.
        """
        player_id = self.state.current_player_id
        role = "PlayerA" if player_id == 0 else "PlayerB"
        opp_role = "PlayerB" if player_id == 0 else "PlayerA"

        # Log the raw action for the acting player only. Broadcasting it would
        # reveal the committed sign to the opponent before they choose, which
        # defeats a simultaneous-reveal game.
        self.state.add_observation(
            message=action,
            observation_type=ta.ObservationType.PLAYER_ACTION,
            from_id=player_id,
            to_id=player_id,
        )

        # Extract boxed token for logic.
        token = self._extract_answer_content(action)
        play_match = self.play_pattern.match(token)
        predict_match = self.predict_pattern.match(token)
        concede_match = self.concede_pattern.match(token)

        # Validate format.
        if not (play_match or predict_match or concede_match):
            self.state.set_invalid_move(reason="Unrecognized token format.")
            return self.state.step()

        game_state = self.state.game_state

        # Check if game already over.
        if game_state["status"] != "active":
            self.state.set_invalid_move(reason="Game already concluded.")
            return self.state.step()

        players = game_state["players"]
        pstate = players[role]

        # A player who already committed this round may not act again.
        if pstate["last_action"] is not None:
            self.state.set_invalid_move(reason="Duplicate action this round.")
            return self.state.step()

        # Record the action.
        if play_match:
            pstate["last_action"] = token
            # Tell everyone a sign was committed without revealing which one.
            self.state.add_observation(
                f"{role} has locked in a sign.",
                ta.ObservationType.GAME_MESSAGE,
            )
        elif predict_match:
            pstate["predicted_action"] = token
            self.state.add_observation(
                f"{role} predicts their opponent will play {predict_match.group(1)}.",
                ta.ObservationType.GAME_MESSAGE,
            )
        else:
            pstate["last_action"] = token  # concession

        # Settle a concession or, when both signs are in, the round.
        self._resolve_round(game_state)

        # Pick the next mover: normally the opponent. If the opponent has
        # already committed a sign this round (and the current player only
        # predicted), the current player must move again; handing the turn to
        # the committed opponent would leave them with no legal action.
        if not self.state.done and game_state["status"] == "active":
            opponent_committed = players[opp_role]["last_action"] is not None
            next_player = player_id if opponent_committed else 1 - player_id
            self.state.manually_set_current_player_id(next_player)

        # The next player was chosen explicitly above, so the framework must
        # not rotate again (its default would undo the manual selection).
        return self.state.step(rotate_player=False)
|
||||
Reference in New Issue
Block a user