Files
blahblahblah/env.py
2001-01-01 00:00:00 +00:00

233 lines
9.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
```python
import re
from typing import Any, Dict, Optional, Tuple, List
import textarena as ta
class StarGridDuelEnv(ta.Env):
    """
    Implementation of the 'StarGrid Duel' game environment.

    Deterministic two-player strategy game where navigators place energy beacons
    on a 3x3 grid aiming to align three of their own beacons in a row, column,
    or diagonal. Player 0 is "Navigator Alpha" (Blue), player 1 is
    "Navigator Beta" (Crimson).
    """

    # The 8 winning alignments: three rows, three columns, two diagonals.
    _WINNING_LINES = (
        ("A1", "A2", "A3"),
        ("B1", "B2", "B3"),
        ("C1", "C2", "C3"),
        ("A1", "B1", "C1"),
        ("A2", "B2", "C2"),
        ("A3", "B3", "C3"),
        ("A1", "B2", "C3"),
        ("A3", "B2", "C1"),
    )

    # Matches \boxed{{...}} (group 1) or \boxed{...} (group 2). The doubled-brace
    # alternative must come first, otherwise the single-brace alternative would
    # capture a stray leading "{" from a doubled-brace answer.
    _BOXED_PATTERN = re.compile(r"\\boxed\{\{([^}]*)\}\}|\\boxed\{([^}]*)\}")

    def __init__(self, max_turns: int = 9):
        """
        Args:
            max_turns: Turn limit forwarded to the underlying two-player state.
        """
        self.max_turns = max_turns
        # Compile regex patterns once
        self.place_pattern = re.compile(r"^\[Place:\s*(A|B|C)(1|2|3)\]$")
        # Cell labels in row-major order: A1, A2, A3, B1, ..., C3
        self.all_cells = [f"{r}{c}" for r in "ABC" for c in "123"]

    # ------------------------ Helper Methods ------------------------
    def _extract_answer_content(self, action: str) -> str:
        r"""
        Extract the content of the final \boxed{} expression for machine parsing.

        Players are told to put their final answer at the end of the response,
        so when several boxed expressions are present the last one is used.
        Both \boxed{...} and \boxed{{...}} are accepted.

        Args:
            action: Raw text submitted by the player.

        Returns:
            The stripped boxed content, or the whole stripped action when no
            boxed expression is found.
        """
        matches = list(self._BOXED_PATTERN.finditer(action))
        if not matches:
            return action.strip()
        final = matches[-1]
        content = final.group(1) if final.group(1) is not None else final.group(2)
        return content.strip()

    def _check_victory(self, board: Dict[str, Optional[str]], color: str) -> bool:
        """Return True if `color` occupies every cell of any of the 8 winning lines."""
        return any(
            all(board[cell] == color for cell in line)
            for line in self._WINNING_LINES
        )

    def _generate_board_str(self, board: Dict[str, Optional[str]]) -> str:
        """
        Render the 3x3 StarGrid as a simple text table.

        Empty cells show their coordinate (e.g. "A1"); occupied cells show
        "B" for a Blue beacon and "C" for any other (Crimson) beacon.
        """
        rows = []
        for r in "ABC":
            row_cells = []
            for c in "123":
                cell_id = f"{r}{c}"
                val = board[cell_id]
                if val is None:
                    row_cells.append(cell_id)
                else:
                    row_cells.append("B" if val == "Blue" else "C")
            rows.append(" | ".join(row_cells))
        return "\n".join(rows)

    def _get_active_player_label(self, player_id: int) -> str:
        """Map a numeric player id to its display name (0 -> Alpha, otherwise Beta)."""
        return "Navigator Alpha" if player_id == 0 else "Navigator Beta"

    def _cell_valid(self, cell: str) -> bool:
        """Return True if `cell` is one of the nine grid coordinates."""
        return cell in self.all_cells

    # ------------------------ Core Env API ------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: Number of players in the game. Must be 2.
            seed: Optional seed for determinism.

        Returns:
            The freshly initialised state object (self.state).

        Raises:
            ValueError: If num_players is not 2.
        """
        if num_players != 2:
            raise ValueError("StarGrid Duel requires exactly 2 players.")

        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

        empty_board = {cell: None for cell in self.all_cells}
        game_state: Dict[str, Any] = {
            "turn_index": 0,
            "active_player": "A",
            "board": empty_board,
            "player_symbols": {"A": "Blue", "B": "Crimson"},
            "move_history": [],
            "winner": None,
            "is_draw": False,
            "observations": {"A": "", "B": ""},
            "seed": seed,
        }
        role_mapping = {pid: self._get_active_player_label(pid) for pid in (0, 1)}

        # Initialize internal game state
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=role_mapping,
        )

        # Onboarding observations
        onboarding_msg = (
            "Welcome to StarGrid Duel!\n"
            "Two navigators will alternately place energy beacons on the 3×3 StarGrid.\n"
            "Your mission is to align three of your beacons in a line before your rival."
        )
        self.state.add_observation(onboarding_msg, ta.ObservationType.GAME_MESSAGE)
        board_msg = self._generate_board_str(empty_board)
        self.state.add_observation(board_msg, ta.ObservationType.GAME_BOARD)
        return self.state

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The action is logged, the boxed content is extracted and validated,
        and a valid placement is applied to the board. Malformed actions and
        occupied cells are reported as invalid moves without changing the board.

        Args:
            action: The action text submitted by the current player.

        Returns:
            A tuple (done, info) where:
                done: True if the episode has concluded
                info: A ta.Info object with auxiliary details
        """
        player_id = self.state.current_player_id
        player_key = "A" if player_id == 0 else "B"
        game_state = self.state.game_state
        player_color = game_state["player_symbols"][player_key]

        # 1. Log the raw player action
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)

        # 2. Extract the boxed content
        extracted = self._extract_answer_content(action)

        # 3. Validate the action pattern
        match = self.place_pattern.match(extracted)
        if not match:
            self.state.set_invalid_move(reason="MalformedAction: The action must strictly follow '[Place: <cell_id>]' format.")
            return self.state.step()

        cell_id = f"{match.group(1)}{match.group(2)}"
        # Defensive: the default place_pattern already restricts cells to A-C / 1-3.
        if not self._cell_valid(cell_id):
            self.state.set_invalid_move(reason=f"CellOutOfRange: {cell_id} is not a valid StarGrid coordinate.")
            return self.state.step()

        board = game_state["board"]
        if board[cell_id] is not None:
            self.state.set_invalid_move(reason=f"CellOccupied: {cell_id} is already occupied.")
            return self.state.step()

        # 4. Execute valid action: place beacon and record the move
        board[cell_id] = player_color
        game_state["move_history"].append({"player": player_key, "action": extracted})
        game_state["turn_index"] += 1

        # 5. Resolve the outcome. The win check comes first so a winning move
        #    on the final empty cell is not reported as a draw.
        game_over = True
        if self._check_victory(board, player_color):
            game_state["winner"] = player_key
            winner_str = self._get_active_player_label(player_id)
            self.state.set_winner(player_id=player_id, reason=f"{winner_str} aligned three energy beacons in a line.")
        elif all(v is not None for v in board.values()):
            game_state["is_draw"] = True
            self.state.set_draw(reason="All cells filled with no aligned beacons—a balanced standoff.")
        else:
            game_over = False

        # 6. Publish the updated board (after any win/draw announcement)
        self.state.add_observation(self._generate_board_str(board), ta.ObservationType.GAME_BOARD)

        # 7. Rotate the active player only while the game continues
        if not game_over:
            game_state["active_player"] = "B" if player_key == "A" else "A"

        return self.state.step()

    # -------------------- Player Prompt Generation --------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Build the instruction prompt for one player.

        Args:
            player_id: Numeric id of the player the prompt is addressed to.
            game_state: Game-state dict; reads "player_symbols",
                "active_player" and "board".

        Returns:
            The prompt text with role, current board, open cells and the
            required answer format.
        """
        role = self._get_active_player_label(player_id)
        color = game_state["player_symbols"]["A" if player_id == 0 else "B"]
        active_label = self._get_active_player_label(0 if game_state["active_player"] == "A" else 1)
        board_repr = self._generate_board_str(game_state["board"])
        open_cells = [cell for cell, val in game_state["board"].items() if val is None]

        # NOTE: the example lines below are plain (non-f) strings, so braces
        # must be written singly; doubling them would show players "{{...}}".
        prompt = (
            f"You are {role}, commanding the {color} energy.\n"
            "Your goal: deploy energy beacons across the StarGrid to align three of your own in a straight line before your opponent.\n\n"
            f"Current Board:\n{board_repr}\n\n"
            f"Your Color: {color}\nActive Navigator: {active_label}\n\n"
            f"Allowed Actions:\nFormat: [Place: <cell_id>]\nAvailable cells: {', '.join(open_cells) if open_cells else 'None'}\n\n"
            "Response Format:\n"
            "You may describe your reasoning, then finalize your move as:\n\n"
            "Example valid response:\n"
            "I will claim the center of the grid to control diagonals.\n"
            "\\boxed{[Place: B2]}\n\n"
            "Example invalid response:\n"
            "I think I'll move now.\n"
            "\\boxed{[Move: B2]}\n\n"
            "Put your final answer within \\boxed{} at the end of your response."
        )
        return prompt
```