233 lines
9.2 KiB
Python
233 lines
9.2 KiB
Python
```python
|
||
import re
|
||
from typing import Any, Dict, Optional, Tuple, List
|
||
|
||
import textarena as ta
|
||
|
||
|
||
class StarGridDuelEnv(ta.Env):
    """
    Implementation of the 'StarGrid Duel' game environment.

    Deterministic two-player strategy game where navigators place energy beacons
    on a 3x3 grid aiming to align three of their own beacons in a row, column, or diagonal.

    Player 0 is "Navigator Alpha" (key "A", Blue beacons) and player 1 is
    "Navigator Beta" (key "B", Crimson beacons). Cells are addressed by a row
    letter (A-C) followed by a column digit (1-3), e.g. "B2" for the center.
    Moves are submitted as ``[Place: <cell_id>]``, optionally wrapped in
    ``\\boxed{...}``.
    """

    # The 8 winning alignments: three rows, three columns, two diagonals.
    _WIN_LINES = (
        ("A1", "A2", "A3"),
        ("B1", "B2", "B3"),
        ("C1", "C2", "C3"),
        ("A1", "B1", "C1"),
        ("A2", "B2", "C2"),
        ("A3", "B3", "C3"),
        ("A1", "B2", "C3"),
        ("A3", "B2", "C1"),
    )

    # Matches \boxed{{...}} (group 1) or \boxed{...} (group 2). The doubled-brace
    # alternative comes first so it takes precedence at any given position.
    _BOXED_PATTERN = re.compile(r"\\boxed\{(?:\{([^}]*)\}\}|([^}]*)\})")

    def __init__(self, max_turns: int = 9):
        """
        Create the environment.

        Args:
            max_turns: Turn limit forwarded to the TextArena state on reset.
                Defaults to 9, the number of cells on the grid.
        """
        self.max_turns = max_turns
        # Compile the move pattern once; it is matched on every step.
        self.place_pattern = re.compile(r"^\[Place:\s*(A|B|C)(1|2|3)\]$")
        # Cell labels in row-major order: A1, A2, A3, B1, ...
        self.all_cells = [f"{r}{c}" for r in "ABC" for c in "123"]

    # ------------------------ Helper Methods ------------------------

    def _extract_answer_content(self, action: str) -> str:
        """
        Extract the content of the final \\boxed{...} in a response.

        Both ``\\boxed{...}`` and ``\\boxed{{...}}`` are accepted. When the
        response contains several boxed expressions the LAST one is used,
        because players are told to put their final answer at the end of the
        response and may box earlier candidates while reasoning.

        Args:
            action: Raw response text from the player.

        Returns:
            The stripped content of the last boxed expression, or the whole
            stripped response when nothing is boxed.
        """
        final_match = None
        for final_match in self._BOXED_PATTERN.finditer(action):
            pass
        if final_match is None:
            return action.strip()
        inner = final_match.group(1)
        if inner is None:
            inner = final_match.group(2)
        return inner.strip()

    def _check_victory(self, board: Dict[str, Optional[str]], color: str) -> bool:
        """
        Check all 8 winning line combinations for the specified color.

        Args:
            board: Mapping of cell id to the occupying color, or None if empty.
            color: Color to test ("Blue" or "Crimson").

        Returns:
            True if every cell of at least one line holds ``color``.
        """
        return any(
            all(board[cell] == color for cell in line)
            for line in self._WIN_LINES
        )

    def _generate_board_str(self, board: Dict[str, Optional[str]]) -> str:
        """
        Render the 3x3 StarGrid as a simple text table.

        Empty cells show their own id (e.g. "A1") so players can see which
        moves remain; occupied cells show "B" for Blue and "C" for any other
        color (Crimson in this game).

        Args:
            board: Mapping of cell id to the occupying color, or None if empty.

        Returns:
            Three lines joined by newlines, cells separated by " | ".
        """
        rendered_rows = []
        for row in "ABC":
            cells = []
            for col in "123":
                cell_id = f"{row}{col}"
                occupant = board[cell_id]
                if occupant is None:
                    cells.append(cell_id)
                else:
                    cells.append("B" if occupant == "Blue" else "C")
            rendered_rows.append(" | ".join(cells))
        return "\n".join(rendered_rows)

    def _get_active_player_label(self, player_id: int) -> str:
        """Return the display name for a player id (0 is Alpha, anything else Beta)."""
        return "Navigator Alpha" if player_id == 0 else "Navigator Beta"

    def _cell_valid(self, cell: str) -> bool:
        """Return True if ``cell`` is one of the nine grid coordinates."""
        return cell in self.all_cells

    # ------------------------ Core Env API ------------------------

    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Builds an empty board, initializes the TextArena two-player state and
        emits the welcome message and the empty board as observations.

        Args:
            num_players: Number of players in the game. Must be 2.
            seed: Optional seed, forwarded to the state and stored in the game state.

        Returns:
            The freshly initialized ``self.state``.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("StarGrid Duel requires exactly 2 players.")

        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

        empty_board = {cell: None for cell in self.all_cells}
        game_state: Dict[str, Any] = {
            "turn_index": 0,
            "active_player": "A",
            "board": empty_board,
            "player_symbols": {"A": "Blue", "B": "Crimson"},
            "move_history": [],
            "winner": None,
            "is_draw": False,
            "observations": {"A": "", "B": ""},
            "seed": seed,
        }
        role_mapping = {0: "Navigator Alpha", 1: "Navigator Beta"}

        # Initialize internal game state
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=role_mapping,
        )

        # Onboarding observations
        onboarding_msg = (
            "Welcome to StarGrid Duel!\n"
            "Two navigators will alternately place energy beacons on the 3×3 StarGrid.\n"
            "Your mission is to align three of your beacons in a line before your rival."
        )
        self.state.add_observation(onboarding_msg, ta.ObservationType.GAME_MESSAGE)
        self.state.add_observation(self._generate_board_str(empty_board), ta.ObservationType.GAME_BOARD)

        return self.state

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw action is logged, the boxed move is parsed and validated, and
        on success the beacon is placed, the move is recorded and the game is
        checked for a win or a full-board draw. Invalid actions (malformed
        text, unknown cell, occupied cell) are reported through
        ``set_invalid_move`` and leave the board untouched.

        Args:
            action: The action text submitted by the current player.

        Returns:
            Whatever ``self.state.step()`` returns, annotated as (done, info):
                done: True if the episode has concluded
                info: A ta.Info object with auxiliary details
        """
        player_id = self.state.current_player_id
        player_key = "A" if player_id == 0 else "B"
        game_state = self.state.game_state
        player_color = game_state["player_symbols"][player_key]

        # 1. Log the raw player action
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)

        # 2. Extract the boxed content
        extracted = self._extract_answer_content(action)

        # 3. Validate the action pattern
        match = self.place_pattern.match(extracted)
        if not match:
            self.state.set_invalid_move(reason="MalformedAction: The action must strictly follow '[Place: <cell_id>]' format.")
            return self.state.step()

        cell_id = f"{match.group(1)}{match.group(2)}"

        # The pattern above already restricts cells to A1-C3; this guard only
        # matters if the pattern is ever loosened.
        if not self._cell_valid(cell_id):
            self.state.set_invalid_move(reason=f"CellOutOfRange: {cell_id} is not a valid StarGrid coordinate.")
            return self.state.step()

        board = game_state["board"]
        if board[cell_id] is not None:
            self.state.set_invalid_move(reason=f"CellOccupied: {cell_id} is already occupied.")
            return self.state.step()

        # 4. Execute valid action: place beacon (board is mutated in place)
        board[cell_id] = player_color
        game_state["move_history"].append({"player": player_key, "action": extracted})
        game_state["turn_index"] += 1

        # 5./6. Resolve terminal conditions: a win takes precedence over a full board
        won = self._check_victory(board, player_color)
        drawn = not won and all(v is not None for v in board.values())
        if won:
            game_state["winner"] = player_key
            winner_str = self._get_active_player_label(player_id)
            self.state.set_winner(player_id=player_id, reason=f"{winner_str} aligned three energy beacons in a line.")
        elif drawn:
            game_state["is_draw"] = True
            self.state.set_draw(reason="All cells filled with no aligned beacons—a balanced standoff.")

        # 7. Publish the updated board (also shown after the final move)
        self.state.add_observation(self._generate_board_str(board), ta.ObservationType.GAME_BOARD)

        # 8. Rotate the recorded active player only while the game continues
        if not (won or drawn):
            game_state["active_player"] = "B" if player_key == "A" else "A"

        # Proceed to next step
        return self.state.step()

    # -------------------- Player Prompt Generation --------------------

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Generate the instruction prompt shown to a player.

        The prompt names the player's role and color, renders the current
        board, lists the open cells and explains the expected answer format
        with one valid and one invalid example.

        Args:
            player_id: 0 for Navigator Alpha, otherwise Navigator Beta.
            game_state: Game state dict; reads "player_symbols",
                "active_player" and "board".

        Returns:
            The full prompt text.
        """
        role = self._get_active_player_label(player_id)
        color = game_state["player_symbols"]["A" if player_id == 0 else "B"]
        active_label = "Navigator Alpha" if game_state["active_player"] == "A" else "Navigator Beta"

        board = game_state["board"]
        board_repr = self._generate_board_str(board)
        open_cells = [cell for cell, val in board.items() if val is None]
        available = ", ".join(open_cells) if open_cells else "None"

        # The \boxed examples live in plain (non-f) string literals, so braces
        # are written once; "{{" is only an escape inside f-strings.
        prompt = (
            f"You are {role}, commanding the {color} energy.\n"
            "Your goal: deploy energy beacons across the StarGrid to align three of your own in a straight line before your opponent.\n\n"
            f"Current Board:\n{board_repr}\n\n"
            f"Your Color: {color}\nActive Navigator: {active_label}\n\n"
            f"Allowed Actions:\nFormat: [Place: <cell_id>]\nAvailable cells: {available}\n\n"
            "Response Format:\n"
            "You may describe your reasoning, then finalize your move as:\n\n"
            "Example valid response:\n"
            "I will claim the center of the grid to control diagonals.\n"
            "\\boxed{[Place: B2]}\n\n"
            "Example invalid response:\n"
            "I think I'll move now.\n"
            "\\boxed{[Move: B2]}\n\n"
            "Put your final answer within \\boxed{} at the end of your response."
        )
        return prompt
|
||
``` |