Files
mazerunner-v0/env.py
2001-01-01 00:00:00 +00:00

239 lines
9.5 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
```python
import re
from typing import Any, Dict, Tuple, Optional, List
import textarena as ta
class CrystalGridEnv(ta.Env):
    """
    Environment implementation for the deterministic 2-player game "Crystal Grid".

    Each player alternately places their mark (S or L) on a 3x3 grid.
    First to align three of their crystals in a row, column, or diagonal wins;
    a full grid without an aligned line is a draw.
    """

    # Keys used for the per-player entries of game_state ("observations", "score", ...).
    _ROLE_KEYS = {0: "Solar", 1: "Lunar"}

    # Matches \boxed{...}; a doubled brace pair (\boxed{{...}}) is tolerated as well.
    _BOXED_PATTERN = re.compile(r"\\boxed\{\{?([^}]*)\}?\}")

    def __init__(self, max_turns: int = 9):
        """
        Args:
            max_turns: Turn limit handed to the two-player state on reset.
        """
        self.max_turns = max_turns
        # Precompiled pattern for valid actions; only digits 1-3 are accepted.
        self.action_pattern = re.compile(r"^\[Place:\s*([1-3]),\s*([1-3])\]$")
        self.symbols = {0: "S", 1: "L"}
        self.role_mapping = {0: "Solar Architect", 1: "Lunar Architect"}

    # ---------------------------------------------------------------
    # === HELPER FUNCTIONS ===
    # ---------------------------------------------------------------
    def _extract_answer_content(self, action: str) -> str:
        """
        Extract the content inside the last \\boxed{} marker of the message.

        The prompt asks players to put their final answer at the end of the
        message, so when several boxed spans are present the last one wins.
        Falls back to the full (stripped) action if no boxed span is found.
        """
        boxed = self._BOXED_PATTERN.findall(action)
        if boxed:
            return boxed[-1].strip()
        return action.strip()

    def _check_winner(self, symbol: str, grid: List[List[Optional[str]]]) -> bool:
        """Return True if `symbol` fills a complete row, column, or diagonal of the grid."""
        lines = [list(row) for row in grid]
        lines.extend([grid[r][c] for r in range(3)] for c in range(3))
        lines.append([grid[i][i] for i in range(3)])
        lines.append([grid[i][2 - i] for i in range(3)])
        return any(all(cell == symbol for cell in line) for line in lines)

    def _get_available_cells(self, grid: List[List[Optional[str]]]) -> List[List[int]]:
        """Return the 1-indexed [row, col] pairs of every empty cell, in row-major order."""
        return [
            [r + 1, c + 1]
            for r in range(3)
            for c in range(3)
            if grid[r][c] is None
        ]

    def _render_grid(self, grid: List[List[Optional[str]]]) -> str:
        """Produce a human-readable board representation for prompts/observations."""
        # Column labels are spaced so they sit above the " | "-separated cells.
        display = ["  1   2   3"]
        for i, row in enumerate(grid, start=1):
            cells = [cell if cell is not None else "." for cell in row]
            display.append(f"{i} " + " | ".join(cells))
        return "\n".join(display)

    # ---------------------------------------------------------------
    # === CORE API IMPLEMENTATION ===
    # ---------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Reset the environment to an initial state.

        Args:
            num_players: Must be 2 for this environment.
            seed: Optional seed, forwarded to the state and stored in game_state.

        Returns:
            None

        Raises:
            ValueError: If num_players is not 2.
        """
        if num_players != 2:
            raise ValueError("Crystal Grid requires exactly 2 players.")

        # Initialize two-player state from textarena framework
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

        # Construct initial game state according to Stage 1 schema
        grid = [[None for _ in range(3)] for _ in range(3)]
        game_state = {
            "turn_count": 0,
            "current_player": "Solar",
            "grid": grid,
            "available_cells": self._get_available_cells(grid),
            "winner": None,
            "is_terminal": False,
            "observations": {
                "Solar": "The Crystal Grid is empty. You are Solar Architect (symbol S). Your charge begins first.",
                "Lunar": "The Crystal Grid is empty. You are Lunar Architect (symbol L). Wait for Solar Architect to place first.",
            },
            "history": [],
            "seed": seed,
            "score": {
                "Solar": 0,
                "Lunar": 0,
            },
        }
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=self.role_mapping,
        )

        # Add initial game message
        self.state.add_observation(
            message="Welcome to Crystal Grid. The Solar Architect begins the alignment ritual.",
            observation_type=ta.ObservationType.GAME_MESSAGE,
        )

        # Visualize starting grid
        self.state.add_observation(self._render_grid(grid), ta.ObservationType.GAME_BOARD)
        return None

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The action is logged, parsed, and validated. A valid placement updates
        the grid and the bookkeeping in game_state, then the win and draw
        conditions are checked. Invalid actions are reported through
        `set_invalid_move` and leave the grid untouched.

        Args:
            action: The action text submitted by the current player (possibly boxed).

        Returns:
            (done, info) as returned by `self.state.step()`.
        """
        acting_player = self.state.current_player_id
        player_symbol = self.symbols[acting_player]
        player_role = self._ROLE_KEYS[acting_player]
        opponent_role = self._ROLE_KEYS[1 - acting_player]

        # Log observed action
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=acting_player, to_id=-1)

        # Extract boxed content and validate its format
        match = self.action_pattern.match(self._extract_answer_content(action))
        if not match:
            self.state.set_invalid_move(reason="Action format not recognized.")
            return self.state.step()

        # Convert to 0-index. The pattern only admits digits 1-3, so the
        # coordinates are always on the grid and need no further range check.
        row, col = int(match.group(1)) - 1, int(match.group(2)) - 1

        game_state = self.state.game_state
        grid = game_state["grid"]

        # Check if cell already occupied
        if grid[row][col] is not None:
            self.state.set_invalid_move(reason="That node already holds a crystal.")
            return self.state.step()

        # Make placement
        grid[row][col] = player_symbol
        game_state["turn_count"] += 1
        game_state["available_cells"] = self._get_available_cells(grid)
        placement = f"[Place: {row+1},{col+1}]"
        game_state["history"].append(f"{player_role} → {placement}")

        # Update observations relative to who acted: only the non-acting
        # player is told that their opponent placed the crystal.
        game_state["observations"][player_role] = f"Previous move: {placement} by {player_role}."
        game_state["observations"][opponent_role] = f"Your opponent placed {placement}."

        # Add board visualization
        self.state.add_observation(self._render_grid(grid), ta.ObservationType.GAME_BOARD)

        # Check for win condition (only the acting player can have completed a line)
        if self._check_winner(player_symbol, grid):
            game_state["winner"] = player_role
            game_state["is_terminal"] = True
            game_state["score"][player_role] = 1
            game_state["score"][opponent_role] = 0
            self.state.set_winner(
                player_id=acting_player,
                reason=f"{player_role} formed a stable energy conduit.",
            )
            return self.state.step()

        # Check for draw condition: every cell is filled and nobody won
        if not game_state["available_cells"]:
            game_state["winner"] = "draw"
            game_state["is_terminal"] = True
            game_state["score"]["Solar"] = 0.5
            game_state["score"]["Lunar"] = 0.5
            self.state.set_draw(reason="The grid is full; energy flows evenly—a draw.")
            return self.state.step()

        # No terminal condition reached — rotate to next player
        game_state["current_player"] = opponent_role
        return self.state.step()

    # ---------------------------------------------------------------
    # === PROMPT GENERATION ===
    # ---------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Build the instruction prompt shown to a player.

        Args:
            player_id: 0 for the Solar Architect, 1 for the Lunar Architect.
            game_state: Current game state; only its "grid" entry is read.

        Returns:
            The prompt text, including the rendered grid and the action format.
        """
        role = self.role_mapping[player_id]
        symbol = self.symbols[player_id]
        board_text = self._render_grid(game_state["grid"])
        # Only the f-string fragments interpolate values; the plain fragments
        # use single braces so the player sees \boxed{...} exactly as expected.
        prompt = (
            "You are a mystic architect competing on the Crystal Grid.\n"
            f"Role: {role} (symbol '{symbol}')\n\n"
            "Objective:\n"
            "Align three of your charged crystals in a row, column, or diagonal before your opponent does.\n"
            "Players alternate placing crystals: Solar goes first, then Lunar.\n\n"
            "Current Grid:\n"
            f"{board_text}\n\n"
            "Allowed Action:\n"
            " [Place: row,col]\n"
            " where row and col are integers in {1,2,3}.\n\n"
            "Example valid response:\n"
            "I will channel energy into the central node for stability.\n"
            "\\boxed{[Place: 2,2]}\n\n"
            "Invalid example (do not use):\n"
            "\\boxed{[Play: 2,2]} <-- token must be [Place: ...]\n\n"
            "At the end of your message, put your final answer within \\boxed{} using one allowed action."
        )
        return prompt
```