Add env.py from Openverse builder

This commit is contained in:
Openverse Builder
2001-01-01 00:00:00 +00:00
commit 828a76a7f8

239
env.py Normal file
View File

@@ -0,0 +1,239 @@
```python
import re
from typing import Any, Dict, Tuple, Optional, List
import textarena as ta
class CrystalGridEnv(ta.Env):
    """
    Environment implementation for the deterministic 2-player game "Crystal Grid".

    Players alternately place their mark (S for Solar, L for Lunar) on a 3x3
    grid. The first to align three of their crystals in a row, column, or
    diagonal wins; a full grid without such a line is a draw.
    """

    def __init__(self, max_turns: int = 9):
        """
        Args:
            max_turns: Turn limit handed to ``ta.TwoPlayerState`` in ``reset``.
        """
        self.max_turns = max_turns
        # Precompiled pattern for the only legal action, e.g. "[Place: 2,3]".
        self.action_pattern = re.compile(r"^\[Place:\s*([1-3]),\s*([1-3])\]$")
        # Player id -> mark written into the grid.
        self.symbols = {0: "S", 1: "L"}
        # Player id -> display role passed to the state on reset.
        self.role_mapping = {0: "Solar Architect", 1: "Lunar Architect"}

    # ---------------------------------------------------------------
    # === HELPER FUNCTIONS ===
    # ---------------------------------------------------------------
    def _extract_answer_content(self, action: str) -> str:
        """
        Extract the content inside the last \\boxed{} marker of ``action``.

        Both ``\\boxed{...}`` and ``\\boxed{{...}}`` are accepted. The last
        occurrence is used because the prompt asks players to put their final
        answer at the end of the message. Falls back to the whole stripped
        action when no marker is present.
        """
        boxed_spans = re.findall(r'\\boxed\{\{?([^}]*)\}?\}', action)
        if boxed_spans:
            return boxed_spans[-1].strip()
        return action.strip()

    def _check_winner(self, symbol: str, grid: List[List[Optional[str]]]) -> bool:
        """Determines whether the given symbol has a winning line on the grid."""
        # Rows
        for row in grid:
            if all(cell == symbol for cell in row):
                return True
        # Columns
        for c in range(3):
            if all(grid[r][c] == symbol for r in range(3)):
                return True
        # Diagonals (main, then anti-diagonal)
        if all(grid[i][i] == symbol for i in range(3)):
            return True
        if all(grid[i][2 - i] == symbol for i in range(3)):
            return True
        return False

    def _get_available_cells(self, grid: List[List[Optional[str]]]) -> List[List[int]]:
        """Return the 1-indexed ``[row, col]`` pairs of every empty (``None``) cell."""
        return [
            [r + 1, c + 1]
            for r in range(3)
            for c in range(3)
            if grid[r][c] is None
        ]

    def _render_grid(self, grid: List[List[Optional[str]]]) -> str:
        """Produces a human-readable board representation for prompts/observations."""
        display = [" 1 2 3"]
        for i, row in enumerate(grid, start=1):
            # Empty cells are drawn as "."
            symbols = [cell if cell is not None else "." for cell in row]
            display.append(f"{i} " + " | ".join(symbols))
        return "\n".join(display)

    # ---------------------------------------------------------------
    # === CORE API IMPLEMENTATION ===
    # ---------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: Must be 2 for this environment.
            seed: Optional seed; forwarded to the state and stored in the game state.

        Returns:
            None

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Crystal Grid requires exactly 2 players.")

        # Initialize two-player state from textarena framework
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

        # Construct initial game state according to Stage 1 schema
        grid = [[None for _ in range(3)] for _ in range(3)]
        game_state = {
            "turn_count": 0,
            "current_player": "Solar",
            "grid": grid,
            "available_cells": self._get_available_cells(grid),
            "winner": None,
            "is_terminal": False,
            "observations": {
                "Solar": "The Crystal Grid is empty. You are Solar Architect (symbol S). Your charge begins first.",
                "Lunar": "The Crystal Grid is empty. You are Lunar Architect (symbol L). Wait for Solar Architect to place first.",
            },
            "history": [],
            "seed": seed,
            "score": {
                "Solar": 0,
                "Lunar": 0,
            },
        }
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=self.role_mapping,
        )

        # Add initial game message
        self.state.add_observation(
            message="Welcome to Crystal Grid. The Solar Architect begins the alignment ritual.",
            observation_type=ta.ObservationType.GAME_MESSAGE
        )

        # Visualize starting grid
        board_str = self._render_grid(grid)
        self.state.add_observation(board_str, ta.ObservationType.GAME_BOARD)
        return None

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The action is logged, parsed, and validated. A legal placement updates
        the grid and bookkeeping, after which win and draw conditions are
        checked. Malformed or illegal actions are reported through
        ``self.state.set_invalid_move``.

        Args:
            action: The action text submitted by the current player (possibly boxed).

        Returns:
            The result of ``self.state.step()``, i.e. ``(done, info)``.
        """
        acting_player = self.state.current_player_id
        player_symbol = self.symbols[acting_player]
        player_role = "Solar" if acting_player == 0 else "Lunar"

        # Log observed action
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=acting_player, to_id=-1)

        # Extract boxed content
        extracted = self._extract_answer_content(action)

        # Validate format
        match = self.action_pattern.match(extracted)
        if not match:
            self.state.set_invalid_move(reason="Action format not recognized.")
            return self.state.step()

        # Parse coordinates (convert to 0-index)
        row, col = int(match.group(1)) - 1, int(match.group(2)) - 1
        # Defensive guard: action_pattern already restricts both values to 1-3,
        # so this only triggers if the pattern is loosened later.
        if not (0 <= row < 3 and 0 <= col < 3):
            self.state.set_invalid_move(reason="Coordinates must be between 1 and 3.")
            return self.state.step()

        game_state = self.state.game_state
        current_grid = game_state["grid"]

        # Check if cell already occupied
        if current_grid[row][col] is not None:
            self.state.set_invalid_move(reason="That node already holds a crystal.")
            return self.state.step()

        # Make placement
        current_grid[row][col] = player_symbol
        game_state["turn_count"] += 1
        game_state["available_cells"] = self._get_available_cells(current_grid)
        placement = f"[Place: {row+1},{col+1}]"
        game_state["history"].append(f"{player_role} → {placement}")

        # Update observations for both roles. Lunar's text depends on who
        # moved, so Lunar is never told that its own move was the opponent's.
        game_state["observations"]["Solar"] = f"Previous move: {placement} by {player_role}."
        if player_role == "Lunar":
            game_state["observations"]["Lunar"] = f"You placed {placement}."
        else:
            game_state["observations"]["Lunar"] = f"Your opponent placed {placement}."

        # Add board visualization
        board_str = self._render_grid(current_grid)
        self.state.add_observation(board_str, ta.ObservationType.GAME_BOARD)

        # Check for win condition (only the mover can have completed a line)
        if self._check_winner(player_symbol, current_grid):
            opponent_role = "Lunar" if player_role == "Solar" else "Solar"
            game_state["winner"] = player_role
            game_state["is_terminal"] = True
            # Assign scores
            game_state["score"][player_role] = 1
            game_state["score"][opponent_role] = 0
            self.state.set_winner(
                player_id=acting_player,
                reason=f"{player_role} formed a stable energy conduit.",
            )
            return self.state.step()

        # Check for draw condition: all nine cells filled without a winner
        if game_state["turn_count"] >= 9:
            game_state["winner"] = "draw"
            game_state["is_terminal"] = True
            game_state["score"]["Solar"] = 0.5
            game_state["score"]["Lunar"] = 0.5
            self.state.set_draw(reason="The grid is full; energy flows evenly—a draw.")
            return self.state.step()

        # No terminal condition reached — rotate to next player
        game_state["current_player"] = "Lunar" if player_role == "Solar" else "Solar"
        return self.state.step()

    # ---------------------------------------------------------------
    # === PROMPT GENERATION ===
    # ---------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Build the instruction prompt shown to ``player_id``.

        Args:
            player_id: Key into ``self.role_mapping`` / ``self.symbols``.
            game_state: Game-state dict; only ``game_state["grid"]`` is read.

        Returns:
            The prompt text, including the rendered grid and the action format.
        """
        role = self.role_mapping[player_id]
        symbol = self.symbols[player_id]
        board_text = self._render_grid(game_state["grid"])
        # Only the pieces that interpolate values are f-strings. The lines
        # containing literal braces are plain strings, so braces are written
        # singly and reach the player exactly as typed.
        prompt = (
            "You are a mystic architect competing on the Crystal Grid.\n"
            f"Role: {role} (symbol '{symbol}')\n\n"
            "Objective:\n"
            "Align three of your charged crystals in a row, column, or diagonal before your opponent does.\n"
            "Players alternate placing crystals: Solar goes first, then Lunar.\n\n"
            "Current Grid:\n"
            f"{board_text}\n\n"
            "Allowed Action:\n"
            " [Place: row,col]\n"
            " where row and col are integers in {1,2,3}.\n\n"
            "Example valid response:\n"
            "I will channel energy into the central node for stability.\n"
            "\\boxed{[Place: 2,2]}\n\n"
            "Invalid example (do not use):\n"
            "\\boxed{[Play: 2,2]} <-- token must be [Place: ...]\n\n"
            "At the end of your message, put your final answer within \\boxed{} using one allowed action."
        )
        return prompt
```