import re
from typing import Any, Dict, Tuple, Optional, List

import textarena as ta


class CrystalGridEnv(ta.Env):
    """
    Environment implementation for the deterministic 2-player game "Crystal Grid".

    Each player alternately places their mark (S or L) on a 3x3 grid.
    First to align three of their crystals in a row, column, or diagonal wins.
    """

    # Short role names used as keys of the per-player dictionaries stored in
    # ``game_state`` ("observations", "score") and as the "winner" value.
    _STATE_KEYS = {0: "Solar", 1: "Lunar"}

    def __init__(self, max_turns: int = 9):
        """
        Args:
            max_turns: Turn limit forwarded to ``ta.TwoPlayerState`` on reset.
        """
        self.max_turns = max_turns
        # Precompiled pattern for the only legal action, e.g. "[Place: 2,3]".
        # The character classes restrict both coordinates to 1..3.
        self.action_pattern = re.compile(r"^\[Place:\s*([1-3]),\s*([1-3])\]$")
        self.symbols = {0: "S", 1: "L"}
        self.role_mapping = {0: "Solar Architect", 1: "Lunar Architect"}

    # ---------------------------------------------------------------
    # === HELPER FUNCTIONS ===
    # ---------------------------------------------------------------
    def _extract_answer_content(self, action: str) -> str:
        """
        Extract the content inside \\boxed{} markers.

        When several boxed spans are present the last one is used, because
        the prompt asks players to put the final answer at the end of the
        message. Both ``\\boxed{...}`` and ``\\boxed{{...}}`` are accepted.
        Falls back to the full (stripped) action if no boxed span is found.
        """
        boxed = re.findall(r'\\boxed\{\{?([^}]*)\}?\}', action, re.DOTALL)
        if boxed:
            return boxed[-1].strip()
        return action.strip()

    def _check_winner(self, symbol: str, grid: List[List[Optional[str]]]) -> bool:
        """Determines whether the given symbol has a winning line on the grid."""
        lines = [list(row) for row in grid]                                   # rows
        lines += [[grid[r][c] for r in range(3)] for c in range(3)]           # columns
        lines.append([grid[i][i] for i in range(3)])                          # main diagonal
        lines.append([grid[i][2 - i] for i in range(3)])                      # anti-diagonal
        return any(all(cell == symbol for cell in line) for line in lines)

    def _get_available_cells(self, grid: List[List[Optional[str]]]) -> List[List[int]]:
        """Returns the 1-indexed ``[row, col]`` pairs of all empty cells."""
        return [
            [r + 1, c + 1]
            for r in range(3)
            for c in range(3)
            if grid[r][c] is None
        ]

    def _render_grid(self, grid: List[List[Optional[str]]]) -> str:
        """Produces a human-readable board representation for prompts/observations."""
        # Cells are joined with " | " after a two-character row label, so the
        # column labels sit at offsets 2, 6 and 10 to line up with the cells.
        display = ["  1   2   3"]
        for i, row in enumerate(grid, start=1):
            symbols = [cell if cell is not None else "." for cell in row]
            display.append(f"{i} " + " | ".join(symbols))
        return "\n".join(display)

    # ---------------------------------------------------------------
    # === CORE API IMPLEMENTATION ===
    # ---------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: Must be 2 for this environment.
            seed: Optional seed for deterministic initialization.

        Returns:
            None

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Crystal Grid requires exactly 2 players.")

        # Initialize two-player state from textarena framework
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

        # Construct initial game state according to Stage 1 schema
        grid = [[None for _ in range(3)] for _ in range(3)]
        game_state = {
            "turn_count": 0,
            "current_player": "Solar",
            "grid": grid,
            "available_cells": self._get_available_cells(grid),
            "winner": None,
            "is_terminal": False,
            "observations": {
                "Solar": "The Crystal Grid is empty. You are Solar Architect (symbol ‘S’). Your charge begins first.",
                "Lunar": "The Crystal Grid is empty. You are Lunar Architect (symbol ‘L’). Wait for Solar Architect to place first.",
            },
            "history": [],
            "seed": seed,
            "score": {"Solar": 0, "Lunar": 0},
        }

        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=self.role_mapping,
        )

        # Add initial game message
        self.state.add_observation(
            message="Welcome to Crystal Grid. The Solar Architect begins the alignment ritual.",
            observation_type=ta.ObservationType.GAME_MESSAGE,
        )

        # Visualize starting grid
        board_str = self._render_grid(grid)
        self.state.add_observation(board_str, ta.ObservationType.GAME_BOARD)

        return None

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The action is logged, validated, applied to the grid, and then the
        win and draw conditions are checked. Invalid actions are reported
        through ``self.state.set_invalid_move`` and leave the grid untouched.

        Args:
            action: The action text submitted by the current player (possibly boxed).

        Returns:
            (done, info) as returned by ``self.state.step()``.
        """
        acting_player = self.state.current_player_id
        player_symbol = self.symbols[acting_player]
        player_role = self._STATE_KEYS[acting_player]
        opponent_role = self._STATE_KEYS[1 - acting_player]

        # Log observed action
        self.state.add_observation(
            action, ta.ObservationType.PLAYER_ACTION, from_id=acting_player, to_id=-1
        )

        # Extract boxed content and validate its format
        extracted = self._extract_answer_content(action)
        match = self.action_pattern.match(extracted)
        if not match:
            self.state.set_invalid_move(reason="Action format not recognized.")
            return self.state.step()

        # Parse coordinates (convert to 0-index). The pattern only admits the
        # digits 1-3, so no separate range check is needed.
        row, col = int(match.group(1)) - 1, int(match.group(2)) - 1

        game_state = self.state.game_state
        current_grid = game_state["grid"]

        # Check if cell already occupied
        if current_grid[row][col] is not None:
            self.state.set_invalid_move(reason="That node already holds a crystal.")
            return self.state.step()

        # Make placement
        current_grid[row][col] = player_symbol
        game_state["turn_count"] += 1
        game_state["available_cells"] = self._get_available_cells(current_grid)

        placement = f"[Place: {row + 1},{col + 1}]"
        game_state["history"].append(f"{player_role} → {placement}")

        # Update observations from each player's own point of view: the mover
        # sees its own move recorded, the other player sees an opponent move.
        game_state["observations"][player_role] = f"Previous move: {placement} by {player_role}."
        game_state["observations"][opponent_role] = f"Your opponent placed {placement}."

        # Add board visualization
        board_str = self._render_grid(current_grid)
        self.state.add_observation(board_str, ta.ObservationType.GAME_BOARD)

        # Check for win condition (only the mover can have completed a line)
        if self._check_winner(player_symbol, current_grid):
            game_state["winner"] = player_role
            game_state["is_terminal"] = True
            game_state["score"][player_role] = 1
            game_state["score"][opponent_role] = 0
            self.state.set_winner(
                player_id=acting_player,
                reason=f"{player_role} formed a stable energy conduit.",
            )
            return self.state.step()

        # Check for draw condition: no empty cell left and nobody has won
        if not game_state["available_cells"]:
            game_state["winner"] = "draw"
            game_state["is_terminal"] = True
            game_state["score"]["Solar"] = 0.5
            game_state["score"]["Lunar"] = 0.5
            self.state.set_draw(reason="The grid is full; energy flows evenly—a draw.")
            return self.state.step()

        # No terminal condition reached — rotate to next player
        game_state["current_player"] = opponent_role
        return self.state.step()

    # ---------------------------------------------------------------
    # === PROMPT GENERATION ===
    # ---------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Builds the instruction prompt shown to a player.

        Args:
            player_id: 0 for the Solar Architect, 1 for the Lunar Architect.
            game_state: Game state dictionary; only ``game_state["grid"]`` is read.

        Returns:
            The prompt text, including the rendered grid and the action format.
        """
        role = self.role_mapping[player_id]
        symbol = self.symbols[player_id]
        board_text = self._render_grid(game_state["grid"])

        # Only the pieces that interpolate values are f-strings; the remaining
        # literals are plain strings, so braces in them are written singly.
        prompt = (
            "You are a mystic architect competing on the Crystal Grid.\n"
            f"Role: {role} (symbol '{symbol}')\n\n"
            "Objective:\n"
            "Align three of your charged crystals in a row, column, or diagonal before your opponent does.\n"
            "Players alternate placing crystals: Solar goes first, then Lunar.\n\n"
            "Current Grid:\n"
            f"{board_text}\n\n"
            "Allowed Action:\n"
            " [Place: row,col]\n"
            " where row and col are integers in {1,2,3}.\n\n"
            "Example valid response:\n"
            "I will channel energy into the central node for stability.\n"
            "\\boxed{[Place: 2,2]}\n\n"
            "Invalid example (do not use):\n"
            "\\boxed{[Play: 2,2]} <-- token must be [Place: ...]\n\n"
            "At the end of your message, put your final answer within \\boxed{} using one allowed action."
        )
        return prompt