import re
from typing import Any, Dict, Tuple, Optional, List

import textarena as ta


class CrystalGridEnv(ta.Env):
    """
    Environment implementation for the deterministic 2-player game "Crystal Grid".

    Each player alternately places their mark (S or L) on a 3x3 grid.
    First to align three of their crystals in a row, column, or diagonal wins;
    a full grid without such a line is a draw.
    """

    # Side length of the (square) grid.
    _SIZE = 3

    # Keys used inside ``game_state`` ("observations", "score", ...) per player id.
    _ROLE_KEYS = {0: "Solar", 1: "Lunar"}

    # Matches \boxed{...} and tolerates doubled braces (\boxed{{...}}).
    _BOXED_PATTERN = re.compile(r'\\boxed\{\{?([^}]*)\}?\}')

    def __init__(self, max_turns: int = 9):
        """
        Args:
            max_turns: Turn limit forwarded to the textarena state in ``reset``.
        """
        super().__init__()
        self.max_turns = max_turns
        # Precompiled action syntax. Any short non-negative integer is accepted
        # here so that out-of-range coordinates reach the explicit range check
        # in ``step`` (and get its specific message) instead of being reported
        # as a format error. The digit count is bounded to keep int() cheap.
        self.action_pattern = re.compile(
            r"^\[Place:\s*([0-9]{1,9}),\s*([0-9]{1,9})\]$"
        )
        self.symbols = {0: "S", 1: "L"}
        self.role_mapping = {0: "Solar Architect", 1: "Lunar Architect"}

    # ---------------------------------------------------------------
    # === HELPER FUNCTIONS ===
    # ---------------------------------------------------------------

    def _extract_answer_content(self, action: str) -> str:
        """
        Extract the content inside the last \\boxed{} marker of ``action``.

        The prompt asks players to put their final answer at the end of the
        message, so when several boxed spans are present the last one wins.
        Falls back to the full (stripped) action if no marker is present.
        """
        boxed = self._BOXED_PATTERN.findall(action)
        if boxed:
            return boxed[-1].strip()
        return action.strip()

    def _check_winner(self, symbol: str, grid: List[List[Optional[str]]]) -> bool:
        """Return True if ``symbol`` fills a complete row, column, or diagonal."""
        size = len(grid)
        lines = [list(row) for row in grid]
        lines.extend([grid[r][c] for r in range(size)] for c in range(size))
        lines.append([grid[i][i] for i in range(size)])
        lines.append([grid[i][size - 1 - i] for i in range(size)])
        return any(all(cell == symbol for cell in line) for line in lines)

    def _get_available_cells(self, grid: List[List[Optional[str]]]) -> List[List[int]]:
        """Return the 1-indexed ``[row, col]`` pairs of every empty cell."""
        return [
            [r + 1, c + 1]
            for r, row in enumerate(grid)
            for c, cell in enumerate(row)
            if cell is None
        ]

    def _render_grid(self, grid: List[List[Optional[str]]]) -> str:
        """Produce a human-readable board representation for prompts/observations."""
        # Column labels are spaced to sit directly above the " | "-separated cells.
        header = "  " + "   ".join(str(c) for c in range(1, len(grid) + 1))
        rows = [
            f"{i} " + " | ".join("." if cell is None else cell for cell in row)
            for i, row in enumerate(grid, start=1)
        ]
        return "\n".join([header, *rows])

    # ---------------------------------------------------------------
    # === CORE API IMPLEMENTATION ===
    # ---------------------------------------------------------------

    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Reset the environment to an initial state.

        Args:
            num_players: Must be 2 for this environment.
            seed: Optional seed, forwarded to the textarena state and recorded
                in the game state.

        Returns:
            None

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Crystal Grid requires exactly 2 players.")

        # Initialize two-player state from textarena framework
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

        grid = [[None for _ in range(self._SIZE)] for _ in range(self._SIZE)]
        game_state = {
            "turn_count": 0,
            "current_player": "Solar",
            "grid": grid,
            "available_cells": self._get_available_cells(grid),
            "winner": None,
            "is_terminal": False,
            "observations": {
                "Solar": "The Crystal Grid is empty. You are Solar Architect (symbol ‘S’). Your charge begins first.",
                "Lunar": "The Crystal Grid is empty. You are Lunar Architect (symbol ‘L’). Wait for Solar Architect to place first.",
            },
            "history": [],
            "seed": seed,
            "score": {
                "Solar": 0,
                "Lunar": 0,
            },
        }

        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=self.role_mapping,
        )

        # Add initial game message
        self.state.add_observation(
            message="Welcome to Crystal Grid. The Solar Architect begins the alignment ritual.",
            observation_type=ta.ObservationType.GAME_MESSAGE
        )

        # Visualize starting grid
        self.state.add_observation(self._render_grid(grid), ta.ObservationType.GAME_BOARD)

        return None

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The action is logged, parsed, and validated; an invalid action is
        reported through ``state.set_invalid_move`` and leaves the grid
        untouched. A valid placement updates the grid and bookkeeping fields,
        then a win for the acting player or a full-grid draw is recorded.

        Args:
            action: The action text submitted by the current player (possibly boxed).

        Returns:
            (done, info) as returned by ``self.state.step()``.
        """
        state = self.state
        acting_player = state.current_player_id
        player_symbol = self.symbols[acting_player]
        player_role = self._ROLE_KEYS[acting_player]
        opponent_role = self._ROLE_KEYS[1 - acting_player]

        # Log observed action
        state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=acting_player, to_id=-1)

        # Extract boxed content and validate its format
        match = self.action_pattern.match(self._extract_answer_content(action))
        if not match:
            state.set_invalid_move(reason="Action format not recognized.")
            return state.step()

        # Parse coordinates (convert to 0-index)
        row, col = int(match.group(1)) - 1, int(match.group(2)) - 1

        if not (0 <= row < self._SIZE and 0 <= col < self._SIZE):
            state.set_invalid_move(reason="Coordinates must be between 1 and 3.")
            return state.step()

        game_state = state.game_state
        current_grid = game_state["grid"]

        # Check if cell already occupied
        if current_grid[row][col] is not None:
            state.set_invalid_move(reason="That node already holds a crystal.")
            return state.step()

        # Make placement
        current_grid[row][col] = player_symbol
        game_state["turn_count"] += 1
        game_state["available_cells"] = self._get_available_cells(current_grid)

        placement = f"[Place: {row + 1},{col + 1}]"
        game_state["history"].append(f"{player_role} → {placement}")

        # Observations are relative to who moved: the mover sees its own move
        # recorded, the other player is told what its opponent did.
        game_state["observations"][player_role] = f"Previous move: {placement} by {player_role}."
        game_state["observations"][opponent_role] = f"Your opponent placed {placement}."

        # Add board visualization
        state.add_observation(self._render_grid(current_grid), ta.ObservationType.GAME_BOARD)

        # Check for win condition (only the mover can have completed a line)
        if self._check_winner(player_symbol, current_grid):
            game_state["winner"] = player_role
            game_state["is_terminal"] = True
            game_state["score"][player_role] = 1
            game_state["score"][opponent_role] = 0
            state.set_winner(
                player_id=acting_player,
                reason=f"{player_role} formed a stable energy conduit.",
            )
            return state.step()

        # Check for draw condition: no empty cell left and nobody has won
        if not game_state["available_cells"]:
            game_state["winner"] = "draw"
            game_state["is_terminal"] = True
            game_state["score"]["Solar"] = 0.5
            game_state["score"]["Lunar"] = 0.5
            state.set_draw(reason="The grid is full; energy flows evenly—a draw.")
            return state.step()

        # No terminal condition reached — record whose turn is next
        game_state["current_player"] = opponent_role

        return state.step()

    # ---------------------------------------------------------------
    # === PROMPT GENERATION ===
    # ---------------------------------------------------------------

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Build the instruction prompt shown to ``player_id``.

        Args:
            player_id: 0 (Solar Architect) or 1 (Lunar Architect).
            game_state: Game state dict; only its "grid" entry is read.

        Returns:
            The prompt text including role, rules, the rendered grid, and the
            required answer format.
        """
        role = self.role_mapping[player_id]
        symbol = self.symbols[player_id]

        board_text = self._render_grid(game_state["grid"])
        # NOTE: the brace-bearing lines below are plain (non-f) strings, so
        # braces are written singly; doubling them would show "{{" to the player.
        prompt = (
            "You are a mystic architect competing on the Crystal Grid.\n"
            f"Role: {role} (symbol '{symbol}')\n\n"
            "Objective:\n"
            "Align three of your charged crystals in a row, column, or diagonal before your opponent does.\n"
            "Players alternate placing crystals: Solar goes first, then Lunar.\n\n"
            "Current Grid:\n"
            f"{board_text}\n\n"
            "Allowed Action:\n"
            " [Place: row,col]\n"
            " where row and col are integers in {1,2,3}.\n\n"
            "Example valid response:\n"
            "I will channel energy into the central node for stability.\n"
            "\\boxed{[Place: 2,2]}\n\n"
            "Invalid example (do not use):\n"
            "\\boxed{[Play: 2,2]} <-- token must be [Place: ...]\n\n"
            "At the end of your message, put your final answer within \\boxed{} using one allowed action."
        )
        return prompt