Add env.py from Openverse builder

2001-01-01 00:00:00 +00:00
commit 828a76a7f8
1 changed files with 239 additions and 0 deletions
--- a/env.py
+++ b/env.py
@@ -0,0 +1,239 @@
 ```python
 import re
 from typing import Any, Dict, Tuple, Optional, List
 import textarena as ta
 class CrystalGridEnv(ta.Env):
    """
    Environment implementation for the deterministic 2-player game "Crystal Grid".
    Each player alternately places their mark (S or L) on a 3x3 grid.
    First to align three of their crystals in a row, column, or diagonal wins.
    """
    def __init__(self, max_turns: int = 9):
        self.max_turns = max_turns
        # Precompile regex for valid actions
        self.action_pattern = re.compile(r"^\[Place:\s*([1-3]),\s*([1-3])\]$")
        self.symbols = {0: "S", 1: "L"}
        self.role_mapping = {0: "Solar Architect", 1: "Lunar Architect"}
    # ---------------------------------------------------------------
    # === HELPER FUNCTIONS ===
    # ---------------------------------------------------------------
    def _extract_answer_content(self, action: str) -> str:
        """
        Extract the content inside \\boxed{} markers.
        Falls back to full action if the pattern is missing.
        """
        match = re.search(r'\\boxed\{\{?([^}]*)\}?\}', action, re.DOTALL)
        if match:
            return match.group(1).strip()
        return action.strip()
    def _check_winner(self, symbol: str, grid: List[List[Optional[str]]]) -> bool:
        """Determines whether the given symbol has a winning line on the grid."""
        # Rows
        for row in grid:
            if all(cell == symbol for cell in row):
                return True
        # Columns
        for c in range(3):
            if all(grid[r][c] == symbol for r in range(3)):
                return True
        # Diagonals
        if all(grid[i][i] == symbol for i in range(3)):
            return True
        if all(grid[i][2 - i] == symbol for i in range(3)):
            return True
        return False
    def _get_available_cells(self, grid: List[List[Optional[str]]]) -> List[List[int]]:
        return [
            [r + 1, c + 1]
            for r in range(3)
            for c in range(3)
            if grid[r][c] is None
        ]
    def _render_grid(self, grid: List[List[Optional[str]]]) -> str:
        """Produces a human-readable board representation for prompts/observations."""
        display = []
        display.append("   1   2   3")
        for i, row in enumerate(grid, start=1):
            symbols = [cell if cell is not None else "." for cell in row]
            display.append(f"{i}  " + " | ".join(symbols))
        return "\n".join(display)
    # ---------------------------------------------------------------
    # === CORE API IMPLEMENTATION ===
    # ---------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Begin a new game: empty grid, Solar to move, both scores at zero.
        Args:
            num_players: Number of participants; any value other than 2 is rejected.
            seed: Optional seed, forwarded to the state object and recorded in the game state.
        Raises:
            ValueError: If num_players is not 2.
        Returns:
            None
        """
        if num_players != 2:
            raise ValueError("Crystal Grid requires exactly 2 players.")
        # Two-player state container supplied by the textarena framework
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
        # Each row is its own list so cells can be filled independently.
        empty_board = [[None] * 3 for _ in range(3)]
        opening_notes = {
            "Solar": "The Crystal Grid is empty. You are Solar Architect (symbol ‘S’). Your charge begins first.",
            "Lunar": "The Crystal Grid is empty. You are Lunar Architect (symbol ‘L’). Wait for Solar Architect to place first.",
        }
        initial_state = {
            "turn_count": 0,
            "current_player": "Solar",
            "grid": empty_board,
            "available_cells": self._get_available_cells(empty_board),
            "winner": None,
            "is_terminal": False,
            "observations": opening_notes,
            "history": [],
            "seed": seed,
            "score": {"Solar": 0, "Lunar": 0},
        }
        self.state.reset(
            game_state=initial_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=self.role_mapping,
        )
        # Opening announcement, followed by a picture of the empty board
        self.state.add_observation(
            message="Welcome to Crystal Grid. The Solar Architect begins the alignment ritual.",
            observation_type=ta.ObservationType.GAME_MESSAGE,
        )
        self.state.add_observation(self._render_grid(empty_board), ta.ObservationType.GAME_BOARD)
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.
        The action is logged, parsed and validated; a legal placement is written
        to the grid, after which the game ends on a completed line or a full
        grid. Otherwise the turn marker in the game state passes to the opponent.
        Args:
            action: The action text submitted by the current player (possibly boxed).
        Returns:
            (done, info) as returned by self.state.step().
        """
        acting_player = self.state.current_player_id
        player_symbol = self.symbols[acting_player]
        player_role = "Solar" if acting_player == 0 else "Lunar"
        opponent_role = "Lunar" if acting_player == 0 else "Solar"
        # Log observed action
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=acting_player, to_id=-1)
        # Validate format on the boxed content (or on the raw text if nothing is boxed)
        match = self.action_pattern.match(self._extract_answer_content(action))
        if not match:
            self.state.set_invalid_move(reason="Action format not recognized.")
            return self.state.step()
        # Parse coordinates (convert to 0-index)
        row, col = int(match.group(1)) - 1, int(match.group(2)) - 1
        # Defensive guard: the pattern compiled in __init__ already limits both
        # coordinates to 1-3, so this only fires if action_pattern was replaced.
        if not (0 <= row < 3 and 0 <= col < 3):
            self.state.set_invalid_move(reason="Coordinates must be between 1 and 3.")
            return self.state.step()
        game_state = self.state.game_state
        current_grid = game_state["grid"]
        # Check if cell already occupied
        if current_grid[row][col] is not None:
            self.state.set_invalid_move(reason="That node already holds a crystal.")
            return self.state.step()
        # Make placement
        current_grid[row][col] = player_symbol
        game_state["turn_count"] += 1
        game_state["available_cells"] = self._get_available_cells(current_grid)
        placement = f"[Place: {row + 1},{col + 1}]"
        game_state["history"].append(f"{player_role} → {placement}")
        # Each note is written from its reader's point of view: the mover sees the
        # attributed move, the other player sees "Your opponent placed ...".
        game_state["observations"][player_role] = f"Previous move: {placement} by {player_role}."
        game_state["observations"][opponent_role] = f"Your opponent placed {placement}."
        # Add board visualization
        self.state.add_observation(self._render_grid(current_grid), ta.ObservationType.GAME_BOARD)
        # Check for win condition (only the mover can have completed a line)
        if self._check_winner(player_symbol, current_grid):
            game_state["winner"] = player_role
            game_state["is_terminal"] = True
            game_state["score"][player_role] = 1
            game_state["score"][opponent_role] = 0
            self.state.set_winner(player_id=acting_player, reason=f"{player_role} formed a stable energy conduit.")
            return self.state.step()
        # Check for draw condition: no empty cell remains after this placement
        if not game_state["available_cells"]:
            game_state["winner"] = "draw"
            game_state["is_terminal"] = True
            game_state["score"]["Solar"] = 0.5
            game_state["score"]["Lunar"] = 0.5
            self.state.set_draw(reason="The grid is full; energy flows evenly—a draw.")
            return self.state.step()
        # No terminal condition reached — rotate to next player
        game_state["current_player"] = opponent_role
        return self.state.step()
    # ---------------------------------------------------------------
    # === PROMPT GENERATION ===
    # ---------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        role = self.role_mapping[player_id]
        symbol = self.symbols[player_id]
        board_text = self._render_grid(game_state["grid"])
        prompt = (
            f"You are a mystic architect competing on the Crystal Grid.\n"
            f"Role: {role} (symbol '{symbol}')\n\n"
            "Objective:\n"
            "Align three of your charged crystals in a row, column, or diagonal before your opponent does.\n"
            "Players alternate placing crystals: Solar goes first, then Lunar.\n\n"
            "Current Grid:\n"
            f"{board_text}\n\n"
            "Allowed Action:\n"
            "  [Place: row,col]\n"
            "  where row and col are integers in {1,2,3}.\n\n"
            "Example valid response:\n"
            "I will channel energy into the central node for stability.\n"
            "\\boxed{{[Place: 2,2]}}\n\n"
            "Invalid example (do not use):\n"
            "\\boxed{{[Play: 2,2]}}  <-- token must be [Place: ...]\n\n"
            "At the end of your message, put your final answer within \\boxed{{}} using one allowed action."
        )
        return prompt
 ```