Add env.py from Openverse builder

2001-01-01 00:00:00 +00:00
commit b091040a19
1 changed files with 236 additions and 0 deletions
--- a/env.py
+++ b/env.py
@@ -0,0 +1,236 @@
 ```python
 import re
 import random
 from typing import Any, Dict, Optional, Tuple, List
 import textarena as ta
 class GlyphGridDuelEnv(ta.Env):
    """
    GlyphGrid Duel Environment
    Implements the deterministic, turn-based game designed in Stage 1.
    """
    def __init__(self, max_turns: int = 9):
        self.max_turns = max_turns
        self.action_pattern = re.compile(r"^\[Etch:\s*([1-3]),\s*([1-3])\]$")
        self.player_roles = {0: "Solar", 1: "Lunar"}
        self.player_symbols = {"Solar": "S", "Lunar": "L"}
    # -------------------------------------------------------------------------
    # Helper Methods
    # -------------------------------------------------------------------------
    def _extract_answer_content(self, action: str) -> str:
        """Extract content between \boxed{{...}} from a player's response."""
        # Double braces escaped pattern
        match = re.search(r'\\boxed\{\{(.*?)\}\}', action, re.DOTALL)
        if match:
            return match.group(1).strip()
        # fallback single brace just in case
        match = re.search(r'\\boxed\{(.*?)\}', action, re.DOTALL)
        if match:
            return match.group(1).strip()
        return action.strip()
    def _empty_runeboard(self) -> List[List[str]]:
        """Create an empty 3x3 runeboard."""
        return [["_"] * 3 for _ in range(3)]
    def _render_runeboard(self, runeboard: List[List[str]]) -> str:
        """Return a string representation of the current runeboard."""
        board_lines = []
        for row in runeboard:
            board_lines.append(" ".join(row))
        return "\n".join(board_lines)
    def _check_winner(self, runeboard: List[List[str]], symbol: str) -> bool:
        """Return True if the provided symbol has aligned three glyphs."""
        # rows
        for r in range(3):
            if all(runeboard[r][c] == symbol for c in range(3)):
                return True
        # cols
        for c in range(3):
            if all(runeboard[r][c] == symbol for r in range(3)):
                return True
        # diagonals
        if all(runeboard[i][i] == symbol for i in range(3)):
            return True
        if all(runeboard[i][2 - i] == symbol for i in range(3)):
            return True
        return False
    # -------------------------------------------------------------------------
    # Game Lifecycle
    # -------------------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Start a fresh duel: build the state object, install an empty
        runeboard, and broadcast the opening message and board.
        Args:
            num_players: Number of players in the game (must be 2).
            seed: Optional seed; passed to the state object and stored in the
                game state. When omitted, a random value in 0..10000 is
                stored instead.
        Returns:
            The newly created state object (``self.state``).
        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("GlyphGrid Duel requires exactly 2 players.")

        self.state = ta.TwoPlayerState(
            num_players=num_players,
            seed=seed,
            max_turns=self.max_turns,
        )

        # Keep the caller's seed, or draw one when none was supplied.
        if seed is None:
            stored_seed = random.randint(0, 10000)
        else:
            stored_seed = seed

        fresh_board = self._empty_runeboard()
        initial_state: Dict[str, Any] = {
            "runeboard": fresh_board,
            "current_player": "Solar",
            "turn_count": 0,
            "winner": None,
            "is_terminal": False,
            "last_action": None,
            "observations": {"Solar": [], "Lunar": []},
            "player_symbols": self.player_symbols,
            "seed": stored_seed,
        }
        self.state.reset(
            game_state=initial_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=self.player_roles,
        )

        # Opening announcement followed by the (empty) board, sent to everyone.
        opening_message = (
            "The Runeboard is empty. Each Scribe may etch a glyph using [Etch: row, col]."
        )
        self.state.add_observation(
            opening_message,
            ta.ObservationType.GAME_MESSAGE,
            from_id=-1,
            to_id=-1,
        )
        self.state.add_observation(
            self._render_runeboard(fresh_board),
            ta.ObservationType.GAME_BOARD,
            from_id=-1,
            to_id=-1,
        )
        return self.state
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """Generate turn prompt for each player."""
        player_role = self.player_roles[player_id]
        player_symbol = game_state["player_symbols"][player_role]
        runeboard_str = self._render_runeboard(game_state["runeboard"])
        empties = [
            f"[Etch: {r+1}, {c+1}]"
            for r in range(3)
            for c in range(3)
            if game_state["runeboard"][r][c] == "_"
        ]
        empties_str = ", ".join(empties)
        prompt = (
            f"You are a Scribe competing to master the Runeboard through glyph alignment.\n"
            f"Role: Scribe {player_role} ({player_symbol})\n\n"
            f"Rules Summary:\n"
            f"- Each player alternately etches one glyph per turn.\n"
            f"- Wins occur when three identical glyphs align (row, column, or diagonal).\n"
            f"- If all nine cells are filled without alignment, it’s a draw.\n\n"
            f"Current Runeboard:\n{runeboard_str}\n\n"
            f"Empty Cells where you can etch:\n{empties_str}\n\n"
            f"Action Format:\n"
            f"Use [Etch: row, column] with row and column in 1–3.\n"
            f"Put your final answer within \\boxed{{}} at the end of your response.\n\n"
            f"Example valid response:\n"
            f"I will etch at the top right corner.\n"
            f"\\boxed{{{{[Etch: 1, 3]}}}}\n\n"
            f"Example invalid response:\n"
            f"\\boxed{{{{[Mark: 1, 3]}}}}   # Reason: 'Mark' is not a valid action.\n"
        )
        return prompt
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw action is broadcast first, then the boxed answer is extracted
        and validated. A move is rejected (via ``set_invalid_move``) when the
        game is already over, the text does not match the action pattern, or
        the target cell is occupied. A valid move places the player's glyph,
        announces it, re-broadcasts the board, and then checks for a win or a
        full-board draw.
        Args:
            action: The action text submitted by the current player.
        Returns:
            The result of ``self.state.step()``, annotated as ``(done, info)``.
        """
        current_id = self.state.current_player_id
        current_role = self.player_roles[current_id]
        opponent_role = self.player_roles[1 - current_id]
        board = self.state.game_state["runeboard"]

        # Record the player's raw action before any validation.
        self.state.add_observation(
            action,
            ta.ObservationType.PLAYER_ACTION,
            from_id=current_id,
            to_id=-1,
        )

        action_content = self._extract_answer_content(action)

        # --- Validation -----------------------------------------------------
        if self.state.game_state["is_terminal"]:
            self.state.set_invalid_move("Game already ended.")
            return self.state.step()

        match = self.action_pattern.match(action_content)
        if not match:
            self.state.set_invalid_move(
                "Invalid format: must be [Etch: row, column] with row,col in 1–3."
            )
            return self.state.step()

        # Both groups are digits captured by the pattern, so int() cannot fail
        # here; convert from the 1-based action to 0-based board indices.
        row, col = int(match.group(1)) - 1, int(match.group(2)) - 1

        # The default pattern already limits coordinates to 1-3; this guard
        # only matters if ``action_pattern`` is replaced by a looser one.
        if row not in range(3) or col not in range(3):
            self.state.set_invalid_move(
                "Out of bounds: coordinates must be between 1 and 3."
            )
            return self.state.step()

        if board[row][col] != "_":
            self.state.set_invalid_move("Cell already occupied.")
            return self.state.step()

        # --- Apply the move -------------------------------------------------
        symbol = self.player_symbols[current_role]
        board[row][col] = symbol
        self.state.game_state["last_action"] = action_content
        self.state.game_state["turn_count"] += 1

        # Announce the move, then show the updated board.
        move_msg = f"{current_role} etched a {symbol} glyph at ({row+1},{col+1})."
        self.state.add_observation(move_msg, ta.ObservationType.GAME_MESSAGE)
        board_render = self._render_runeboard(board)
        self.state.add_observation(
            board_render, ta.ObservationType.GAME_BOARD, from_id=-1, to_id=-1
        )

        # --- Outcome checks -------------------------------------------------
        if self._check_winner(board, symbol):
            self.state.game_state["winner"] = current_role
            self.state.game_state["is_terminal"] = True
            self.state.set_winner(
                player_id=current_id, reason=f"{current_role} formed a line of glyphs."
            )
            return self.state.step()

        # Nine valid placements fill every cell of the 3x3 board.
        if self.state.game_state["turn_count"] >= 9:
            self.state.game_state["is_terminal"] = True
            self.state.set_draw("Runeboard is full with no alignment. Draw.")
            return self.state.step()

        # Game continues: note whose turn is next in the game state.
        self.state.game_state["current_player"] = opponent_role
        return self.state.step()
 ```