# Provenance: content of env.py as added (new file) by commit
# b091040a1970584ba8abea1d8fcd2d0be781d903, "Add env.py from Openverse builder"
# (author: Openverse Builder). The diff markers and the markdown code fence
# that wrapped the original payload have been removed so the module parses.
import re
import random
from typing import Any, Dict, Optional, Tuple, List

import textarena as ta


class GlyphGridDuelEnv(ta.Env):
    """
    GlyphGrid Duel Environment.

    Implements the deterministic, turn-based game designed in Stage 1.

    Two players ("Solar", glyph ``S``; "Lunar", glyph ``L``) alternately etch
    one glyph per turn on a 3x3 runeboard. The first player to align three of
    their glyphs in a row, column, or diagonal wins; a full board with no
    alignment is a draw.
    """

    # Board geometry and the marker used for an un-etched cell.
    _BOARD_SIZE = 3
    _EMPTY_CELL = "_"

    # Matches a boxed answer written with either doubled or single braces.
    # The doubled-brace alternative comes first so that it wins when both
    # could match at the same position.
    _BOXED_PATTERN = re.compile(
        r"\\boxed\{\{(.*?)\}\}|\\boxed\{(.*?)\}", re.DOTALL
    )

    def __init__(self, max_turns: int = 9):
        """
        Create the environment.

        Args:
            max_turns: Turn limit forwarded to the TextArena state in `reset`.
        """
        self.max_turns = max_turns
        # Any non-negative integers are captured here; the 1-3 range is
        # enforced in `step` so that out-of-range coordinates get their own
        # error message instead of a generic "invalid format".
        self.action_pattern = re.compile(r"^\[Etch:\s*([0-9]+),\s*([0-9]+)\]$")
        self.player_roles = {0: "Solar", 1: "Lunar"}
        self.player_symbols = {"Solar": "S", "Lunar": "L"}

    # -------------------------------------------------------------------------
    # Helper Methods
    # -------------------------------------------------------------------------
    def _extract_answer_content(self, action: str) -> str:
        r"""
        Extract the content of the final \boxed{...} in a player's response.

        Both \boxed{{...}} and \boxed{...} are accepted. When the response
        contains several boxes, the last one is used, because the prompt asks
        for the final answer at the end of the response. If no box is present,
        the whole response (stripped) is returned.

        Args:
            action: Raw text submitted by the player.

        Returns:
            The stripped text inside the last box, or the stripped response.
        """
        boxes = list(self._BOXED_PATTERN.finditer(action))
        if not boxes:
            return action.strip()
        final_box = boxes[-1]
        inner = final_box.group(1)
        if inner is None:
            # The single-brace alternative matched.
            inner = final_box.group(2)
        return inner.strip()

    def _empty_runeboard(self) -> List[List[str]]:
        """Create an empty 3x3 runeboard (each row is an independent list)."""
        size = self._BOARD_SIZE
        return [[self._EMPTY_CELL] * size for _ in range(size)]

    def _render_runeboard(self, runeboard: List[List[str]]) -> str:
        """Return the runeboard as text: cells space-separated, one row per line."""
        return "\n".join(" ".join(row) for row in runeboard)

    def _check_winner(self, runeboard: List[List[str]], symbol: str) -> bool:
        """
        Return True if `symbol` fills a complete row, column, or diagonal.

        Args:
            runeboard: The 3x3 board to inspect.
            symbol: The glyph to look for.
        """
        size = self._BOARD_SIZE
        lines = [[(r, c) for c in range(size)] for r in range(size)]   # rows
        lines += [[(r, c) for r in range(size)] for c in range(size)]  # columns
        lines.append([(i, i) for i in range(size)])                    # main diagonal
        lines.append([(i, size - 1 - i) for i in range(size)])         # anti-diagonal
        return any(
            all(runeboard[r][c] == symbol for r, c in line) for line in lines
        )

    def _reject(self, reason: str) -> Tuple[bool, ta.Info]:
        """Flag the current action as invalid with `reason` and advance the state."""
        self.state.set_invalid_move(reason)
        return self.state.step()

    # -------------------------------------------------------------------------
    # Game Lifecycle
    # -------------------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: Number of players in the game (must be 2).
            seed: Optional seed for deterministic behavior.

        Returns:
            The freshly initialised TextArena state object.

        Raises:
            ValueError: If `num_players` is not 2.
        """
        if num_players != 2:
            raise ValueError("GlyphGrid Duel requires exactly 2 players.")

        self.state = ta.TwoPlayerState(
            num_players=num_players, seed=seed, max_turns=self.max_turns
        )
        # Only recorded in the game state; drawn at random when no seed is given.
        rng_seed = seed if seed is not None else random.randint(0, 10000)

        game_state: Dict[str, Any] = {
            "runeboard": self._empty_runeboard(),
            "current_player": "Solar",
            "turn_count": 0,
            "winner": None,
            "is_terminal": False,
            "last_action": None,
            "observations": {"Solar": [], "Lunar": []},
            # Copy so that the per-game state does not alias the instance dict.
            "player_symbols": dict(self.player_symbols),
            "seed": rng_seed,
        }

        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=self.player_roles,
        )

        # initial observation to all players
        init_message = (
            "The Runeboard is empty. Each Scribe may etch a glyph using [Etch: row, col]."
        )
        self.state.add_observation(
            init_message, ta.ObservationType.GAME_MESSAGE, from_id=-1, to_id=-1
        )
        board_str = self._render_runeboard(game_state["runeboard"])
        self.state.add_observation(
            board_str, ta.ObservationType.GAME_BOARD, from_id=-1, to_id=-1
        )

        return self.state

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Generate the turn prompt for a player.

        The prompt states the player's role and glyph, summarises the rules,
        shows the current board and the list of empty cells, and explains the
        expected answer format with one valid and one invalid example.

        Args:
            player_id: Id of the player (a key of `self.player_roles`).
            game_state: Current game state dictionary.

        Returns:
            The prompt text.
        """
        player_role = self.player_roles[player_id]
        player_symbol = game_state["player_symbols"][player_role]
        runeboard = game_state["runeboard"]
        runeboard_str = self._render_runeboard(runeboard)
        size = self._BOARD_SIZE
        empties = [
            f"[Etch: {r+1}, {c+1}]"
            for r in range(size)
            for c in range(size)
            if runeboard[r][c] == self._EMPTY_CELL
        ]
        empties_str = ", ".join(empties)

        # NOTE: inside an f-string "{{" / "}}" render as a single brace, so the
        # examples below are shown with single braces, matching the
        # instruction line that precedes them.
        prompt = (
            f"You are a Scribe competing to master the Runeboard through glyph alignment.\n"
            f"Role: Scribe {player_role} ({player_symbol})\n\n"
            f"Rules Summary:\n"
            f"- Each player alternately etches one glyph per turn.\n"
            f"- Wins occur when three identical glyphs align (row, column, or diagonal).\n"
            f"- If all nine cells are filled without alignment, it’s a draw.\n\n"
            f"Current Runeboard:\n{runeboard_str}\n\n"
            f"Empty Cells where you can etch:\n{empties_str}\n\n"
            f"Action Format:\n"
            f"Use [Etch: row, column] with row and column in 1–3.\n"
            f"Put your final answer within \\boxed{{}} at the end of your response.\n\n"
            f"Example valid response:\n"
            f"I will etch at the top right corner.\n"
            f"\\boxed{{[Etch: 1, 3]}}\n\n"
            f"Example invalid response:\n"
            f"\\boxed{{[Mark: 1, 3]}} # Reason: 'Mark' is not a valid action.\n"
        )
        return prompt

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw action is recorded as an observation, the boxed answer is
        extracted and validated (format, bounds, cell availability), and the
        glyph is placed. A win or a full board ends the game.

        Args:
            action: The action text submitted by the current player.

        Returns:
            (done, info)
        """
        current_id = self.state.current_player_id
        current_role = self.player_roles[current_id]
        opponent_role = self.player_roles[1 - current_id]
        game_state = self.state.game_state
        board = game_state["runeboard"]

        # Record player's raw action
        self.state.add_observation(
            action,
            ta.ObservationType.PLAYER_ACTION,
            from_id=current_id,
            to_id=-1,
        )

        # Extract content inside boxed
        action_content = self._extract_answer_content(action)

        # Validate
        if game_state["is_terminal"]:
            return self._reject("Game already ended.")

        match = self.action_pattern.match(action_content)
        if not match:
            return self._reject(
                "Invalid format: must be [Etch: row, column] with row,col in 1–3."
            )

        out_of_bounds = "Out of bounds: coordinates must be between 1 and 3."
        try:
            row, col = int(match.group(1)) - 1, int(match.group(2)) - 1
        except ValueError:
            # int() can refuse absurdly long digit strings (CPython's integer
            # string conversion length limit); such values are out of range.
            return self._reject(out_of_bounds)

        size = self._BOARD_SIZE
        if not (0 <= row < size and 0 <= col < size):
            return self._reject(out_of_bounds)

        if board[row][col] != self._EMPTY_CELL:
            return self._reject("Cell already occupied.")

        # Apply action
        symbol = self.player_symbols[current_role]
        board[row][col] = symbol
        game_state["last_action"] = action_content
        game_state["turn_count"] += 1

        # Announce move
        move_msg = f"{current_role} etched a {symbol} glyph at ({row+1},{col+1})."
        self.state.add_observation(move_msg, ta.ObservationType.GAME_MESSAGE)

        # Show updated board
        board_render = self._render_runeboard(board)
        self.state.add_observation(
            board_render, ta.ObservationType.GAME_BOARD, from_id=-1, to_id=-1
        )

        # Check win condition
        if self._check_winner(board, symbol):
            game_state["winner"] = current_role
            game_state["is_terminal"] = True
            self.state.set_winner(
                player_id=current_id, reason=f"{current_role} formed a line of glyphs."
            )
            return self.state.step()

        # Check draw condition: no empty cell is left and nobody has won.
        board_full = all(cell != self._EMPTY_CELL for line in board for cell in line)
        if board_full:
            game_state["is_terminal"] = True
            self.state.set_draw("Runeboard is full with no alignment. Draw.")
            return self.state.step()

        # Switch player
        game_state["current_player"] = opponent_role
        return self.state.step()