"""GlyphGrid Duel environment (env.py, added from the Openverse builder)."""

import random
import re
from typing import Any, Dict, List, Optional, Tuple

import textarena as ta


class GlyphGridDuelEnv(ta.Env):
    """
    GlyphGrid Duel: a deterministic two-player abstract logic game.

    Players alternate inscribing glyphs ("X" or "O") on a 3×3 grid.
    The first to align three identical glyphs along any row, column, or
    diagonal wins; a full grid with no line is a draw.
    """

    # Exact action grammar: [Inscribe:<row>,<col>] with both values in 1..3.
    VALID_ACTION_PATTERN = re.compile(r"^\[Inscribe:(1|2|3),(1|2|3)\]$")

    # Wrapper the player is told to put the action in (case-insensitive,
    # may span lines). Private so it does not widen the public interface.
    _ANSWER_TAG_PATTERN = re.compile(
        r"<answer>(.*?)</answer>", re.DOTALL | re.IGNORECASE
    )

    def __init__(self):
        """Initialize reusable attributes; the game itself starts in reset()."""
        super().__init__()
        self.state: Optional[ta.TwoPlayerState] = None

    # -------------------------------------------------------------------------
    # Helper: Extract content inside <answer> tags
    # -------------------------------------------------------------------------
    def _extract_answer_content(self, action: str) -> str:
        """
        Return the text inside the first <answer>...</answer> pair.

        If no such pair is present, fall back to the whole action string.
        The result is stripped of surrounding whitespace in both cases.
        """
        # The previous pattern had lost its tag delimiters and matched the
        # empty string at position 0, so every action was reduced to "".
        found = self._ANSWER_TAG_PATTERN.search(action)
        if found is not None:
            return found.group(1).strip()
        return action.strip()

    # -------------------------------------------------------------------------
    # Reset environment
    # -------------------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Reset the environment to an initial state.

        Args:
            num_players: Number of players (must be 2 for GlyphGrid Duel).
            seed: Optional seed for deterministic behavior. When omitted, a
                random seed in [0, 99999] is drawn.

        Returns:
            The freshly built game_state dictionary held by the state manager.

        Raises:
            ValueError: If num_players is not 2.
        """
        if num_players != 2:
            raise ValueError("GlyphGrid Duel requires exactly 2 players.")

        if seed is None:
            seed = random.randint(0, 99999)

        # Create a reproducible state manager
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed)

        # Seed parity decides who starts: even -> Player 1, odd -> Player 2.
        starting_player_id = 0 if seed % 2 == 0 else 1
        starting_player_name = f"Player {starting_player_id + 1}"

        # 3x3 board; an empty string marks an unoccupied cell.
        board = [["" for _ in range(3)] for _ in range(3)]

        players = {
            "Player 1": {"symbol": "X", "moves_made": 0},
            "Player 2": {"symbol": "O", "moves_made": 0},
        }

        opening_message = f"{starting_player_name} begins the glyph duel."
        game_state: Dict[str, Any] = {
            "turn_count": 0,
            "current_player": starting_player_name,
            "seed": seed,
            "board": board,
            "players": players,
            "winner": None,
            "is_terminal": False,
            "last_action": None,
            "observation_log": [opening_message],
        }

        # Initialize internal environment state
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
        )

        # step() trusts self.state.current_player_id to identify the mover and
        # compares it with game_state["current_player"]. Keep the two aligned,
        # otherwise the seed-chosen starter is rejected with "Not your turn."
        # NOTE(review): assumes current_player_id is a plain assignable
        # attribute on the state manager — confirm against the installed
        # textarena version.
        if self.state.current_player_id != starting_player_id:
            self.state.current_player_id = starting_player_id

        # Initial observations
        self.state.add_observation(
            from_id=-1,
            message=opening_message,
            observation_type=ta.ObservationType.GAME_MESSAGE,
        )
        self.state.add_observation(
            from_id=-1,
            message=self._render_board(board),
            observation_type=ta.ObservationType.GAME_BOARD,
        )

        return self.state.game_state

    # -------------------------------------------------------------------------
    # Board and State Helpers
    # -------------------------------------------------------------------------
    def _render_board(self, board: List[List[str]]) -> str:
        """Format the 3×3 board for display; empty cells are shown as '.'."""
        # Column labels are spaced to sit above the " | "-separated cells.
        header = "  1   2   3"
        rows = [
            f"{index} " + " | ".join(cell if cell else "." for cell in row)
            for index, row in enumerate(board, start=1)
        ]
        return f"{header}\n" + "\n".join(rows)

    def _check_winner(self, symbol: str, board: List[List[str]]) -> bool:
        """Return True if `symbol` fills any row, column, or diagonal."""
        for i in range(3):
            # Row i
            if all(board[i][j] == symbol for j in range(3)):
                return True
            # Column i
            if all(board[j][i] == symbol for j in range(3)):
                return True
        # Main diagonal
        if all(board[i][i] == symbol for i in range(3)):
            return True
        # Anti-diagonal
        return all(board[i][2 - i] == symbol for i in range(3))

    def _is_board_full(self, board: List[List[str]]) -> bool:
        """Return True if no empty cells remain."""
        return all(cell != "" for row in board for cell in row)

    # -------------------------------------------------------------------------
    # Step Action
    # -------------------------------------------------------------------------
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        Args:
            action: The action text submitted by the current player.

        Returns:
            Whatever self.state.step() returns, annotated as (done, info).
        """
        player_id = self.state.current_player_id
        player_name = f"Player {player_id + 1}"

        # Log player's raw action
        self.state.add_observation(
            from_id=player_id,
            to_id=-1,
            message=action,
            observation_type=ta.ObservationType.PLAYER_ACTION,
        )

        # Validate the action format and capture the coordinates in one pass.
        answer_content = self._extract_answer_content(action)
        parsed = self.VALID_ACTION_PATTERN.match(answer_content)
        if parsed is None:
            self.state.set_invalid_move(
                reason="Invalid action format. Must match [Inscribe:x,y]."
            )
            return self.state.step()

        # Grammar is 1-based (row, column); the board is indexed from 0.
        row, col = (int(value) - 1 for value in parsed.groups())

        g = self.state.game_state
        board = g["board"]

        # Turn ownership is checked before the move is judged on its merits.
        if g["current_player"] != player_name:
            self.state.set_invalid_move(reason="Not your turn.")
            return self.state.step()

        if board[row][col] != "":
            self.state.set_invalid_move(reason="Cell already occupied.")
            return self.state.step()

        # Apply move
        symbol = g["players"][player_name]["symbol"]
        board[row][col] = symbol
        g["players"][player_name]["moves_made"] += 1
        g["turn_count"] += 1
        g["last_action"] = answer_content
        g["observation_log"].append(
            f"{player_name} placed at ({row + 1},{col + 1})"
        )

        # Announce the move and show the updated board
        self.state.add_observation(
            from_id=player_id,
            message=(
                f"{player_name} inscribed glyph '{symbol}' "
                f"at ({row + 1},{col + 1})"
            ),
            observation_type=ta.ObservationType.GAME_MESSAGE,
        )
        self.state.add_observation(
            from_id=-1,
            message=self._render_board(board),
            observation_type=ta.ObservationType.GAME_BOARD,
        )

        # Win is checked before draw: the ninth move can complete a line.
        if self._check_winner(symbol, board):
            g["winner"] = player_name
            g["is_terminal"] = True
            self.state.set_winner(
                player_id=player_id,
                reason=f"{player_name} aligned three glyphs and won the duel.",
            )
            return self.state.step()

        if self._is_board_full(board):
            g["winner"] = "Draw"
            g["is_terminal"] = True
            self.state.set_draw(
                reason="The grid is full. The duel ends in a draw."
            )
            return self.state.step()

        # Record who moves next; the turn check above compares against this.
        next_player_id = 1 - player_id
        g["current_player"] = f"Player {next_player_id + 1}"

        # End step (non-terminal)
        return self.state.step()

    # -------------------------------------------------------------------------
    # Prompt Generation
    # -------------------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Produce role-appropriate instructions for a player.

        Args:
            player_id: The integer ID of the player (0 or 1).
            game_state: The shared game state.

        Returns:
            A string prompt describing the current state, rules, and
            expected response format.
        """
        player_name = f"Player {player_id + 1}"
        symbol = game_state["players"][player_name]["symbol"]
        board_str = self._render_board(game_state["board"])
        current_turn_name = game_state["current_player"]

        # NOTE(review): the tag names in the formatting rules were missing
        # from the prompt text. <answer> is what _extract_answer_content
        # parses; <think> is a reconstruction and is not enforced by the
        # parser — confirm the intended reasoning tag.
        prompt = (
            f"You are {player_name}, bearer of the glyph '{symbol}', in the abstract digital arena.\n"
            "Your goal is to align three of your runes (glyphs) in a straight line—row, column, or diagonal—before your opponent does.\n\n"
            f"Current arena state:\n{board_str}\n\n"
            f"It is currently {current_turn_name}'s turn.\n"
            "On your turn, inscribe your glyph in any unoccupied cell.\n\n"
            "Action grammar (must be exact): [Inscribe:x,y]\n"
            "  - x, y ∈ {1, 2, 3}\n"
            "  - Example: [Inscribe:2,3] inscribes at row 2, column 3.\n\n"
            "Formatting rules:\n"
            "  - Put private reasoning inside <think></think>.\n"
            "  - Put your chosen action inside <answer></answer>.\n\n"
            "Example valid response:\n"
            "<think>I will take the center to prepare a diagonal line.</think>\n"
            "<answer>[Inscribe:2,2]</answer>\n\n"
            "Example invalid response:\n"
            "<think>I'll use a lowercase tag.</think>\n"
            "<answer>[inscribe:2,2]</answer> <-- Invalid keyword\n"
        )

        return prompt