import re
from typing import Any, Dict, Optional, Tuple, List

import textarena as ta


class StarGridDuelEnv(ta.Env):
    """
    Implementation of the 'StarGrid Duel' game environment.

    Deterministic two-player strategy game where navigators place energy
    beacons on a 3x3 grid aiming to align three of their own beacons in a
    row, column, or diagonal.

    Player 0 is "Navigator Alpha" (key "A", colour "Blue"); player 1 is
    "Navigator Beta" (key "B", colour "Crimson").
    """

    # The eight winning alignments: three rows, three columns, two diagonals.
    _WIN_LINES: Tuple[Tuple[str, str, str], ...] = (
        ("A1", "A2", "A3"),
        ("B1", "B2", "B3"),
        ("C1", "C2", "C3"),
        ("A1", "B1", "C1"),
        ("A2", "B2", "C2"),
        ("A3", "B3", "C3"),
        ("A1", "B2", "C3"),
        ("A3", "B2", "C1"),
    )

    # Matches \boxed{...} as well as the doubled-brace variant \boxed{{...}}.
    _BOXED_PATTERN = re.compile(r"\\boxed\{\{?([^{}]*)\}?\}")

    def __init__(self, max_turns: int = 9):
        """
        Args:
            max_turns: Turn limit handed to the underlying two-player state.
                The default of 9 equals the number of cells on the grid.
        """
        super().__init__()
        self.max_turns = max_turns
        # Compile regex patterns once
        self.place_pattern = re.compile(r"^\[Place:\s*(A|B|C)(1|2|3)\]$")
        # Cell labels in row-major order: A1, A2, A3, B1, ...
        self.all_cells = [f"{r}{c}" for r in "ABC" for c in "123"]

    # ------------------------ Helper Methods ------------------------

    def _extract_answer_content(self, action: str) -> str:
        r"""
        Extract the content inside \boxed{} for machine parsing.

        The prompt asks players to put the final answer at the end of the
        response, so when several boxed expressions are present the LAST one
        is used; an earlier mention of \boxed{} in the reasoning text must not
        shadow the actual move. Both single and doubled braces are accepted.

        Args:
            action: Raw text submitted by the player.

        Returns:
            The trimmed boxed content, or the whole trimmed action when no
            boxed expression is found.
        """
        boxed = self._BOXED_PATTERN.findall(action)
        if boxed:
            return boxed[-1].strip()
        return action.strip()

    def _check_victory(self, board: Dict[str, Optional[str]], color: str) -> bool:
        """Return True if `color` occupies all three cells of any winning line."""
        return any(
            all(board[cell] == color for cell in line)
            for line in self._WIN_LINES
        )

    def _generate_board_str(self, board: Dict[str, Optional[str]]) -> str:
        """
        Render the 3x3 StarGrid as a simple text table.

        Empty cells show their coordinate (e.g. "B2"); occupied cells show
        "B" for a Blue beacon and "C" for any other colour (Crimson).
        """
        rows = []
        for r in "ABC":
            row_cells = []
            for c in "123":
                label = f"{r}{c}"
                owner = board[label]
                if owner is None:
                    row_cells.append(label)
                else:
                    row_cells.append("B" if owner == "Blue" else "C")
            rows.append(" | ".join(row_cells))
        return "\n".join(rows)

    def _get_active_player_label(self, player_id: int) -> str:
        """Map a numeric player id to its display name."""
        return "Navigator Alpha" if player_id == 0 else "Navigator Beta"

    def _cell_valid(self, cell: str) -> bool:
        """Return True if `cell` is one of the nine grid coordinates."""
        return cell in self.all_cells

    # ------------------------ Core Env API ------------------------

    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: Number of players in the game. Must be 2.
            seed: Optional seed for determinism.

        Returns:
            The freshly initialised state object (self.state).

        Raises:
            ValueError: If `num_players` is not 2.
        """
        if num_players != 2:
            raise ValueError("StarGrid Duel requires exactly 2 players.")

        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

        empty_board = {cell: None for cell in self.all_cells}
        game_state: Dict[str, Any] = {
            "turn_index": 0,
            "active_player": "A",
            "board": empty_board,
            "player_symbols": {"A": "Blue", "B": "Crimson"},
            "move_history": [],
            "winner": None,
            "is_draw": False,
            "observations": {"A": "", "B": ""},
            "seed": seed,
        }

        role_mapping = {0: "Navigator Alpha", 1: "Navigator Beta"}

        # Initialize internal game state
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=role_mapping,
        )

        # Onboarding observations
        onboarding_msg = (
            "Welcome to StarGrid Duel!\n"
            "Two navigators will alternately place energy beacons on the 3×3 StarGrid.\n"
            "Your mission is to align three of your beacons in a line before your rival."
        )
        self.state.add_observation(onboarding_msg, ta.ObservationType.GAME_MESSAGE)

        board_msg = self._generate_board_str(empty_board)
        self.state.add_observation(board_msg, ta.ObservationType.GAME_BOARD)

        return self.state

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        Args:
            action: The action text submitted by the current player.

        Returns:
            A tuple (done, info) where:
                done: True if the episode has concluded
                info: A ta.Info object with auxiliary details
        """
        player_id = self.state.current_player_id
        player_key = "A" if player_id == 0 else "B"
        game_state = self.state.game_state
        player_color = game_state["player_symbols"][player_key]

        # 1. Log the raw player action
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)

        # 2. Extract the boxed content
        extracted = self._extract_answer_content(action)

        # 3. Validate the action pattern
        match = self.place_pattern.match(extracted)
        if not match:
            self.state.set_invalid_move(
                reason=(
                    "MalformedAction: The action must strictly follow "
                    "'[Place: <cell>]' format, e.g. [Place: B2]."
                )
            )
            return self.state.step()

        cell_id = f"{match.group(1)}{match.group(2)}"

        # Defensive guard: the default place_pattern only admits A-C / 1-3, so
        # this can only trigger if place_pattern or all_cells is customised.
        if not self._cell_valid(cell_id):
            self.state.set_invalid_move(reason=f"CellOutOfRange: {cell_id} is not a valid StarGrid coordinate.")
            return self.state.step()

        board = game_state["board"]
        if board[cell_id] is not None:
            self.state.set_invalid_move(reason=f"CellOccupied: {cell_id} is already occupied.")
            return self.state.step()

        # 4. Execute valid action: place beacon
        board[cell_id] = player_color
        game_state["board"] = board

        # Record move
        game_state["move_history"].append({"player": player_key, "action": extracted})

        # Count the valid placement (the active player is rotated in step 8,
        # and only when the game continues).
        game_state["turn_index"] += 1

        board_str = self._generate_board_str(board)

        # 5. Check for victory
        if self._check_victory(board, player_color):
            game_state["winner"] = player_key
            winner_str = self._get_active_player_label(player_id)
            self.state.set_winner(player_id=player_id, reason=f"{winner_str} aligned three energy beacons in a line.")
            self.state.add_observation(board_str, ta.ObservationType.GAME_BOARD)
            return self.state.step()

        # 6. Check for draw (grid filled, no winner)
        if all(v is not None for v in board.values()):
            game_state["is_draw"] = True
            self.state.set_draw(reason="All cells filled with no aligned beacons—a balanced standoff.")
            self.state.add_observation(board_str, ta.ObservationType.GAME_BOARD)
            return self.state.step()

        # 7. Update board observation for next player
        self.state.add_observation(board_str, ta.ObservationType.GAME_BOARD)

        # 8. Rotate turn
        game_state["active_player"] = "B" if player_key == "A" else "A"

        # Proceed to next step
        return self.state.step()

    # -------------------- Player Prompt Generation --------------------

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        r"""
        Build the instruction prompt shown to a player.

        The prompt states the player's role and colour, renders the board held
        in `game_state`, lists the open cells, and explains that the move must
        be given as \boxed{[Place: <cell>]} at the end of the response.

        Args:
            player_id: 0 for Navigator Alpha, anything else for Navigator Beta.
            game_state: The game-state dict created in `reset`.

        Returns:
            The prompt text.
        """
        role = self._get_active_player_label(player_id)
        color = game_state["player_symbols"]["A" if player_id == 0 else "B"]
        active_label = "Navigator Alpha" if game_state["active_player"] == "A" else "Navigator Beta"

        board_repr = self._generate_board_str(game_state["board"])
        open_cells = [cell for cell, val in game_state["board"].items() if val is None]
        available = ", ".join(open_cells) if open_cells else "None"

        # NOTE: the pieces below without an f-prefix are plain literals, so
        # braces are written once; doubling them would show "{{" to the player.
        return (
            f"You are {role}, commanding the {color} energy.\n"
            "Your goal: deploy energy beacons across the StarGrid to align three of your own in a straight line before your opponent.\n\n"
            f"Current Board:\n{board_repr}\n\n"
            f"Your Color: {color}\nActive Navigator: {active_label}\n\n"
            "Allowed Actions:\nFormat: [Place: <cell>]\n"
            f"Available cells: {available}\n\n"
            "Response Format:\n"
            "You may describe your reasoning, then finalize your move as:\n"
            "\\boxed{[Place: <cell>]}\n\n"
            "Example valid response:\n"
            "I will claim the center of the grid to control diagonals.\n"
            "\\boxed{[Place: B2]}\n\n"
            "Example invalid response:\n"
            "I think I'll move now.\n"
            "\\boxed{[Move: B2]}\n\n"
            "Put your final answer within \\boxed{} at the end of your response."
        )