import re
import random
from typing import Any, Dict, List, Optional, Tuple

import textarena as ta


class TicTacTrailEnv(ta.Env):
    """
    Tic-Tac-Trail: A deterministic, turn-based tactical puzzle game.

    Two explorers — Team Sun (S) and Team Moon (M) — claim tiles on an ancient 3×3 grid.
    The first team to align three of their emblems horizontally, vertically, or diagonally wins.
    """

    # Player id -> team name; shared by reset() and step().
    _ROLE_NAMES = {0: "Sun", 1: "Moon"}
    # Marker used on the board for an unclaimed tile.
    _EMPTY = "_"

    def __init__(self, max_turns: int = 9):
        """
        Store the turn limit and compile the action patterns.

        Args:
            max_turns: Turn limit handed to the state object in reset().
        """
        self.max_turns = max_turns
        # Define regex patterns for allowed actions
        self.mark_pattern = re.compile(r"^\[Mark:(0|1|2),(0|1|2)\]$")
        self.pass_pattern = re.compile(r"^\[Pass\]$")
        self.num_players = 2

    # ----------------------------------------------------------------
    # Helper: Extract boxed content
    # ----------------------------------------------------------------
    def _extract_answer_content(self, action: str) -> str:
        """
        Extract the action text wrapped in a boxed marker.

        The double-brace form (\\boxed{{...}}) is tried first, then the
        single-brace form (\\boxed{...}). The first occurrence in the message
        is used. If neither form is present, the whole message is returned.

        Args:
            action: Raw message submitted by the player.

        Returns:
            The stripped boxed content, or the stripped message itself.
        """
        match = re.search(r"\\boxed\{\{(.*?)\}\}", action, re.DOTALL)
        if not match:
            # Try single braces fallback (\boxed{})
            match = re.search(r"\\boxed\{(.*?)\}", action, re.DOTALL)
        return match.group(1).strip() if match else action.strip()

    # ----------------------------------------------------------------
    # Helper: Board display utility
    # ----------------------------------------------------------------
    def _board_to_str(self, board: List[List[str]]) -> str:
        """Render the board as one line per row, cells separated by spaces."""
        return "\n".join(" ".join(row) for row in board)

    # ----------------------------------------------------------------
    # Helper: Compute available (empty) cells
    # ----------------------------------------------------------------
    def _get_available_moves(self, board: List[List[str]]) -> List[List[int]]:
        """Return the [row, col] pairs of all unclaimed cells in row-major order."""
        return [
            [r, c]
            for r in range(3)
            for c in range(3)
            if board[r][c] == self._EMPTY
        ]

    # ----------------------------------------------------------------
    # Helper: Check for winner
    # ----------------------------------------------------------------
    def _check_winner(self, board: List[List[str]]) -> Optional[str]:
        """
        Return the symbol that fills a complete row, column, or diagonal.

        Returns:
            The winning symbol ('S' or 'M' in this game), or None if no line
            is completely filled by a single non-empty symbol.
        """
        lines: List[List[str]] = []
        # Rows and cols
        for i in range(3):
            lines.append(board[i])
            lines.append([board[r][i] for r in range(3)])
        # Diagonals
        lines.append([board[i][i] for i in range(3)])
        lines.append([board[i][2 - i] for i in range(3)])

        for line in lines:
            if line[0] != self._EMPTY and line.count(line[0]) == 3:
                return line[0]
        return None

    # ----------------------------------------------------------------
    # Player Prompt Generator
    # ----------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Build instructions for a player based on the current board state.

        Args:
            player_id: 0 is treated as Team Sun; any other id as Team Moon.
            game_state: Must provide "player_symbols" and "board_state".

        Returns:
            The full instruction text, including the rendered board.
        """
        team_name = "Sun" if player_id == 0 else "Moon"
        symbol = game_state["player_symbols"][team_name]
        board_view = self._board_to_str(game_state["board_state"])

        # Only the first three pieces are f-strings. The remaining literals are
        # plain strings, so their doubled braces reach the player verbatim;
        # _extract_answer_content accepts both the double- and single-brace form.
        prompt = (
            f"You are an explorer representing Team {team_name} "
            f"({symbol}) claiming tiles on the ancient Tic-Tac-Trail.\n"
            f"Current board state:\n{board_view}\n\n"
            "You may take one of the following actions:\n"
            " - [Mark:<row>,<col>] to claim an unmarked tile (rows and cols 0–2)\n"
            " - [Pass] if no unclaimed tiles remain\n\n"
            "Victory condition: Align three of your emblems in a straight line.\n"
            "All actions must be enclosed in \\boxed{{}} at the end of your message.\n\n"
            "Example valid response:\n"
            "I should take the center stone before my rival.\n"
            "\\boxed{{[Mark:1,1]}}\n\n"
            "Example valid response (no moves left):\n"
            "No moves left; I will pass.\n"
            "\\boxed{{[Pass]}}\n"
        )
        return prompt

    # ----------------------------------------------------------------
    # Reset
    # ----------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: must be 2 (Sun, Moon)
            seed: random seed. It is recorded in the game state (42 is recorded
                when no seed is given) and, when provided, also seeds the
                global ``random`` module.

        Returns:
            The freshly initialised state object.

        Raises:
            ValueError: if ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Tic-Tac-Trail requires exactly 2 players.")

        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

        if seed is not None:
            random.seed(seed)

        empty_board = [[self._EMPTY] * 3 for _ in range(3)]
        opening_message = "The ancient board awaits."

        game_state: Dict[str, Any] = {
            # Explicit None check: a seed of 0 is valid and must be kept.
            "seed": seed if seed is not None else 42,
            "turn_count": 1,
            "current_player": "Sun",
            "board_state": empty_board,
            "player_symbols": {"Sun": "S", "Moon": "M"},
            "history": [{"player": "System", "message": opening_message}],
            "winner": None,
            "status": "ongoing",
            "available_moves": self._get_available_moves(empty_board),
            "scores": {"Sun": 0, "Moon": 0},
        }

        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            # Pass a copy so the class-level mapping cannot be mutated.
            role_mapping=dict(self._ROLE_NAMES),
        )

        self.state.add_observation(opening_message, ta.ObservationType.GAME_MESSAGE, from_id=-1, to_id=-1)
        self.state.add_observation(self._board_to_str(empty_board), ta.ObservationType.GAME_BOARD)
        return self.state

    # ----------------------------------------------------------------
    # Step
    # ----------------------------------------------------------------
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The action is logged, validated, and applied to the board. Win and
        draw conditions are then evaluated. Invalid actions are reported via
        ``set_invalid_move`` and leave the board untouched.

        Args:
            action: The action text submitted by the current player.

        Returns:
            A tuple (done, info)
        """
        player_id = self.state.current_player_id
        current_team = self._ROLE_NAMES[player_id]
        other_team = self._ROLE_NAMES[1 - player_id]

        # Log player action
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)

        extracted = self._extract_answer_content(action)

        # ---- Validation ----
        # Match once and reuse the result for both validation and parsing.
        mark = self.mark_pattern.match(extracted)
        if mark is None and not self.pass_pattern.match(extracted):
            self.state.set_invalid_move("Invalid format — must be [Mark:r,c] or [Pass].")
            return self.state.step()

        game_state = self.state.game_state
        board = game_state["board_state"]

        if mark is not None:
            # The pattern only admits 0, 1 or 2, so no range check is needed.
            r, c = int(mark.group(1)), int(mark.group(2))
            if board[r][c] != self._EMPTY:
                self.state.set_invalid_move("Chosen cell already occupied.")
                return self.state.step()

            # Apply the move
            board[r][c] = game_state["player_symbols"][current_team]
            game_state["history"].append({"player": current_team, "message": f"Marked cell ({r},{c})."})
        else:
            # [Pass] is only legal when the board is full.
            if self._get_available_moves(board):
                self.state.set_invalid_move("Cannot pass while moves still available.")
                return self.state.step()
            game_state["history"].append({"player": current_team, "message": "Passed."})

        # Update game_state
        game_state["available_moves"] = self._get_available_moves(board)

        # ---- Check terminal conditions ----
        symbol_winner = self._check_winner(board)
        if symbol_winner:
            winning_team = "Sun" if symbol_winner == "S" else "Moon"
            # Derive the loser from the winner, not from whose turn it was.
            losing_team = "Moon" if winning_team == "Sun" else "Sun"
            game_state["winner"] = winning_team
            game_state["status"] = "finished"
            game_state["scores"][winning_team] = 1
            game_state["scores"][losing_team] = 0
            winner_id = player_id if winning_team == current_team else 1 - player_id
            self.state.set_winner(winner_id, reason=f"Team {winning_team} aligned three emblems!")
            return self.state.step()

        if not game_state["available_moves"]:
            game_state["winner"] = None
            game_state["status"] = "draw"
            game_state["scores"]["Sun"] = 0.5
            game_state["scores"]["Moon"] = 0.5
            self.state.set_draw(reason="All tiles filled without a winning alignment.")
            return self.state.step()

        # If ongoing
        game_state["turn_count"] += 1
        game_state["current_player"] = other_team
        game_state["status"] = "ongoing"
        self.state.add_observation(self._board_to_str(board), ta.ObservationType.GAME_BOARD)

        return self.state.step()

    # ----------------------------------------------------------------
    # Observation Retrieval
    # ----------------------------------------------------------------
    def get_observation(self) -> Tuple[int, List]:
        """Return the current player id together with ``self.state.observations``."""
        return (self.state.current_player_id, self.state.observations)

    # ----------------------------------------------------------------
    # Close
    # ----------------------------------------------------------------
    def close(self) -> Tuple[Dict, Dict]:
        """Return ``self.state.rewards`` and ``self.state.game_info``."""
        return self.state.rewards, self.state.game_info