Add env.py from Openverse builder

This commit is contained in:
Openverse Builder
2001-01-01 00:00:00 +00:00
commit 2ce0405988

240
env.py Normal file
View File

@@ -0,0 +1,240 @@
```python
import re
import random
from typing import Any, Dict, List, Optional, Tuple
import textarena as ta
class TicTacTrailEnv(ta.Env):
    """
    Tic-Tac-Trail: A deterministic, turn-based tactical puzzle game.

    Two explorers — Team Sun (S) and Team Moon (M) — claim tiles on an ancient 3×3 grid.
    The first team to align three of their emblems horizontally, vertically, or diagonally wins.

    Player 0 plays Team Sun and player 1 plays Team Moon. Actions are submitted
    as ``[Mark:<row>,<col>]`` or ``[Pass]``, optionally wrapped in ``\\boxed{...}``.
    """

    def __init__(self, max_turns: int = 9):
        """
        Args:
            max_turns: turn limit forwarded to the state object on ``reset``.
        """
        self.max_turns = max_turns
        # Regex patterns for the two allowed actions; the mark pattern only
        # admits row/column digits 0, 1 or 2.
        self.mark_pattern = re.compile(r"^\[Mark:(0|1|2),(0|1|2)\]$")
        self.pass_pattern = re.compile(r"^\[Pass\]$")
        self.num_players = 2

    # ----------------------------------------------------------------
    # Helper: Extract boxed content
    # ----------------------------------------------------------------
    def _extract_answer_content(self, action: str) -> str:
        """
        Extract the content of the first \\boxed{{...}} (or \\boxed{...}) in ``action``.

        Returns the stripped captured text, or the stripped raw ``action`` when
        no boxed expression is present.
        """
        # Double braces are tried first because the player prompt shows the
        # action wrapped as \boxed{{...}}.
        match = re.search(r"\\boxed\{\{(.*?)\}\}", action, re.DOTALL)
        if not match:
            # Single-brace fallback (\boxed{...})
            match = re.search(r"\\boxed\{(.*?)\}", action, re.DOTALL)
        return match.group(1).strip() if match else action.strip()

    # ----------------------------------------------------------------
    # Helper: Board display utility
    # ----------------------------------------------------------------
    def _board_to_str(self, board: List[List[str]]) -> str:
        """Convert board to a readable string: one row per line, cells space-separated."""
        return "\n".join(" ".join(row) for row in board)

    # ----------------------------------------------------------------
    # Helper: Compute available (empty) cells
    # ----------------------------------------------------------------
    def _get_available_moves(self, board: List[List[str]]) -> List[List[int]]:
        """Return ``[row, col]`` pairs of every empty ("_") cell, in row-major order."""
        return [[r, c] for r in range(3) for c in range(3) if board[r][c] == "_"]

    # ----------------------------------------------------------------
    # Helper: Check for winner
    # ----------------------------------------------------------------
    def _check_winner(self, board: List[List[str]]) -> Optional[str]:
        """Return 'S' or 'M' if a symbol fills a full row, column or diagonal, else None."""
        lines = []
        # Rows and cols
        for i in range(3):
            lines.append(board[i])
            lines.append([board[r][i] for r in range(3)])
        # Diagonals
        lines.append([board[i][i] for i in range(3)])
        lines.append([board[i][2 - i] for i in range(3)])
        for line in lines:
            if line[0] != "_" and line.count(line[0]) == 3:
                return line[0]
        return None

    # ----------------------------------------------------------------
    # Player Prompt Generator
    # ----------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Build instructions for a player based on the current board state.

        Args:
            player_id: 0 for Team Sun, anything else is treated as Team Moon.
            game_state: dict providing at least "player_symbols" and "board_state".
        """
        team_name = "Sun" if player_id == 0 else "Moon"
        symbol = game_state["player_symbols"][team_name]
        board_view = self._board_to_str(game_state["board_state"])
        # Only the first three fragments are f-strings; the remaining ones are
        # plain literals, so their doubled braces are emitted verbatim.
        prompt = (
            f"You are an explorer representing Team {team_name} "
            f"({symbol}) claiming tiles on the ancient Tic-Tac-Trail.\n"
            f"Current board state:\n{board_view}\n\n"
            "You may take one of the following actions:\n"
            " - [Mark:<row>,<col>] to claim an unmarked tile (rows and cols 0-2)\n"
            " - [Pass] if no unclaimed tiles remain\n\n"
            "Victory condition: Align three of your emblems in a straight line.\n"
            "All actions must be enclosed in \\boxed{{}} at the end of your message.\n\n"
            "Example valid response:\n"
            "I should take the center stone before my rival.\n"
            "\\boxed{{[Mark:1,1]}}\n\n"
            "Example valid response (no moves left):\n"
            "No moves left; I will pass.\n"
            "\\boxed{{[Pass]}}\n"
        )
        return prompt

    # ----------------------------------------------------------------
    # Reset
    # ----------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: must be 2 (Sun, Moon)
            seed: random seed; applied to ``random`` when given and recorded in
                the game state (42 is recorded when no seed is given)

        Returns:
            The freshly initialised state object.

        Raises:
            ValueError: if ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Tic-Tac-Trail requires exactly 2 players.")
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
        if seed is not None:
            random.seed(seed)
        empty_board = [["_"] * 3 for _ in range(3)]
        game_state: Dict[str, Any] = {
            # Explicit None check so that a seed of 0 is recorded as 0, not 42.
            "seed": seed if seed is not None else 42,
            "turn_count": 1,
            "current_player": "Sun",
            "board_state": empty_board,
            "player_symbols": {"Sun": "S", "Moon": "M"},
            "history": [{"player": "System", "message": "The ancient board awaits."}],
            "winner": None,
            "status": "ongoing",
            "available_moves": self._get_available_moves(empty_board),
            "scores": {"Sun": 0, "Moon": 0},
        }
        role_mapping = {0: "Sun", 1: "Moon"}
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=role_mapping,
        )
        self.state.add_observation("The ancient board awaits.", ta.ObservationType.GAME_MESSAGE, from_id=-1, to_id=-1)
        self.state.add_observation(self._board_to_str(empty_board), ta.ObservationType.GAME_BOARD)
        return self.state

    # ----------------------------------------------------------------
    # Step
    # ----------------------------------------------------------------
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The action is logged, parsed and validated; a valid mark is written to
        the board, then win and draw conditions are checked before the turn is
        handed over.

        Args:
            action: The action text submitted by the current player.

        Returns:
            A tuple (done, info) as produced by ``self.state.step()``.
        """
        player_id = self.state.current_player_id
        role_names = {0: "Sun", 1: "Moon"}
        current_team = role_names[player_id]
        other_team = role_names[1 - player_id]

        # Log player action
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)
        extracted = self._extract_answer_content(action)

        # ---- Validation ----
        # Match once and reuse the result for both validation and parsing.
        mark_match = self.mark_pattern.match(extracted)
        if mark_match is None and not self.pass_pattern.match(extracted):
            self.state.set_invalid_move("Invalid format — must be [Mark:r,c] or [Pass].")
            return self.state.step()

        game_state = self.state.game_state
        board = game_state["board_state"]

        if mark_match is not None:
            # The regex only admits digits 0-2, so no separate range check is needed.
            r, c = int(mark_match.group(1)), int(mark_match.group(2))
            if board[r][c] != "_":
                self.state.set_invalid_move("Chosen cell already occupied.")
                return self.state.step()
            # Apply the move
            board[r][c] = game_state["player_symbols"][current_team]
            game_state["history"].append({"player": current_team, "message": f"Marked cell ({r},{c})."})
        else:
            # [Pass] is only legal when the board has no empty cell left.
            if self._get_available_moves(board):
                self.state.set_invalid_move("Cannot pass while moves still available.")
                return self.state.step()
            game_state["history"].append({"player": current_team, "message": "Passed."})

        # Update game_state
        game_state["available_moves"] = self._get_available_moves(board)

        # ---- Check terminal conditions ----
        symbol_winner = self._check_winner(board)
        if symbol_winner:
            winning_team = "Sun" if symbol_winner == "S" else "Moon"
            # Derive the loser from the winner (not from whose turn it is) so
            # the winner's score can never be overwritten.
            losing_team = "Moon" if winning_team == "Sun" else "Sun"
            game_state["winner"] = winning_team
            game_state["status"] = "finished"
            game_state["scores"][winning_team] = 1
            game_state["scores"][losing_team] = 0
            winner_id = player_id if winning_team == current_team else 1 - player_id
            self.state.set_winner(winner_id, reason=f"Team {winning_team} aligned three emblems!")
            return self.state.step()

        if not game_state["available_moves"]:
            game_state["winner"] = None
            game_state["status"] = "draw"
            game_state["scores"]["Sun"] = 0.5
            game_state["scores"]["Moon"] = 0.5
            self.state.set_draw(reason="All tiles filled without a winning alignment.")
            return self.state.step()

        # If ongoing
        game_state["turn_count"] += 1
        game_state["current_player"] = other_team
        game_state["status"] = "ongoing"
        self.state.add_observation(self._board_to_str(board), ta.ObservationType.GAME_BOARD)
        return self.state.step()

    # ----------------------------------------------------------------
    # Observation Retrieval
    # ----------------------------------------------------------------
    def get_observation(self) -> Tuple[int, List]:
        """Return (player_id, observations) for the current player."""
        # NOTE(review): this returns the state's whole `observations` attribute
        # rather than something filtered for the current player — confirm
        # against the textarena State API that this is what callers expect.
        return (self.state.current_player_id, self.state.observations)

    # ----------------------------------------------------------------
    # Close
    # ----------------------------------------------------------------
    def close(self) -> Tuple[Dict, Dict]:
        """Return final rewards and game info."""
        return self.state.rewards, self.state.game_info
```