Add env.py from Openverse builder
This commit is contained in:
240
env.py
Normal file
240
env.py
Normal file
@@ -0,0 +1,240 @@
|
||||
```python
|
||||
import re
|
||||
import random
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import textarena as ta
|
||||
|
||||
|
||||
class TicTacTrailEnv(ta.Env):
    """
    Tic-Tac-Trail: A deterministic, turn-based tactical puzzle game.

    Two explorers — Team Sun (S) and Team Moon (M) — claim tiles on an ancient 3×3 grid.
    The first team to align three of their emblems horizontally, vertically, or diagonally wins.

    Player 0 plays Team Sun and player 1 plays Team Moon. Empty tiles are
    stored as "_" in a 3x3 list of lists kept under game_state["board_state"].
    """

    def __init__(self, max_turns: int = 9):
        """
        Store the turn limit and compile the action patterns.

        Args:
            max_turns: turn limit handed to the state object created in reset().
        """
        self.max_turns = max_turns
        # Define regex patterns for allowed actions.
        # Only the digits 0-2 are accepted, so a matched move is always on the board.
        self.mark_pattern = re.compile(r"^\[Mark:(0|1|2),(0|1|2)\]$")
        self.pass_pattern = re.compile(r"^\[Pass\]$")
        self.num_players = 2

    # ----------------------------------------------------------------
    # Helper: Extract boxed content
    # ----------------------------------------------------------------
    def _extract_answer_content(self, action: str) -> str:
        """
        Extract content from \\boxed{{}}. Returns stripped text.

        The double-brace form is tried first, then the single-brace form
        (\\boxed{}). In both cases the first occurrence in the message is
        used. When neither form is present, the whole message is returned
        stripped.
        """
        match = re.search(r"\\boxed\{\{(.*?)\}\}", action, re.DOTALL)
        if not match:
            # Try single braces fallback (\boxed{})
            match = re.search(r"\\boxed\{(.*?)\}", action, re.DOTALL)
        return match.group(1).strip() if match else action.strip()

    # ----------------------------------------------------------------
    # Helper: Board display utility
    # ----------------------------------------------------------------
    def _board_to_str(self, board: List[List[str]]) -> str:
        """Convert board to a readable string: one row per line, cells separated by spaces."""
        return "\n".join(" ".join(row) for row in board)

    # ----------------------------------------------------------------
    # Helper: Compute available (empty) cells
    # ----------------------------------------------------------------
    def _get_available_moves(self, board: List[List[str]]) -> List[List[int]]:
        """Return the [row, col] pairs of every empty ("_") cell, in row-major order."""
        return [[r, c] for r in range(3) for c in range(3) if board[r][c] == "_"]

    # ----------------------------------------------------------------
    # Helper: Check for winner
    # ----------------------------------------------------------------
    def _check_winner(self, board: List[List[str]]) -> Optional[str]:
        """Return 'S' or 'M' if a symbol wins, else None."""
        lines = []
        # Rows and cols
        for i in range(3):
            lines.append(board[i])
            lines.append([board[r][i] for r in range(3)])
        # Diagonals
        lines.append([board[i][i] for i in range(3)])
        lines.append([board[i][2 - i] for i in range(3)])

        for line in lines:
            # A line wins when its first cell is claimed and all three cells match it.
            if line[0] != "_" and line.count(line[0]) == 3:
                return line[0]
        return None

    # ----------------------------------------------------------------
    # Player Prompt Generator
    # ----------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Build instructions for a player based on the current board state.

        Args:
            player_id: 0 for Team Sun, anything else is treated as Team Moon.
            game_state: must provide "player_symbols" and "board_state".

        Returns:
            The instruction text shown to the player.
        """
        team_name = "Sun" if player_id == 0 else "Moon"
        symbol = game_state["player_symbols"][team_name]
        board_view = self._board_to_str(game_state["board_state"])

        # Only the first three fragments are f-strings. The remaining fragments
        # are plain literals, so their doubled braces reach the player verbatim
        # (which is why _extract_answer_content accepts the \boxed{{...}} form).
        prompt = (
            f"You are an explorer representing Team {team_name} "
            f"({symbol}) claiming tiles on the ancient Tic-Tac-Trail.\n"
            f"Current board state:\n{board_view}\n\n"
            "You may take one of the following actions:\n"
            " - [Mark:<row>,<col>] to claim an unmarked tile (rows and cols 0–2)\n"
            " - [Pass] if no unclaimed tiles remain\n\n"
            "Victory condition: Align three of your emblems in a straight line.\n"
            "All actions must be enclosed in \\boxed{{}} at the end of your message.\n\n"
            "Example valid response:\n"
            "I should take the center stone before my rival.\n"
            "\\boxed{{[Mark:1,1]}}\n\n"
            "Example valid response (no moves left):\n"
            "No moves left; I will pass.\n"
            "\\boxed{{[Pass]}}\n"
        )
        return prompt

    # ----------------------------------------------------------------
    # Reset
    # ----------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: must be 2 (Sun, Moon)
            seed: random seed; when given it also seeds the global `random`
                module. It is recorded in the game state (42 is recorded when
                no seed is supplied).

        Returns:
            The freshly initialised state object.

        Raises:
            ValueError: if num_players is not 2.
        """
        if num_players != 2:
            raise ValueError("Tic-Tac-Trail requires exactly 2 players.")

        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

        if seed is not None:
            random.seed(seed)

        empty_board = [["_"] * 3 for _ in range(3)]

        game_state: Dict[str, Any] = {
            # Explicit None check: `seed or 42` would replace a valid seed of 0.
            "seed": seed if seed is not None else 42,
            "turn_count": 1,
            "current_player": "Sun",
            "board_state": empty_board,
            "player_symbols": {"Sun": "S", "Moon": "M"},
            "history": [{"player": "System", "message": "The ancient board awaits."}],
            "winner": None,
            "status": "ongoing",
            "available_moves": self._get_available_moves(empty_board),
            "scores": {"Sun": 0, "Moon": 0},
        }

        role_mapping = {0: "Sun", 1: "Moon"}

        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt, role_mapping=role_mapping)

        self.state.add_observation("The ancient board awaits.", ta.ObservationType.GAME_MESSAGE, from_id=-1, to_id=-1)
        self.state.add_observation(self._board_to_str(empty_board), ta.ObservationType.GAME_BOARD)
        return self.state

    # ----------------------------------------------------------------
    # Step
    # ----------------------------------------------------------------
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The action is logged, its boxed content is extracted and validated,
        the move (or pass) is applied, and win/draw conditions are checked.
        Invalid actions are reported through the state's invalid-move hook
        and leave the board untouched.

        Args:
            action: The action text submitted by the current player.

        Returns:
            A tuple (done, info)
        """
        player_id = self.state.current_player_id
        role_names = {0: "Sun", 1: "Moon"}
        current_team = role_names[player_id]
        other_team = role_names[1 - player_id]

        # Log player action
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)

        extracted = self._extract_answer_content(action)

        # ---- Validation ----
        # Match once and reuse the result below.
        mark = self.mark_pattern.match(extracted)
        if mark is None and not self.pass_pattern.match(extracted):
            self.state.set_invalid_move("Invalid format — must be [Mark:r,c] or [Pass].")
            return self.state.step()

        game_state = self.state.game_state
        board = game_state["board_state"]

        if mark is not None:
            # The pattern only admits 0, 1 or 2, so no separate range check is needed.
            r, c = int(mark.group(1)), int(mark.group(2))
            if board[r][c] != "_":
                self.state.set_invalid_move("Chosen cell already occupied.")
                return self.state.step()

            # Apply the move
            board[r][c] = game_state["player_symbols"][current_team]
            game_state["history"].append({"player": current_team, "message": f"Marked cell ({r},{c})."})
        else:
            # [Pass] is only legal once the board has no empty cell left.
            if self._get_available_moves(board):
                self.state.set_invalid_move("Cannot pass while moves still available.")
                return self.state.step()
            game_state["history"].append({"player": current_team, "message": "Passed."})

        # Update game_state
        game_state["available_moves"] = self._get_available_moves(board)

        # ---- Check terminal conditions ----
        symbol_winner = self._check_winner(board)
        if symbol_winner:
            winning_team = "Sun" if symbol_winner == "S" else "Moon"
            game_state["winner"] = winning_team
            game_state["status"] = "finished"
            game_state["scores"][winning_team] = 1
            game_state["scores"][other_team] = 0
            self.state.set_winner(player_id if winning_team == current_team else 1 - player_id, reason=f"Team {winning_team} aligned three emblems!")
            return self.state.step()

        if not game_state["available_moves"]:
            game_state["winner"] = None
            game_state["status"] = "draw"
            game_state["scores"]["Sun"] = 0.5
            game_state["scores"]["Moon"] = 0.5
            self.state.set_draw(reason="All tiles filled without a winning alignment.")
            return self.state.step()

        # If ongoing
        game_state["turn_count"] += 1
        game_state["current_player"] = other_team
        game_state["status"] = "ongoing"
        self.state.add_observation(self._board_to_str(board), ta.ObservationType.GAME_BOARD)

        return self.state.step()

    # ----------------------------------------------------------------
    # Observation Retrieval
    # ----------------------------------------------------------------
    def get_observation(self) -> Tuple[int, List]:
        """
        Return the current player id together with the state's observations.

        NOTE(review): this hands back `self.state.observations` unchanged;
        confirm that its structure is what callers expect for the current player.
        """
        return (self.state.current_player_id, self.state.observations)

    # ----------------------------------------------------------------
    # Close
    # ----------------------------------------------------------------
    def close(self) -> Tuple[Dict, Dict]:
        """Return the state's rewards and game info."""
        return self.state.rewards, self.state.game_info
|
||||
```
|
||||
Reference in New Issue
Block a user