240 lines
9.9 KiB
Python
240 lines
9.9 KiB
Python
```python
|
||
import re
|
||
import random
|
||
from typing import Any, Dict, List, Optional, Tuple
|
||
|
||
import textarena as ta
|
||
|
||
|
||
class TicTacTrailEnv(ta.Env):
    """
    Tic-Tac-Trail: A deterministic, turn-based tactical puzzle game.

    Two explorers — Team Sun (S) and Team Moon (M) — claim tiles on an ancient 3×3 grid.
    The first team to align three of their emblems horizontally, vertically, or diagonally wins.

    Player 0 plays Team Sun and player 1 plays Team Moon. Actions are written as
    ``[Mark:<row>,<col>]`` or ``[Pass]`` and are expected inside a ``\\boxed{...}``
    wrapper; both the single-brace and the double-brace form are accepted.
    """

    # Marker used for an unclaimed tile on the board.
    _EMPTY = "_"

    # Player id -> team name. Treated as read-only; copied before being handed out.
    _ROLE_NAMES: Dict[int, str] = {0: "Sun", 1: "Moon"}

    # Matches \boxed{{...}} (group 1) or \boxed{...} (group 2). DOTALL lets the
    # boxed content span several lines.
    _BOXED_PATTERN = re.compile(r"\\boxed\{(?:\{(.*?)\}|(.*?))\}", re.DOTALL)

    def __init__(self, max_turns: int = 9):
        """
        Create the environment.

        Args:
            max_turns: turn limit forwarded to the state object in ``reset``.
        """
        self.max_turns = max_turns
        # Define regex patterns for allowed actions
        self.mark_pattern = re.compile(r"^\[Mark:(0|1|2),(0|1|2)\]$")
        self.pass_pattern = re.compile(r"^\[Pass\]$")
        self.num_players = 2

    # ----------------------------------------------------------------
    # Helper: Extract boxed content
    # ----------------------------------------------------------------
    def _extract_answer_content(self, action: str) -> str:
        """
        Extract the content of the last \\boxed{{}} / \\boxed{} in ``action``.

        The prompt asks players to put the boxed action at the end of their
        message, so when several boxed spans are present the final one is used
        (earlier ones are typically part of the player's reasoning).

        Args:
            action: raw message text submitted by the player.

        Returns:
            The stripped boxed content, or the stripped message itself when no
            boxed span is present.
        """
        matches = self._BOXED_PATTERN.findall(action)
        if not matches:
            return action.strip()
        # Exactly one of the two groups participates in a match; the other is
        # reported by findall as "".
        double_braced, single_braced = matches[-1]
        return (double_braced or single_braced).strip()

    # ----------------------------------------------------------------
    # Helper: Board display utility
    # ----------------------------------------------------------------
    def _board_to_str(self, board: List[List[str]]) -> str:
        """Convert board to a readable string: cells space-separated, one row per line."""
        return "\n".join(" ".join(row) for row in board)

    # ----------------------------------------------------------------
    # Helper: Compute available (empty) cells
    # ----------------------------------------------------------------
    def _get_available_moves(self, board: List[List[str]]) -> List[List[int]]:
        """Return ``[row, col]`` for every unclaimed cell, in row-major order."""
        return [
            [r, c]
            for r, row in enumerate(board)
            for c, cell in enumerate(row)
            if cell == self._EMPTY
        ]

    # ----------------------------------------------------------------
    # Helper: Check for winner
    # ----------------------------------------------------------------
    def _check_winner(self, board: List[List[str]]) -> Optional[str]:
        """Return 'S' or 'M' if a symbol fills a row, column or diagonal, else None."""
        lines: List[List[str]] = []
        # Rows and cols
        for i in range(3):
            lines.append(board[i])
            lines.append([board[r][i] for r in range(3)])
        # Diagonals
        lines.append([board[i][i] for i in range(3)])
        lines.append([board[i][2 - i] for i in range(3)])

        for line in lines:
            first = line[0]
            if first != self._EMPTY and line.count(first) == 3:
                return first
        return None

    # ----------------------------------------------------------------
    # Player Prompt Generator
    # ----------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Build instructions for a player based on the current board state.

        Args:
            player_id: 0 is addressed as Team Sun, any other id as Team Moon.
            game_state: must provide ``player_symbols`` and ``board_state``.

        Returns:
            The full instruction text, including the rendered board.
        """
        team_name = "Sun" if player_id == 0 else "Moon"
        symbol = game_state["player_symbols"][team_name]
        board_view = self._board_to_str(game_state["board_state"])

        # NOTE: only the first three pieces are f-strings. The remaining plain
        # literals therefore show the double braces verbatim, which is why the
        # answer parser accepts both \boxed{{...}} and \boxed{...}.
        prompt = (
            f"You are an explorer representing Team {team_name} "
            f"({symbol}) claiming tiles on the ancient Tic-Tac-Trail.\n"
            f"Current board state:\n{board_view}\n\n"
            "You may take one of the following actions:\n"
            " - [Mark:<row>,<col>] to claim an unmarked tile (rows and cols 0–2)\n"
            " - [Pass] if no unclaimed tiles remain\n\n"
            "Victory condition: Align three of your emblems in a straight line.\n"
            "All actions must be enclosed in \\boxed{{}} at the end of your message.\n\n"
            "Example valid response:\n"
            "I should take the center stone before my rival.\n"
            "\\boxed{{[Mark:1,1]}}\n\n"
            "Example valid response (no moves left):\n"
            "No moves left; I will pass.\n"
            "\\boxed{{[Pass]}}\n"
        )
        return prompt

    # ----------------------------------------------------------------
    # Reset
    # ----------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: must be 2 (Sun, Moon)
            seed: random seed. When given it is recorded in the game state and
                also used to seed the global ``random`` module; when omitted
                the game state records 42. The game rules themselves draw no
                random numbers.

        Returns:
            The freshly initialised state object.

        Raises:
            ValueError: if ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Tic-Tac-Trail requires exactly 2 players.")

        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

        if seed is not None:
            random.seed(seed)

        empty_board = [[self._EMPTY] * 3 for _ in range(3)]

        game_state: Dict[str, Any] = {
            # Explicit None check: a seed of 0 is legitimate and must not be
            # replaced by the fallback value.
            "seed": seed if seed is not None else 42,
            "turn_count": 1,
            "current_player": "Sun",
            "board_state": empty_board,
            "player_symbols": {"Sun": "S", "Moon": "M"},
            "history": [{"player": "System", "message": "The ancient board awaits."}],
            "winner": None,
            "status": "ongoing",
            "available_moves": self._get_available_moves(empty_board),
            "scores": {"Sun": 0, "Moon": 0},
        }

        # Hand out a copy so the shared class-level mapping cannot be mutated.
        role_mapping = dict(self._ROLE_NAMES)

        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt, role_mapping=role_mapping)

        self.state.add_observation("The ancient board awaits.", ta.ObservationType.GAME_MESSAGE, from_id=-1, to_id=-1)
        self.state.add_observation(self._board_to_str(empty_board), ta.ObservationType.GAME_BOARD)
        return self.state

    # ----------------------------------------------------------------
    # Step
    # ----------------------------------------------------------------
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The action is logged, parsed and validated. A malformed action, a mark
        on an occupied cell, or a pass while cells remain is reported as an
        invalid move and leaves the board untouched. Otherwise the move is
        applied and the board is checked for a win or a draw.

        Args:
            action: The action text submitted by the current player.

        Returns:
            A tuple (done, info) as produced by the state's ``step()``.
        """
        player_id = self.state.current_player_id
        current_team = self._ROLE_NAMES[player_id]
        other_team = self._ROLE_NAMES[1 - player_id]

        # Log player action
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)

        extracted = self._extract_answer_content(action)

        # ---- Validation ----
        # Match once and reuse the result for both validation and parsing.
        mark = self.mark_pattern.fullmatch(extracted)
        if mark is None and self.pass_pattern.fullmatch(extracted) is None:
            self.state.set_invalid_move("Invalid format — must be [Mark:r,c] or [Pass].")
            return self.state.step()

        game_state = self.state.game_state
        board = game_state["board_state"]

        if mark is not None:
            # The pattern only admits the digits 0-2, so both indices are
            # already inside the grid; no separate range check is needed.
            r, c = int(mark.group(1)), int(mark.group(2))
            if board[r][c] != self._EMPTY:
                self.state.set_invalid_move("Chosen cell already occupied.")
                return self.state.step()

            # Apply the move
            board[r][c] = game_state["player_symbols"][current_team]
            game_state["history"].append({"player": current_team, "message": f"Marked cell ({r},{c})."})
        else:
            # [Pass] is only legal once every tile has been claimed.
            if self._get_available_moves(board):
                self.state.set_invalid_move("Cannot pass while moves still available.")
                return self.state.step()
            game_state["history"].append({"player": current_team, "message": "Passed."})

        # Update game_state
        game_state["available_moves"] = self._get_available_moves(board)

        # ---- Check terminal conditions ----
        symbol_winner = self._check_winner(board)
        if symbol_winner:
            winning_team = "Sun" if symbol_winner == "S" else "Moon"
            mover_won = winning_team == current_team
            # Derive the loser from the winner so the two score writes can
            # never target the same team.
            losing_team = other_team if mover_won else current_team
            game_state["winner"] = winning_team
            game_state["status"] = "finished"
            game_state["scores"][winning_team] = 1
            game_state["scores"][losing_team] = 0
            self.state.set_winner(player_id if mover_won else 1 - player_id, reason=f"Team {winning_team} aligned three emblems!")
            return self.state.step()

        if not game_state["available_moves"]:
            game_state["winner"] = None
            game_state["status"] = "draw"
            game_state["scores"]["Sun"] = 0.5
            game_state["scores"]["Moon"] = 0.5
            self.state.set_draw(reason="All tiles filled without a winning alignment.")
            return self.state.step()

        # If ongoing
        game_state["turn_count"] += 1
        game_state["current_player"] = other_team
        game_state["status"] = "ongoing"
        self.state.add_observation(self._board_to_str(board), ta.ObservationType.GAME_BOARD)

        return self.state.step()

    # ----------------------------------------------------------------
    # Observation Retrieval
    # ----------------------------------------------------------------
    def get_observation(self) -> Tuple[int, List]:
        """Return (player_id, observation_list) for current player."""
        # NOTE(review): this returns ``state.observations`` as-is. Whether that
        # attribute is already restricted to the current player (and is a list,
        # as annotated) is not visible here — confirm against the state class.
        return (self.state.current_player_id, self.state.observations)

    # ----------------------------------------------------------------
    # Close
    # ----------------------------------------------------------------
    def close(self) -> Tuple[Dict, Dict]:
        """Return final rewards and game info."""
        return self.state.rewards, self.state.game_info
|
||
``` |