240 lines
9.9 KiB
Python
240 lines
9.9 KiB
Python
|
|
```python
|
|||
|
|
import re
|
|||
|
|
import random
|
|||
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|||
|
|
|
|||
|
|
import textarena as ta
|
|||
|
|
|
|||
|
|
|
|||
|
|
class TicTacTrailEnv(ta.Env):
    """
    Tic-Tac-Trail: A deterministic, turn-based tactical puzzle game.

    Two explorers — Team Sun (S) and Team Moon (M) — claim tiles on an ancient 3×3 grid.
    The first team to align three of their emblems horizontally, vertically, or diagonally wins.

    Player 0 plays Team Sun and moves first; player 1 plays Team Moon.
    Empty cells are stored as "_" in ``game_state["board_state"]``.
    """

    # Player id -> team name; shared by reset() and step().
    _ROLE_NAMES = {0: "Sun", 1: "Moon"}

    # Matches either the doubled-brace form (\boxed{{...}}) shown in the prompt
    # examples or the plain form (\boxed{...}). The doubled alternative is tried
    # first at each position so its inner braces are not captured as content.
    _BOXED_PATTERN = re.compile(r"\\boxed\{\{(.*?)\}\}|\\boxed\{(.*?)\}", re.DOTALL)

    def __init__(self, max_turns: int = 9):
        """
        Create the environment.

        Args:
            max_turns: turn limit handed to the underlying state object in reset().
        """
        self.max_turns = max_turns
        # Define regex patterns for allowed actions
        self.mark_pattern = re.compile(r"^\[Mark:(0|1|2),(0|1|2)\]$")
        self.pass_pattern = re.compile(r"^\[Pass\]$")
        self.num_players = 2

    # ----------------------------------------------------------------
    # Helper: Extract boxed content
    # ----------------------------------------------------------------
    def _extract_answer_content(self, action: str) -> str:
        """
        Extract the action text wrapped in ``\\boxed{{...}}`` or ``\\boxed{...}``.

        The prompt asks players to put the boxed action at the end of their
        message, so when several boxed spans are present the last one is used.

        Args:
            action: raw message submitted by the player.

        Returns:
            The stripped content of the last boxed span, or the whole stripped
            message when no boxed span is found.
        """
        last_match = None
        for last_match in self._BOXED_PATTERN.finditer(action):
            pass
        if last_match is None:
            return action.strip()
        # Exactly one of the two groups participates in any given match.
        content = last_match.group(1)
        if content is None:
            content = last_match.group(2)
        return content.strip()

    # ----------------------------------------------------------------
    # Helper: Board display utility
    # ----------------------------------------------------------------
    def _board_to_str(self, board: List[List[str]]) -> str:
        """Render the board as one line per row, cells separated by single spaces."""
        return "\n".join(" ".join(row) for row in board)

    # ----------------------------------------------------------------
    # Helper: Compute available (empty) cells
    # ----------------------------------------------------------------
    def _get_available_moves(self, board: List[List[str]]) -> List[List[int]]:
        """Return the ``[row, col]`` pairs of every empty ("_") cell in row-major order."""
        return [[r, c] for r in range(3) for c in range(3) if board[r][c] == "_"]

    # ----------------------------------------------------------------
    # Helper: Check for winner
    # ----------------------------------------------------------------
    def _check_winner(self, board: List[List[str]]) -> Optional[str]:
        """Return the symbol filling a complete row, column or diagonal, else None."""
        lines: List[List[str]] = []
        # Rows and cols
        for i in range(3):
            lines.append(board[i])
            lines.append([board[r][i] for r in range(3)])
        # Diagonals
        lines.append([board[i][i] for i in range(3)])
        lines.append([board[i][2 - i] for i in range(3)])

        for line in lines:
            # A line wins only if it is non-empty and all three cells agree.
            if line[0] != "_" and line.count(line[0]) == 3:
                return line[0]
        return None

    # ----------------------------------------------------------------
    # Player Prompt Generator
    # ----------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Build instructions for a player based on the current board state.

        Args:
            player_id: 0 for Team Sun; any other value is treated as Team Moon.
            game_state: dict providing "player_symbols" and "board_state".

        Returns:
            The full instruction text, including the rendered board.
        """
        team_name = "Sun" if player_id == 0 else "Moon"
        symbol = game_state["player_symbols"][team_name]
        board_view = self._board_to_str(game_state["board_state"])

        # Only the first three fragments are f-strings. The remaining ones are
        # plain literals, so their doubled braces reach the player verbatim;
        # _extract_answer_content accepts both the doubled and the single form.
        prompt = (
            f"You are an explorer representing Team {team_name} "
            f"({symbol}) claiming tiles on the ancient Tic-Tac-Trail.\n"
            f"Current board state:\n{board_view}\n\n"
            "You may take one of the following actions:\n"
            " - [Mark:<row>,<col>] to claim an unmarked tile (rows and cols 0–2)\n"
            " - [Pass] if no unclaimed tiles remain\n\n"
            "Victory condition: Align three of your emblems in a straight line.\n"
            "All actions must be enclosed in \\boxed{{}} at the end of your message.\n\n"
            "Example valid response:\n"
            "I should take the center stone before my rival.\n"
            "\\boxed{{[Mark:1,1]}}\n\n"
            "Example valid response (no moves left):\n"
            "No moves left; I will pass.\n"
            "\\boxed{{[Pass]}}\n"
        )
        return prompt

    # ----------------------------------------------------------------
    # Reset
    # ----------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: must be 2 (Sun, Moon)
            seed: optional random seed. When given it seeds the global
                ``random`` module and is recorded in the game state; when
                omitted the game state records 42.

        Returns:
            The freshly initialised state object.

        Raises:
            ValueError: if ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Tic-Tac-Trail requires exactly 2 players.")

        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

        if seed is not None:
            random.seed(seed)

        empty_board = [["_"] * 3 for _ in range(3)]

        game_state: Dict[str, Any] = {
            # Explicit None test: ``seed or 42`` would discard a valid seed of 0.
            "seed": seed if seed is not None else 42,
            "turn_count": 1,
            "current_player": "Sun",
            "board_state": empty_board,
            "player_symbols": {"Sun": "S", "Moon": "M"},
            "history": [{"player": "System", "message": "The ancient board awaits."}],
            "winner": None,
            "status": "ongoing",
            "available_moves": self._get_available_moves(empty_board),
            "scores": {"Sun": 0, "Moon": 0},
        }

        # Hand the state its own copy so the class-level mapping is never mutated.
        role_mapping = dict(self._ROLE_NAMES)

        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt, role_mapping=role_mapping)

        self.state.add_observation("The ancient board awaits.", ta.ObservationType.GAME_MESSAGE, from_id=-1, to_id=-1)
        self.state.add_observation(self._board_to_str(empty_board), ta.ObservationType.GAME_BOARD)
        return self.state

    # ----------------------------------------------------------------
    # Step
    # ----------------------------------------------------------------
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The action is logged, the boxed content is extracted and validated,
        the move is applied, and win/draw conditions are checked before the
        turn is handed to the other team.

        Args:
            action: The action text submitted by the current player.

        Returns:
            A tuple (done, info)
        """
        player_id = self.state.current_player_id
        current_team = self._ROLE_NAMES[player_id]
        other_team = self._ROLE_NAMES[1 - player_id]

        # Log player action
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)

        extracted = self._extract_answer_content(action)

        # ---- Validation ----
        # Match once and reuse the result for both validation and parsing.
        mark = self.mark_pattern.match(extracted)
        if mark is None and self.pass_pattern.match(extracted) is None:
            self.state.set_invalid_move("Invalid format — must be [Mark:r,c] or [Pass].")
            return self.state.step()

        game_state = self.state.game_state
        board = game_state["board_state"]

        if mark is not None:
            # mark_pattern only admits the digits 0-2, so both indices are
            # already guaranteed to be on the board; no range check is needed.
            r, c = int(mark.group(1)), int(mark.group(2))
            if board[r][c] != "_":
                self.state.set_invalid_move("Chosen cell already occupied.")
                return self.state.step()

            # Apply the move
            board[r][c] = game_state["player_symbols"][current_team]
            game_state["history"].append({"player": current_team, "message": f"Marked cell ({r},{c})."})
        else:
            # [Pass] is only legal on a full board.
            # NOTE(review): a full board is declared a draw below as soon as it
            # fills, so this branch appears to accept a pass only if the caller
            # keeps stepping after the game has ended — confirm intended use.
            if self._get_available_moves(board):
                self.state.set_invalid_move("Cannot pass while moves still available.")
                return self.state.step()
            game_state["history"].append({"player": current_team, "message": "Passed."})

        # Update game_state
        game_state["available_moves"] = self._get_available_moves(board)

        # ---- Check terminal conditions ----
        symbol_winner = self._check_winner(board)
        if symbol_winner:
            winning_team = "Sun" if symbol_winner == "S" else "Moon"
            game_state["winner"] = winning_team
            game_state["status"] = "finished"
            game_state["scores"][winning_team] = 1
            game_state["scores"][other_team] = 0
            self.state.set_winner(player_id if winning_team == current_team else 1 - player_id, reason=f"Team {winning_team} aligned three emblems!")
            return self.state.step()

        if not game_state["available_moves"]:
            game_state["winner"] = None
            game_state["status"] = "draw"
            game_state["scores"]["Sun"] = 0.5
            game_state["scores"]["Moon"] = 0.5
            self.state.set_draw(reason="All tiles filled without a winning alignment.")
            return self.state.step()

        # If ongoing
        game_state["turn_count"] += 1
        game_state["current_player"] = other_team
        game_state["status"] = "ongoing"
        self.state.add_observation(self._board_to_str(board), ta.ObservationType.GAME_BOARD)

        return self.state.step()

    # ----------------------------------------------------------------
    # Observation Retrieval
    # ----------------------------------------------------------------
    def get_observation(self) -> Tuple[int, List]:
        """Return (player_id, observations) as currently held by the state object."""
        return (self.state.current_player_id, self.state.observations)

    # ----------------------------------------------------------------
    # Close
    # ----------------------------------------------------------------
    def close(self) -> Tuple[Dict, Dict]:
        """Return the state's rewards and game info."""
        return self.state.rewards, self.state.game_info
|
|||
|
|
```
|