Files
2001-01-01 00:00:00 +00:00

240 lines
9.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
```python
import re
import random
from typing import Any, Dict, List, Optional, Tuple
import textarena as ta
class TicTacTrailEnv(ta.Env):
    """
    Tic-Tac-Trail: A deterministic, turn-based tactical puzzle game.

    Two explorers — Team Sun (S) and Team Moon (M) — claim tiles on an ancient 3×3 grid.
    The first team to align three of their emblems horizontally, vertically, or diagonally wins.

    The board is a 3x3 list of lists holding "S", "M" or "_" (unclaimed).
    Player id 0 plays Team Sun and player id 1 plays Team Moon.
    """

    def __init__(self, max_turns: int = 9):
        """
        Create the environment; the game itself is set up by ``reset``.

        Args:
            max_turns: Turn limit forwarded to the state object in ``reset``.
        """
        self.max_turns = max_turns
        # Anchored patterns for the two allowed actions. The Mark pattern
        # itself restricts row and column to 0, 1 or 2.
        self.mark_pattern = re.compile(r"^\[Mark:(0|1|2),(0|1|2)\]$")
        self.pass_pattern = re.compile(r"^\[Pass\]$")
        self.num_players = 2

    # ----------------------------------------------------------------
    # Helper: Extract boxed content
    # ----------------------------------------------------------------
    def _extract_answer_content(self, action: str) -> str:
        """
        Extract the action text from \\boxed{{...}} (or \\boxed{...}).

        The double-brace form is tried first because that is the form the
        player prompt shows; the single-brace form is the fallback. When a
        message contains several boxed expressions the LAST one is used,
        since the prompt asks for the action at the end of the message.

        Args:
            action: Raw message submitted by the player.

        Returns:
            The stripped boxed content, or the stripped message itself when
            no boxed expression is present.
        """
        for pattern in (r"\\boxed\{\{(.*?)\}\}", r"\\boxed\{(.*?)\}"):
            found = re.findall(pattern, action, re.DOTALL)
            if found:
                return found[-1].strip()
        return action.strip()

    # ----------------------------------------------------------------
    # Helper: Board display utility
    # ----------------------------------------------------------------
    def _board_to_str(self, board: List[List[str]]) -> str:
        """Render the board with one row per line and cells separated by spaces."""
        return "\n".join(" ".join(row) for row in board)

    # ----------------------------------------------------------------
    # Helper: Compute available (empty) cells
    # ----------------------------------------------------------------
    def _get_available_moves(self, board: List[List[str]]) -> List[List[int]]:
        """Return the [row, col] pairs of all unclaimed ("_") cells in row-major order."""
        return [[r, c] for r in range(3) for c in range(3) if board[r][c] == "_"]

    # ----------------------------------------------------------------
    # Helper: Check for winner
    # ----------------------------------------------------------------
    def _check_winner(self, board: List[List[str]]) -> Optional[str]:
        """Return 'S' or 'M' if a symbol fills a row, column or diagonal, else None."""
        lines = []
        # Rows and cols
        for i in range(3):
            lines.append(board[i])
            lines.append([board[r][i] for r in range(3)])
        # Diagonals
        lines.append([board[i][i] for i in range(3)])
        lines.append([board[i][2 - i] for i in range(3)])
        for line in lines:
            # A line wins when it holds three copies of one non-empty symbol.
            if line[0] != "_" and line.count(line[0]) == 3:
                return line[0]
        return None

    # ----------------------------------------------------------------
    # Player Prompt Generator
    # ----------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Build instructions for a player based on the current board state.

        Args:
            player_id: 0 for Team Sun, anything else is treated as Team Moon.
            game_state: Must provide "player_symbols" and "board_state".

        Returns:
            The prompt text describing the board, the allowed actions and the
            required answer format.
        """
        team_name = "Sun" if player_id == 0 else "Moon"
        symbol = game_state["player_symbols"][team_name]
        board_view = self._board_to_str(game_state["board_state"])
        # Only the first three fragments are f-strings. The remaining ones are
        # plain literals, so their doubled braces reach the player verbatim;
        # _extract_answer_content accepts that double-brace form.
        prompt = (
            f"You are an explorer representing Team {team_name} "
            f"({symbol}) claiming tiles on the ancient Tic-Tac-Trail.\n"
            f"Current board state:\n{board_view}\n\n"
            "You may take one of the following actions:\n"
            " - [Mark:<row>,<col>] to claim an unmarked tile (rows and cols 0-2)\n"
            " - [Pass] if no unclaimed tiles remain\n\n"
            "Victory condition: Align three of your emblems in a straight line.\n"
            "All actions must be enclosed in \\boxed{{}} at the end of your message.\n\n"
            "Example valid response:\n"
            "I should take the center stone before my rival.\n"
            "\\boxed{{[Mark:1,1]}}\n\n"
            "Example valid response (no moves left):\n"
            "No moves left; I will pass.\n"
            "\\boxed{{[Pass]}}\n"
        )
        return prompt

    # ----------------------------------------------------------------
    # Reset
    # ----------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: must be 2 (Sun, Moon)
            seed: random seed; when given it seeds the global ``random``
                module and is recorded in the game state. The game logic in
                this class draws no random numbers.

        Returns:
            The freshly initialised state object.

        Raises:
            ValueError: if ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Tic-Tac-Trail requires exactly 2 players.")
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
        if seed is not None:
            random.seed(seed)
        empty_board = [["_"] * 3 for _ in range(3)]
        game_state: Dict[str, Any] = {
            # Explicit None check so that a legitimate seed of 0 is recorded
            # as 0 instead of being replaced by the default.
            "seed": seed if seed is not None else 42,
            "turn_count": 1,
            "current_player": "Sun",
            "board_state": empty_board,
            "player_symbols": {"Sun": "S", "Moon": "M"},
            "history": [{"player": "System", "message": "The ancient board awaits."}],
            "winner": None,
            "status": "ongoing",
            "available_moves": self._get_available_moves(empty_board),
            "scores": {"Sun": 0, "Moon": 0},
        }
        role_mapping = {0: "Sun", 1: "Moon"}
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=role_mapping,
        )
        self.state.add_observation(
            "The ancient board awaits.", ta.ObservationType.GAME_MESSAGE, from_id=-1, to_id=-1
        )
        self.state.add_observation(self._board_to_str(empty_board), ta.ObservationType.GAME_BOARD)
        return self.state

    # ----------------------------------------------------------------
    # Step
    # ----------------------------------------------------------------
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The action is logged, parsed and validated. A valid [Mark:r,c] claims
        the cell for the current team; a valid [Pass] is only accepted when no
        unclaimed cell remains. Afterwards the board is checked for a win or
        a draw, otherwise the turn bookkeeping advances.

        Args:
            action: The action text submitted by the current player.

        Returns:
            The result of ``self.state.step()``, annotated as (done, info).
        """
        player_id = self.state.current_player_id
        role_names = {0: "Sun", 1: "Moon"}
        current_team = role_names[player_id]
        other_team = role_names[1 - player_id]

        # Log player action
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)
        extracted = self._extract_answer_content(action)

        # ---- Validation ----
        # Match the Mark pattern once and reuse the match object below.
        mark_match = self.mark_pattern.match(extracted)
        if mark_match is None and self.pass_pattern.match(extracted) is None:
            self.state.set_invalid_move("Invalid format — must be [Mark:r,c] or [Pass].")
            return self.state.step()

        game_state = self.state.game_state
        board = game_state["board_state"]

        if mark_match is not None:
            # mark_pattern only admits 0, 1 or 2 for each coordinate, so no
            # separate range check is needed before indexing the board.
            r, c = int(mark_match.group(1)), int(mark_match.group(2))
            if board[r][c] != "_":
                self.state.set_invalid_move("Chosen cell already occupied.")
                return self.state.step()
            # Apply the move
            board[r][c] = game_state["player_symbols"][current_team]
            game_state["history"].append({"player": current_team, "message": f"Marked cell ({r},{c})."})
        else:
            # [Pass] is only legal once every cell has been claimed.
            if self._get_available_moves(board):
                self.state.set_invalid_move("Cannot pass while moves still available.")
                return self.state.step()
            game_state["history"].append({"player": current_team, "message": "Passed."})

        # Update game_state
        game_state["available_moves"] = self._get_available_moves(board)

        # ---- Check terminal conditions ----
        symbol_winner = self._check_winner(board)
        if symbol_winner:
            winning_team = "Sun" if symbol_winner == "S" else "Moon"
            game_state["winner"] = winning_team
            game_state["status"] = "finished"
            # Written in this order so the winner's entry is the one reset to
            # 0 if the winning team is not the team that just moved.
            game_state["scores"][winning_team] = 1
            game_state["scores"][other_team] = 0
            winner_id = player_id if winning_team == current_team else 1 - player_id
            self.state.set_winner(winner_id, reason=f"Team {winning_team} aligned three emblems!")
            return self.state.step()

        if not game_state["available_moves"]:
            game_state["winner"] = None
            game_state["status"] = "draw"
            game_state["scores"]["Sun"] = 0.5
            game_state["scores"]["Moon"] = 0.5
            self.state.set_draw(reason="All tiles filled without a winning alignment.")
            return self.state.step()

        # If ongoing
        game_state["turn_count"] += 1
        game_state["current_player"] = other_team
        game_state["status"] = "ongoing"
        self.state.add_observation(self._board_to_str(board), ta.ObservationType.GAME_BOARD)
        return self.state.step()

    # ----------------------------------------------------------------
    # Observation Retrieval
    # ----------------------------------------------------------------
    def get_observation(self) -> Tuple[int, List]:
        """Return (player_id, observations) for the current player."""
        # NOTE(review): self.state.observations is returned as-is; confirm
        # whether callers expect only the current player's entries.
        return (self.state.current_player_id, self.state.observations)

    # ----------------------------------------------------------------
    # Close
    # ----------------------------------------------------------------
    def close(self) -> Tuple[Dict, Dict]:
        """Return the state's final rewards and game info."""
        return self.state.rewards, self.state.game_info
```