Files
test/env.py
2001-01-01 00:00:00 +00:00

237 lines
9.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
```python
import re
import random
from typing import Any, Dict, List, Optional, Tuple
import textarena as ta
class RunestoneClashEnv(ta.Env):
    """
    Environment for "Runestone Clash": a turn-based two-player grid alignment battle.

    Players alternate imprinting magical runes (Ansuz, U+16A8, for A; Berkanan,
    U+16D2, for B) on a 3×3 Stone Circle. The first to align three runes in a
    straight line (row, column or diagonal) wins. A player may also pass while
    open cells remain; the game is drawn when the grid fills up or the turn
    limit is exceeded without an alignment.

    The starting player is drawn from ``random.Random(seed)``, so a fixed seed
    gives a reproducible start.
    """

    # Sentinel stored in ``rune_grid`` for a cell nobody has claimed yet.
    EMPTY_CELL = ""

    # Player sigils. They MUST be non-empty and distinct: an empty sigil is
    # indistinguishable from EMPTY_CELL, which makes imprints no-ops and wins
    # impossible. Written as escapes so the glyphs survive tooling that strips
    # or flags non-ASCII characters.
    RUNE_A = "\u16a8"  # RUNIC LETTER ANSUZ A
    RUNE_B = "\u16d2"  # RUNIC LETTER BERKANAN BEORC BJARKAN B

    # Every straight line of three cells, as 0-based (row, column) triples.
    _WIN_LINES = (
        ((0, 0), (0, 1), (0, 2)),
        ((1, 0), (1, 1), (1, 2)),
        ((2, 0), (2, 1), (2, 2)),
        ((0, 0), (1, 0), (2, 0)),
        ((0, 1), (1, 1), (2, 1)),
        ((0, 2), (1, 2), (2, 2)),
        ((0, 0), (1, 1), (2, 2)),
        ((0, 2), (1, 1), (2, 0)),
    )

    # Captures the payload of a \boxed{...} answer (payload may not contain "}").
    _BOXED_PATTERN = re.compile(r"\\boxed\{([^}]*)\}")

    def __init__(self, max_turns: int = 9):
        """
        Args:
            max_turns: Number of valid actions (imprints or passes) after which
                the game is declared a draw if nobody has won. Also forwarded
                to ``ta.TwoPlayerState`` in ``reset``.
        """
        super().__init__()
        self.max_turns = max_turns
        # Compile regexes once for quick validation of the parsed action.
        self.imprint_pattern = re.compile(r"^\[Imprint:(1|2|3),(1|2|3)\]$")
        self.pass_pattern = re.compile(r"^\[Pass\]$")

    # =====================================================
    # Core Helpers
    # =====================================================
    def _extract_answer_content(self, action: str) -> str:
        r"""
        Extract the content of the final \boxed{...} in ``action`` for machine parsing.

        The prompt asks players to put their final answer in a box at the end
        of the response, so when several boxes are present (e.g. the player
        quotes the instruction or revises a draft) the last one is used.
        Falls back to returning the raw trimmed string when no box is found.
        """
        boxed_payloads = self._BOXED_PATTERN.findall(action)
        if boxed_payloads:
            return boxed_payloads[-1].strip()
        return action.strip()

    @staticmethod
    def _player_key(player_id: int) -> str:
        """Map a player id to its ``game_state["players"]`` key (0 -> PlayerA, otherwise PlayerB)."""
        return "PlayerA" if player_id == 0 else "PlayerB"

    @classmethod
    def _is_grid_full(cls, grid: List[List[str]]) -> bool:
        """Return True when no cell of ``grid`` still holds the empty sentinel."""
        return all(cell != cls.EMPTY_CELL for row in grid for cell in row)

    @classmethod
    def _has_alignment(cls, grid: List[List[str]], symbol: str) -> bool:
        """Return True when ``symbol`` occupies all three cells of any row, column or diagonal."""
        if symbol == cls.EMPTY_CELL:
            # Three empty cells in a line must never count as a win.
            return False
        return any(
            all(grid[r][c] == symbol for r, c in line)
            for line in cls._WIN_LINES
        )

    # =====================================================
    # Initialization
    # =====================================================
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Reset the environment to an initial Runestone Clash state.

        Builds a fresh ``ta.TwoPlayerState``, an empty 3×3 grid and per-player
        bookkeeping, then picks the starting player with ``random.Random(seed)``.

        Args:
            num_players: Number of players (must be 2).
            seed: Seed for a reproducible starting player.

        Returns:
            None

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Runestone Clash requires exactly two players.")

        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

        rng = random.Random(seed)
        starting_player = rng.choice([0, 1])

        game_state: Dict[str, Any] = {
            "turn_number": 1,
            "active_player": self._player_key(starting_player),
            "rune_grid": [[self.EMPTY_CELL for _ in range(3)] for _ in range(3)],
            "players": {
                "PlayerA": {"symbol": self.RUNE_A, "imprints": 0, "skips": 0, "status": "active"},
                "PlayerB": {"symbol": self.RUNE_B, "imprints": 0, "skips": 0, "status": "active"},
            },
            "winner": None,
            "draw": False,
            "transcript": [],
            "seed": seed,
        }

        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping={0: "PlayerA", 1: "PlayerB"},
        )
        # Override the state's current player with the randomly chosen starter.
        self.state.manually_set_current_player_id(starting_player)
        self.state.add_observation(
            message="The Stone Circle hums with latent power. Runemages, prepare to begin.",
            observation_type=ta.ObservationType.GAME_MESSAGE,
        )
        return None

    # =====================================================
    # Player Prompt
    # =====================================================
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Compose the role prompt shown to a Runemage.

        Args:
            player_id: 0 for Runemage A, anything else for Runemage B.
            game_state: The dict built in ``reset`` (reads ``players``,
                ``rune_grid`` and ``turn_number``).

        Returns:
            The prompt text: board rendering, both sigils, turn counter,
            allowed actions and two example responses.
        """
        role_name = "Runemage A" if player_id == 0 else "Runemage B"
        player_key = "PlayerA" if player_id == 0 else "PlayerB"
        opponent_key = "PlayerB" if player_id == 0 else "PlayerA"
        player_symbol = game_state["players"][player_key]["symbol"]
        opponent_symbol = game_state["players"][opponent_key]["symbol"]

        grid = game_state["rune_grid"]
        # One text line per grid row; unclaimed cells are rendered as "[ ]".
        display_grid = "\n".join(
            " ".join(f"[{(cell if cell else ' ')}]" for cell in row)
            for row in grid
        )
        open_cells = sum(1 for row in grid for cell in row if cell == self.EMPTY_CELL)
        turn_number = game_state["turn_number"]

        prompt = (
            f"You are {role_name}, facing your rival in Runestone Clash.\n"
            f"The current Stone Circle (3×3) state:\n{display_grid}\n"
            f"Your sigil: {player_symbol}\nOpponent's sigil: {opponent_symbol}\n"
            f"Turn {turn_number}, open cells remaining: {open_cells}\n\n"
            f"Allowed actions:\n"
            # x selects the row and y the column, matching grid[x - 1][y - 1] in step().
            f" - [Imprint:x,y] : Imprint your rune at coordinates x,y "
            f"(x = row, y = column, each 1-3) if empty.\n"
            f" - [Pass] : Skip your turn, only if cells remain.\n"
            f"Ensure syntax matches exactly (e.g., [Imprint:2,3]).\n\n"
            "Put your final answer within \\boxed{} at the end of your response.\n\n"
            "Example valid response:\n"
            "I will secure the center of the Stone Circle.\n"
            "\\boxed{[Imprint:2,2]}\n\n"
            "Example valid response:\n"
            "The board is tight; I will bide my time.\n"
            "\\boxed{[Pass]}"
        )
        return prompt

    # =====================================================
    # Step Logic
    # =====================================================
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw action is logged, its boxed payload is validated, and a valid
        imprint or pass is applied. Afterwards the turn counter advances and
        win, draw and next-player handling run in that order. Invalid actions
        are reported through ``state.set_invalid_move`` and leave the game
        state untouched.

        Args:
            action: Raw text from the player.

        Returns:
            Tuple (done, info) as returned by ``self.state.step()``.
        """
        current_id = self.state.current_player_id

        # Log the raw message before any parsing.
        self.state.add_observation(
            message=action,
            observation_type=ta.ObservationType.PLAYER_ACTION,
            from_id=current_id,
            to_id=-1,
        )

        current_player_key = self._player_key(current_id)
        opponent_player_key = self._player_key(1 - current_id)
        game_state = self.state.game_state
        grid = game_state["rune_grid"]
        player_record = game_state["players"][current_player_key]

        parsed_action = self._extract_answer_content(action)
        imprint = self.imprint_pattern.match(parsed_action)

        # -------------------- VALIDATION + APPLY --------------------
        if imprint:
            row, col = int(imprint.group(1)), int(imprint.group(2))
            # Defensive only: imprint_pattern already restricts both values to 1-3.
            if not (1 <= row <= 3 and 1 <= col <= 3):
                self.state.set_invalid_move(
                    f"Invalid coordinates: cell ({row},{col}) is outside grid boundaries."
                )
                return self.state.step()
            if grid[row - 1][col - 1] != self.EMPTY_CELL:
                self.state.set_invalid_move("Cell already claimed by another rune.")
                return self.state.step()

            grid[row - 1][col - 1] = player_record["symbol"]
            player_record["imprints"] += 1
            game_state["transcript"].append(
                {"player": current_player_key, "action": f"[Imprint:{row},{col}]"}
            )
            self.state.add_observation(
                message=f"{current_player_key} imprinted a rune at ({row},{col}).",
                observation_type=ta.ObservationType.GAME_MESSAGE,
            )
        elif self.pass_pattern.match(parsed_action):
            if self._is_grid_full(grid):
                self.state.set_invalid_move("Cannot pass: grid fully imprinted.")
                return self.state.step()

            player_record["skips"] += 1
            game_state["transcript"].append({"player": current_player_key, "action": "[Pass]"})
            self.state.add_observation(
                message=f"{current_player_key} chose to pass this turn.",
                observation_type=ta.ObservationType.GAME_MESSAGE,
            )
        else:
            self.state.set_invalid_move("Invalid syntax: does not match required action pattern.")
            return self.state.step()

        # -------------------- GAME STATE UPDATE --------------------
        game_state["turn_number"] += 1

        # -------------------- WIN CHECK --------------------
        if self._has_alignment(grid, player_record["symbol"]):
            game_state["winner"] = current_player_key
            player_record["status"] = "won"
            game_state["players"][opponent_player_key]["status"] = "lost"
            self.state.set_winner(
                player_id=current_id,
                reason=f"{current_player_key} aligned three runes and harnessed the Stone Circle!",
            )
            return self.state.step()

        # -------------------- DRAW CHECK --------------------
        grid_full = self._is_grid_full(grid)
        # turn_number starts at 1 and grows by one per valid action, so it
        # exceeds max_turns once max_turns actions have been played.
        turn_limit_hit = self.max_turns is not None and game_state["turn_number"] > self.max_turns
        if grid_full or turn_limit_hit:
            game_state["draw"] = True
            if grid_full:
                reason = "The Stone Circle is filled; no alignment achieved."
            else:
                # Passes can exhaust the turn budget while cells are still open.
                reason = "The turn limit was reached; no alignment achieved."
            self.state.set_draw(reason=reason)
            return self.state.step()

        # -------------------- NEXT TURN --------------------
        next_player = (current_id + 1) % 2
        game_state["active_player"] = self._player_key(next_player)
        # NOTE(review): if ``state.step()`` also rotates the current player by
        # default, combining it with this manual hand-over would rotate twice
        # and give the same player another turn -- confirm against the
        # textarena State API (a ``rotate_player=False`` argument may be needed).
        self.state.manually_set_current_player_id(next_player)
        return self.state.step()
```