Add env.py from Openverse builder

This commit is contained in:
Openverse Builder
2001-01-01 00:00:00 +00:00
commit d72cb3ce93

237
env.py Normal file
View File

@@ -0,0 +1,237 @@
```python
import re
import random
from typing import Any, Dict, List, Optional, Tuple
import textarena as ta
class RunestoneClashEnv(ta.Env):
    """
    Environment for "Runestone Clash": a turn-based two-player grid alignment battle.

    Players alternate imprinting their sigil ("A" for PlayerA, "B" for PlayerB) on a
    3×3 Stone Circle. The first to align three sigils in a row, column or diagonal wins.
    A player may also pass while open cells remain. The game is a draw when the grid
    fills without an alignment or when the turn counter exceeds ``max_turns``.

    Coordinates in ``[Imprint:x,y]`` are 1-based; ``x`` selects the grid row and ``y``
    the position within that row.
    """

    # Marker stored in a grid cell that has not been claimed yet.
    EMPTY_CELL = ""

    # Sigil written into the grid for each player. The two values must differ from
    # each other and from EMPTY_CELL; otherwise an imprint is indistinguishable from
    # an open cell and neither the occupancy check nor the win check can work.
    PLAYER_SYMBOLS = {"PlayerA": "A", "PlayerB": "B"}

    # Captures the content of every \boxed{...} occurrence in a response.
    _BOXED_PATTERN = re.compile(r"\\boxed\{([^}]*)\}")

    # The eight winning lines as (row, column) index triples.
    _WIN_LINES = (
        ((0, 0), (0, 1), (0, 2)),
        ((1, 0), (1, 1), (1, 2)),
        ((2, 0), (2, 1), (2, 2)),
        ((0, 0), (1, 0), (2, 0)),
        ((0, 1), (1, 1), (2, 1)),
        ((0, 2), (1, 2), (2, 2)),
        ((0, 0), (1, 1), (2, 2)),
        ((0, 2), (1, 1), (2, 0)),
    )

    def __init__(self, max_turns: int = 9):
        """
        Args:
            max_turns: Turn budget; the game is drawn once the turn counter exceeds it.
        """
        self.max_turns = max_turns
        # Compile regexes once for quick validation of the two allowed actions.
        self.imprint_pattern = re.compile(r"^\[Imprint:(1|2|3),(1|2|3)\]$")
        self.pass_pattern = re.compile(r"^\[Pass\]$")

    # =====================================================
    # Core Helpers
    # =====================================================
    def _extract_answer_content(self, action: str) -> str:
        """
        Extract the content of the last \\boxed{...} in ``action`` for machine parsing.

        The prompt asks for the final answer at the end of the response, so when a
        response contains several boxes the last one is used.
        Falls back to returning the raw trimmed string when no box is present.
        """
        boxed = self._BOXED_PATTERN.findall(action)
        if boxed:
            return boxed[-1].strip()
        return action.strip()

    @staticmethod
    def _player_key(player_id: int) -> str:
        """Map a player id (0 or 1) to its game-state key."""
        return "PlayerA" if player_id == 0 else "PlayerB"

    @classmethod
    def _is_grid_full(cls, grid: List[List[str]]) -> bool:
        """Return True when no cell of ``grid`` is still open."""
        return all(cell != cls.EMPTY_CELL for row in grid for cell in row)

    @classmethod
    def _has_alignment(cls, grid: List[List[str]], symbol: str) -> bool:
        """Return True when ``symbol`` occupies all three cells of any winning line."""
        if symbol == cls.EMPTY_CELL:
            # Open cells never count as an alignment.
            return False
        return any(
            all(grid[r][c] == symbol for r, c in line)
            for line in cls._WIN_LINES
        )

    # =====================================================
    # Initialization
    # =====================================================
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial Runestone Clash state.

        Args:
            num_players: Number of players (must be 2).
            seed: Seed for the starting-player draw, for reproducible starts.

        Returns:
            None

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Runestone Clash requires exactly two players.")
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
        rng = random.Random(seed)
        starting_player = rng.choice([0, 1])
        game_state: Dict[str, Any] = {
            "turn_number": 1,
            "active_player": self._player_key(starting_player),
            "rune_grid": [[self.EMPTY_CELL for _ in range(3)] for _ in range(3)],
            "players": {
                key: {"symbol": symbol, "imprints": 0, "skips": 0, "status": "active"}
                for key, symbol in self.PLAYER_SYMBOLS.items()
            },
            "winner": None,
            "draw": False,
            "transcript": [],
            "seed": seed,
        }
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping={0: "PlayerA", 1: "PlayerB"},
        )
        # Set the active player manually according to the seeded draw.
        self.state.manually_set_current_player_id(starting_player)
        self.state.add_observation(
            message="The Stone Circle hums with latent power. Runemages, prepare to begin.",
            observation_type=ta.ObservationType.GAME_MESSAGE,
        )
        return None

    # =====================================================
    # Player Prompt
    # =====================================================
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Compose the role prompt shown to a Runemage.

        Args:
            player_id: 0 for Runemage A, anything else for Runemage B.
            game_state: The game-state dictionary built in ``reset``.

        Returns:
            The prompt text: board rendering, both sigils, turn info and action syntax.
        """
        role_name = "Runemage A" if player_id == 0 else "Runemage B"
        player_key = self._player_key(player_id)
        opponent_key = "PlayerB" if player_id == 0 else "PlayerA"
        player_symbol = game_state["players"][player_key]["symbol"]
        opponent_symbol = game_state["players"][opponent_key]["symbol"]
        grid = game_state["rune_grid"]
        # Open cells are rendered as "[ ]" so the board keeps its shape.
        display_grid = "\n".join(
            " ".join(f"[{cell or ' '}]" for cell in row)
            for row in grid
        )
        open_cells = sum(1 for row in grid for cell in row if cell == self.EMPTY_CELL)
        turn_number = game_state["turn_number"]
        prompt = (
            f"You are {role_name}, facing your rival in Runestone Clash.\n"
            f"The current Stone Circle (3×3) state:\n{display_grid}\n"
            f"Your sigil: {player_symbol}\nOpponent's sigil: {opponent_symbol}\n"
            f"Turn {turn_number}, open cells remaining: {open_cells}\n\n"
            f"Allowed actions:\n"
            f"  - [Imprint:x,y] : Imprint your rune at coordinates x,y (1-3) if empty.\n"
            f"  - [Pass] : Skip your turn, only if cells remain.\n"
            f"Ensure syntax matches exactly (e.g., [Imprint:2,3]).\n\n"
            "Put your final answer within \\boxed{} at the end of your response.\n\n"
            "Example valid response:\n"
            "I will secure the center of the Stone Circle.\n"
            "\\boxed{[Imprint:2,2]}\n\n"
            "Example valid response:\n"
            "The board is tight; I will bide my time.\n"
            "\\boxed{[Pass]}"
        )
        return prompt

    # =====================================================
    # Step Logic
    # =====================================================
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw action is logged, the boxed content is validated against the two
        allowed patterns, the move is applied, and win / draw conditions are checked
        before the turn is handed to the other player.

        Args:
            action: Raw text from player action.

        Returns:
            Tuple (done, info) as returned by ``self.state.step()``.
        """
        # Log the raw message.
        self.state.add_observation(
            message=action,
            observation_type=ta.ObservationType.PLAYER_ACTION,
            from_id=self.state.current_player_id,
            to_id=-1,
        )
        current_id = self.state.current_player_id
        current_player_key = self._player_key(current_id)
        opponent_player_key = "PlayerB" if current_id == 0 else "PlayerA"
        game_state = self.state.game_state
        grid = game_state["rune_grid"]
        player_record = game_state["players"][current_player_key]

        # Extract boxed content and validate it against the allowed actions.
        parsed_action = self._extract_answer_content(action)
        match_imprint = self.imprint_pattern.match(parsed_action)
        match_pass = self.pass_pattern.match(parsed_action)

        # -------------------- VALIDATION + APPLY --------------------
        if match_imprint:
            # The pattern only accepts digits 1-3, so both coordinates are in range.
            x, y = int(match_imprint.group(1)), int(match_imprint.group(2))
            if grid[x - 1][y - 1] != self.EMPTY_CELL:
                self.state.set_invalid_move("Cell already claimed by another rune.")
                return self.state.step()
            grid[x - 1][y - 1] = player_record["symbol"]
            player_record["imprints"] += 1
            game_state["transcript"].append(
                {"player": current_player_key, "action": f"[Imprint:{x},{y}]"}
            )
            self.state.add_observation(
                message=f"{current_player_key} imprinted a rune at ({x},{y}).",
                observation_type=ta.ObservationType.GAME_MESSAGE,
            )
        elif match_pass:
            if self._is_grid_full(grid):
                self.state.set_invalid_move("Cannot pass: grid fully imprinted.")
                return self.state.step()
            player_record["skips"] += 1
            game_state["transcript"].append({"player": current_player_key, "action": "[Pass]"})
            self.state.add_observation(
                message=f"{current_player_key} chose to pass this turn.",
                observation_type=ta.ObservationType.GAME_MESSAGE,
            )
        else:
            self.state.set_invalid_move("Invalid syntax: does not match required action pattern.")
            return self.state.step()

        # -------------------- GAME STATE UPDATE --------------------
        game_state["turn_number"] += 1

        # -------------------- WIN CHECK --------------------
        if self._has_alignment(grid, player_record["symbol"]):
            game_state["winner"] = current_player_key
            player_record["status"] = "won"
            game_state["players"][opponent_player_key]["status"] = "lost"
            self.state.set_winner(
                player_id=current_id,
                reason=f"{current_player_key} aligned three runes and harnessed the Stone Circle!",
            )
            return self.state.step()

        # -------------------- DRAW CHECK --------------------
        grid_full = self._is_grid_full(grid)
        if grid_full or game_state["turn_number"] > self.max_turns:
            game_state["draw"] = True
            # Passes consume turns, so the turn limit can be hit with cells still open;
            # report the reason that actually ended the game.
            if grid_full:
                reason = "The Stone Circle is filled; no alignment achieved."
            else:
                reason = "The turn limit was reached; no alignment achieved."
            self.state.set_draw(reason=reason)
            return self.state.step()

        # -------------------- NEXT TURN --------------------
        next_player = (current_id + 1) % 2
        game_state["active_player"] = self._player_key(next_player)
        # NOTE(review): the next player is set explicitly and state.step() is then called
        # with its defaults. Confirm that step() does not rotate the player a second time.
        self.state.manually_set_current_player_id(next_player)
        return self.state.step()
```