Add env.py from Openverse builder
This commit is contained in:
237
env.py
Normal file
237
env.py
Normal file
@@ -0,0 +1,237 @@
|
||||
```python
|
||||
import re
|
||||
import random
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import textarena as ta
|
||||
|
||||
|
||||
class RunestoneClashEnv(ta.Env):
    """
    Environment for "Runestone Clash": a deterministic turn-based two-player grid alignment battle.

    Players alternate imprinting magical runes ("⚙" for A, "✶" for B) on a 3×3 Stone Circle.
    The first to align three runes in a straight line (row, column or diagonal) wins.
    A player may also pass while open cells remain; the game is drawn when the grid
    fills up or the configured turn limit is exhausted without an alignment.
    """

    def __init__(self, max_turns: int = 9):
        """
        Args:
            max_turns: Maximum number of valid actions (imprints and passes
                combined) before the game is declared a draw.
        """
        self.max_turns = max_turns
        # Compile regexes once for quick validation of the two legal actions.
        self.imprint_pattern = re.compile(r"^\[Imprint:(1|2|3),(1|2|3)\]$")
        self.pass_pattern = re.compile(r"^\[Pass\]$")

    # =====================================================
    # Core Helpers
    # =====================================================
    def _extract_answer_content(self, action: str) -> str:
        """
        Extract boxed content from \\boxed{...} for machine parsing.

        The prompt asks for the final answer at the end of the response, so when
        several boxed spans are present the last one is used. Falls back to
        returning the raw trimmed string when no boxed span is found.
        """
        boxed = re.findall(r"\\boxed\{([^}]*)\}", action)
        if boxed:
            return boxed[-1].strip()
        return action.strip()

    @staticmethod
    def _is_grid_full(grid: List[List[str]]) -> bool:
        """Return True when no empty ("") cell remains on the grid."""
        return all(cell != "" for row in grid for cell in row)

    @staticmethod
    def _has_alignment(grid: List[List[str]], symbol: str) -> bool:
        """Return True when `symbol` fills a complete row, column or diagonal."""
        if symbol == "":
            return False
        lines = [list(row) for row in grid]
        lines.extend([grid[r][c] for r in range(3)] for c in range(3))
        lines.append([grid[i][i] for i in range(3)])
        lines.append([grid[i][2 - i] for i in range(3)])
        return any(all(cell == symbol for cell in line) for line in lines)

    # =====================================================
    # Initialization
    # =====================================================
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial Runestone Clash state.

        Args:
            num_players: Number of players (must be 2).
            seed: Deterministic seed for reproducible starts; it also decides
                which player moves first.

        Returns:
            None

        Raises:
            ValueError: If `num_players` is not 2.
        """
        if num_players != 2:
            raise ValueError("Runestone Clash requires exactly two players.")

        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
        rng = random.Random(seed)
        starting_player = rng.choice([0, 1])

        game_state: Dict[str, Any] = {
            "turn_number": 1,
            "active_player": "PlayerA" if starting_player == 0 else "PlayerB",
            "rune_grid": [["" for _ in range(3)] for _ in range(3)],
            "players": {
                "PlayerA": {"symbol": "⚙", "imprints": 0, "skips": 0, "status": "active"},
                "PlayerB": {"symbol": "✶", "imprints": 0, "skips": 0, "status": "active"},
            },
            "winner": None,
            "draw": False,
            "transcript": [],
            "seed": seed,
        }

        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping={0: "PlayerA", 1: "PlayerB"}
        )
        # Override the state's default first player with the seeded choice.
        self.state.manually_set_current_player_id(starting_player)

        self.state.add_observation(
            message="The Stone Circle hums with latent power. Runemages, prepare to begin.",
            observation_type=ta.ObservationType.GAME_MESSAGE
        )

        return None

    # =====================================================
    # Player Prompt
    # =====================================================
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Compose the role prompt shown to the current Runemage.

        Args:
            player_id: 0 for Runemage A, any other value for Runemage B.
            game_state: The shared game-state dictionary built in `reset`.

        Returns:
            The prompt text: board rendering, sigils, turn info and action syntax.
        """
        is_player_a = player_id == 0
        role_name = "Runemage A" if is_player_a else "Runemage B"
        player_key = "PlayerA" if is_player_a else "PlayerB"
        opponent_key = "PlayerB" if is_player_a else "PlayerA"
        player_symbol = game_state["players"][player_key]["symbol"]
        opponent_symbol = game_state["players"][opponent_key]["symbol"]

        grid = game_state["rune_grid"]
        # One text line per grid row; empty cells are rendered as "[ ]".
        display_grid = "\n".join(
            " ".join(f"[{(cell if cell else ' ')}]" for cell in row)
            for row in grid
        )
        open_cells = sum(1 for row in grid for cell in row if cell == "")
        turn_number = game_state["turn_number"]

        prompt = (
            f"You are {role_name}, facing your rival in Runestone Clash.\n"
            f"The current Stone Circle (3×3) state:\n{display_grid}\n"
            f"Your sigil: {player_symbol}\nOpponent's sigil: {opponent_symbol}\n"
            f"Turn {turn_number}, open cells remaining: {open_cells}\n\n"
            "Allowed actions:\n"
            "  - [Imprint:x,y] : Imprint your rune at coordinates x,y (1–3) if empty.\n"
            "  - [Pass] : Skip your turn, only if cells remain.\n"
            "Ensure syntax matches exactly (e.g., [Imprint:2,3]).\n\n"
            "Put your final answer within \\boxed{} at the end of your response.\n\n"
            "Example valid response:\n"
            "I will secure the center of the Stone Circle.\n"
            "\\boxed{[Imprint:2,2]}\n\n"
            "Example valid response:\n"
            "The board is tight; I will bide my time.\n"
            "\\boxed{[Pass]}"
        )
        return prompt

    # =====================================================
    # Step Logic
    # =====================================================
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        Args:
            action: Raw text from player action.

        Returns:
            Tuple (done, info)
        """
        current_id = self.state.current_player_id

        # Log the raw message
        self.state.add_observation(
            message=action,
            observation_type=ta.ObservationType.PLAYER_ACTION,
            from_id=current_id,
            to_id=-1
        )

        current_player_key = "PlayerA" if current_id == 0 else "PlayerB"
        opponent_player_key = "PlayerB" if current_id == 0 else "PlayerA"
        game_state = self.state.game_state
        grid = game_state["rune_grid"]

        # Extract boxed content and validate
        parsed_action = self._extract_answer_content(action)
        match_imprint = self.imprint_pattern.match(parsed_action)
        match_pass = self.pass_pattern.match(parsed_action)

        # -------------------- VALIDATION --------------------
        if not (match_imprint or match_pass):
            self.state.set_invalid_move("Invalid syntax: does not match required action pattern.")
            return self.state.step()

        if match_imprint:
            # The pattern only admits 1..3, so both coordinates are already in
            # range. x selects the grid row and y the column (1-based).
            x, y = int(match_imprint.group(1)), int(match_imprint.group(2))
            if grid[x - 1][y - 1] != "":
                self.state.set_invalid_move("Cell already claimed by another rune.")
                return self.state.step()

            # Perform imprint
            grid[x - 1][y - 1] = game_state["players"][current_player_key]["symbol"]
            game_state["players"][current_player_key]["imprints"] += 1

            game_state["transcript"].append({"player": current_player_key, "action": f"[Imprint:{x},{y}]"})
            self.state.add_observation(
                message=f"{current_player_key} imprinted a rune at ({x},{y}).",
                observation_type=ta.ObservationType.GAME_MESSAGE
            )
        else:
            if self._is_grid_full(grid):
                self.state.set_invalid_move("Cannot pass: grid fully imprinted.")
                return self.state.step()
            game_state["players"][current_player_key]["skips"] += 1
            game_state["transcript"].append({"player": current_player_key, "action": "[Pass]"})
            self.state.add_observation(
                message=f"{current_player_key} chose to pass this turn.",
                observation_type=ta.ObservationType.GAME_MESSAGE
            )

        # -------------------- GAME STATE UPDATE --------------------
        game_state["turn_number"] += 1

        # -------------------- WIN CHECK --------------------
        current_symbol = game_state["players"][current_player_key]["symbol"]
        if self._has_alignment(grid, current_symbol):
            game_state["winner"] = current_player_key
            game_state["players"][current_player_key]["status"] = "won"
            game_state["players"][opponent_player_key]["status"] = "lost"
            self.state.set_winner(player_id=current_id, reason=f"{current_player_key} aligned three runes and harnessed the Stone Circle!")
            return self.state.step()

        # -------------------- DRAW CHECK --------------------
        # Honour the configured turn limit rather than a hard-coded 9, and
        # report the actual cause of the draw (passes can exhaust the turn
        # budget while cells are still open).
        grid_full = self._is_grid_full(grid)
        if grid_full or game_state["turn_number"] > self.max_turns:
            game_state["draw"] = True
            if grid_full:
                draw_reason = "The Stone Circle is filled; no alignment achieved."
            else:
                draw_reason = "The turn limit was reached; no alignment achieved."
            self.state.set_draw(reason=draw_reason)
            return self.state.step()

        # -------------------- NEXT TURN --------------------
        next_player = (current_id + 1) % 2
        game_state["active_player"] = "PlayerA" if next_player == 0 else "PlayerB"
        self.state.manually_set_current_player_id(next_player)

        # The next player was set explicitly above, so the state must not
        # rotate again; a second rotation would hand the turn straight back to
        # the player who just moved.
        return self.state.step(rotate_player=False)
|
||||
```
|
||||
Reference in New Issue
Block a user