Add env.py from Openverse builder
This commit is contained in:
240
env.py
Normal file
240
env.py
Normal file
@@ -0,0 +1,240 @@
|
||||
```python
|
||||
import re
|
||||
import random
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
import textarena as ta
|
||||
|
||||
|
||||
class RunicGridEnv(ta.Env):
    """
    TextArena Environment for the Runic Grid game.

    Two rival mystics, the Solar Scribe (☼) and the Lunar Scribe (☽), compete
    to inscribe runes on a 3×3 grid until one achieves an aligned triad.

    Player 0 is always the Solar Scribe and player 1 the Lunar Scribe. Moves
    are submitted as ``\\boxed{[Inscribe:x,y]}`` where ``x`` is the row and
    ``y`` the column, both in the range 0-2.
    """

    def __init__(self, max_turns: int = 9):
        """
        Create the environment.

        Args:
            max_turns: Turn limit forwarded to the framework state on reset.
        """
        self.max_turns = max_turns
        # Capture groups expose the row/column digits directly, so `step` does
        # not have to re-parse the text. The accepted language is unchanged.
        self.grammar_pattern = re.compile(r'^\[Inscribe:([0-2]),([0-2])\]$')

    # -------------------------------------------------------------------------
    # Helper Functions
    # -------------------------------------------------------------------------

    def _extract_answer_content(self, action: str) -> str:
        """
        Extract the content from inside a \\boxed{} structure in a player's response.

        When the response contains several \\boxed{} groups, the last one is
        used, because players are instructed to put their final answer at the
        end of the response.

        Args:
            action: The full text of the player's action, possibly containing \\boxed{}.

        Returns:
            The stripped inner content of the last \\boxed{}, or the stripped
            raw action when no \\boxed{} is present.
        """
        boxed_contents = re.findall(r'\\boxed\{([^}]*)\}', action)
        if boxed_contents:
            return boxed_contents[-1].strip()
        return action.strip()

    def _check_winner(self, board: list[list[Optional[str]]], symbol: str) -> bool:
        """Return True if the given symbol has a 3-in-a-line."""
        # Rows and columns
        for i in range(3):
            if all(board[i][c] == symbol for c in range(3)):
                return True
            if all(board[r][i] == symbol for r in range(3)):
                return True
        # Diagonals
        if all(board[i][i] == symbol for i in range(3)):
            return True
        if all(board[i][2 - i] == symbol for i in range(3)):
            return True
        return False

    def _is_full(self, board: list[list[Optional[str]]]) -> bool:
        """Return True when no cell of the board is still empty (None)."""
        return all(cell is not None for row in board for cell in row)

    # -------------------------------------------------------------------------
    # Prompt Generator
    # -------------------------------------------------------------------------

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Generate the player prompt message describing current game status.

        Args:
            player_id: 0 for the Solar Scribe, anything else for the Lunar Scribe.
            game_state: Game state dict; reads ``players[<name>]["symbol"]``
                and ``board``.

        Returns:
            The full prompt text, including the rendered board and the
            action format instructions.
        """
        player_name = "Solar Scribe" if player_id == 0 else "Lunar Scribe"
        symbol = game_state["players"][player_name]["symbol"]
        board = game_state["board"]
        # Empty cells are rendered as "." so the grid stays aligned.
        board_str = "\n".join(
            " ".join(cell if cell is not None else "." for cell in row) for row in board
        )

        prompt = (
            f"You are the **{player_name}**, a mystic engraving runes upon the sacred Runic Tablet.\n"
            f"Your rune symbol: {symbol}\n\n"
            "Goal:\n"
            "- Form a continuous line of three of your runes horizontally, vertically, or diagonally before your opponent.\n\n"
            f"Current Runic Tablet:\n{board_str}\n\n"
            "Action Format:\n"
            # Plain (non-f) string: braces must be single here, otherwise the
            # prompt would show doubled braces that the parser rejects.
            "- To inscribe, use syntax: \\boxed{[Inscribe:x,y]}\n"
            "- Coordinates (x,y) range from 0 to 2.\n\n"
            "Example Coordinate Map:\n"
            "(0,0) (0,1) (0,2)\n"
            "(1,0) (1,1) (1,2)\n"
            "(2,0) (2,1) (2,2)\n\n"
            "Example valid response:\n"
            "I shall inscribe my rune on the center tile for strength.\n"
            "\\boxed{[Inscribe:1,1]}\n\n"
            "Example invalid response:\n"
            "I think I will go middle-right.\n"
            "\\boxed{[Move:1,2]}\n\n"
            "Put your final answer within \\boxed{} at the end of your response."
        )
        return prompt

    # -------------------------------------------------------------------------
    # Reset
    # -------------------------------------------------------------------------

    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Reset the environment to starting conditions for Runic Grid.

        Args:
            num_players: Must be 2 for this game.
            seed: Optional deterministic seed for reproducibility.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Runic Grid only supports 2 players.")

        self.state = ta.TwoPlayerState(
            num_players=num_players, seed=seed, max_turns=self.max_turns
        )

        board = [[None for _ in range(3)] for _ in range(3)]
        players = {
            "Solar Scribe": {"symbol": "☼", "actions": []},
            "Lunar Scribe": {"symbol": "☽", "actions": []},
        }

        # Determine who starts (Solar Scribe always first, deterministic)
        current_player = "Solar Scribe"

        game_state = {
            "turn_count": 0,
            "current_player": current_player,
            "board": board,
            "players": players,
            "winner": None,
            "outcome": "ongoing",
            "observations": [],
        }

        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt)

        # Add initial game messages
        self.state.add_observation(
            "Welcome to Runic Grid: The duel of Solar and Lunar Scribes.",
            ta.ObservationType.GAME_MESSAGE,
        )
        self.state.add_observation(
            f"The {current_player} begins and will place the first rune.",
            ta.ObservationType.GAME_MESSAGE,
        )

    # -------------------------------------------------------------------------
    # Step
    # -------------------------------------------------------------------------

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Execute a single player step in the Runic Grid game.

        The raw action is logged, the last \\boxed{} content is validated
        against the ``[Inscribe:x,y]`` grammar, and the rune is placed at
        ``board[x][y]`` when that tile is free. Any validation failure is
        reported through ``set_invalid_move`` and the board is left untouched.

        Args:
            action: The player's proposed move text, including reasoning and \\boxed{} token.

        Returns:
            (done, info): True if terminal, info dictionary from framework.
        """
        player_id = self.state.current_player_id
        player_name = "Solar Scribe" if player_id == 0 else "Lunar Scribe"
        opponent_name = "Lunar Scribe" if player_id == 0 else "Solar Scribe"
        game_state = self.state.game_state
        symbol = game_state["players"][player_name]["symbol"]

        # Log the raw action
        self.state.add_observation(
            action,
            ta.ObservationType.PLAYER_ACTION,
            from_id=player_id,
            to_id=-1,
        )

        # Parse and validate
        content = self._extract_answer_content(action)
        if not content:
            self.state.set_invalid_move(reason="Malformed boxed syntax")
            return self.state.step()

        match = self.grammar_pattern.match(content)
        if not match:
            self.state.set_invalid_move(reason="Action does not match grammar [Inscribe:x,y]")
            return self.state.step()

        # The grammar only admits single digits 0-2, so the conversion cannot
        # fail and the coordinates are always inside the 3x3 board.
        x, y = int(match.group(1)), int(match.group(2))

        board = game_state["board"]
        if board[x][y] is not None:
            self.state.set_invalid_move(reason="Tile already inscribed")
            return self.state.step()

        # Apply move (board is mutated in place inside game_state)
        board[x][y] = symbol
        game_state["turn_count"] += 1
        game_state["players"][player_name]["actions"].append(content)
        game_state["observations"].append(
            {"player": player_name, "action": content}
        )
        self.state.add_observation(
            f"{player_name} inscribed {symbol} at ({x},{y}).",
            ta.ObservationType.GAME_MESSAGE,
        )

        # Check terminal conditions: a win takes precedence over a full board.
        if self._check_winner(board, symbol):
            game_state["winner"] = player_name
            game_state["outcome"] = "win"
            self.state.set_winner(player_id=player_id, reason=f"{player_name} achieved a triad alignment.")
            return self.state.step()

        if self._is_full(board):
            game_state["outcome"] = "draw"
            self.state.set_draw(reason="Runic Tablet fully inscribed with no alignment.")
            return self.state.step()

        # Otherwise continue
        game_state["current_player"] = opponent_name
        done, info = self.state.step()
        return done, info

    # -------------------------------------------------------------------------
    # Close
    # -------------------------------------------------------------------------

    def close(self) -> Tuple[Dict, Dict]:
        """Return the framework state's ``rewards`` and ``game_info``."""
        return self.state.rewards, self.state.game_info
|
||||
```
|
||||
Reference in New Issue
Block a user