Add env.py from Openverse builder
This commit is contained in:
233
env.py
Normal file
233
env.py
Normal file
@@ -0,0 +1,233 @@
|
|||||||
|
```python
|
||||||
|
import re
|
||||||
|
from typing import Any, Dict, Optional, Tuple, List
|
||||||
|
|
||||||
|
import textarena as ta
|
||||||
|
|
||||||
|
|
||||||
|
class StarGridDuelEnv(ta.Env):
    r"""
    Implementation of the 'StarGrid Duel' game environment.

    Deterministic two-player strategy game where navigators place energy beacons
    on a 3x3 grid aiming to align three of their own beacons in a row, column, or diagonal.

    Cells are labelled by a row letter (A-C) followed by a column digit (1-3).
    A move is submitted as ``[Place: <cell_id>]``, normally wrapped in
    ``\boxed{...}`` at the end of the player's response.
    """

    # One pattern for both spellings of the answer wrapper. The doubled-brace
    # alternative must come first, otherwise the single-brace alternative would
    # capture a stray leading "{" from "\boxed{{...}}".
    _BOXED_PATTERN = re.compile(r"\\boxed\{\{([^}]*)\}\}|\\boxed\{([^}]*)\}")

    # The 8 winning alignments: three rows, three columns, two diagonals.
    _WINNING_LINES = (
        ("A1", "A2", "A3"),
        ("B1", "B2", "B3"),
        ("C1", "C2", "C3"),
        ("A1", "B1", "C1"),
        ("A2", "B2", "C2"),
        ("A3", "B3", "C3"),
        ("A1", "B2", "C3"),
        ("A3", "B2", "C1"),
    )

    def __init__(self, max_turns: int = 9):
        """
        Create the environment.

        Args:
            max_turns: Turn limit forwarded to the state object in ``reset``.
                Defaults to 9, the number of cells on the grid.
        """
        self.max_turns = max_turns
        # Compile regex patterns once
        self.place_pattern = re.compile(r"^\[Place:\s*(A|B|C)(1|2|3)\]$")
        # Cell labels in order
        self.all_cells = [f"{r}{c}" for r in "ABC" for c in "123"]

    # ------------------------ Helper Methods ------------------------

    def _extract_answer_content(self, action: str) -> str:
        r"""
        Extract content inside \boxed{} for machine parsing.

        Both ``\boxed{...}`` and ``\boxed{{...}}`` are accepted. When the
        response contains several boxed spans, the LAST one is used, because
        players are told to put their final answer at the end of the response.
        Falls back to the entire content (trimmed) if there is no boxed span.

        Args:
            action: Raw response text submitted by the player.

        Returns:
            The trimmed text inside the last boxed span, or the trimmed
            ``action`` itself when no boxed span is present.
        """
        matches = self._BOXED_PATTERN.findall(action)
        if not matches:
            return action.strip()
        # findall yields (double_brace_group, single_brace_group) tuples; the
        # group that did not participate in the match is an empty string.
        double_braced, single_braced = matches[-1]
        return (double_braced or single_braced).strip()

    def _check_victory(self, board: Dict[str, Optional[str]], color: str) -> bool:
        """Check all 8 winning line combinations for the specified color."""
        return any(
            all(board[cell] == color for cell in line)
            for line in self._WINNING_LINES
        )

    def _generate_board_str(self, board: Dict[str, Optional[str]]) -> str:
        """
        Render the 3x3 StarGrid as a simple text table.

        Empty cells show their own label (e.g. ``A1``); occupied cells show
        ``B`` for a "Blue" beacon and ``C`` for any other color.
        """
        rows = []
        for r in "ABC":
            row_cells = []
            for c in "123":
                label = f"{r}{c}"
                val = board[label]
                if val is None:
                    row_cells.append(label)
                else:
                    row_cells.append("B" if val == "Blue" else "C")
            rows.append(" | ".join(row_cells))
        return "\n".join(rows)

    def _get_active_player_label(self, player_id: int) -> str:
        """Return the display name for a player id (0 -> Alpha, otherwise Beta)."""
        return "Navigator Alpha" if player_id == 0 else "Navigator Beta"

    def _cell_valid(self, cell: str) -> bool:
        """Return True when ``cell`` is one of the nine grid labels."""
        return cell in self.all_cells

    def _broadcast_board(self, board: Dict[str, Optional[str]]) -> None:
        """Publish the rendered board as a GAME_BOARD observation."""
        self.state.add_observation(self._generate_board_str(board), ta.ObservationType.GAME_BOARD)

    # ------------------------ Core Env API ------------------------

    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Builds a fresh state object with an empty board, then publishes the
        welcome message and the empty board as observations.

        Args:
            num_players: Number of players in the game. Must be 2.
            seed: Optional seed for determinism.

        Returns:
            The newly created ``self.state`` object.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("StarGrid Duel requires exactly 2 players.")

        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

        empty_board = {cell: None for cell in self.all_cells}
        game_state: Dict[str, Any] = {
            "turn_index": 0,
            "active_player": "A",
            "board": empty_board,
            "player_symbols": {"A": "Blue", "B": "Crimson"},
            "move_history": [],
            "winner": None,
            "is_draw": False,
            "observations": {"A": "", "B": ""},
            "seed": seed,
        }

        role_mapping = {0: "Navigator Alpha", 1: "Navigator Beta"}

        # Initialize internal game state
        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt, role_mapping=role_mapping)

        # Onboarding observations
        onboarding_msg = (
            "Welcome to StarGrid Duel!\n"
            "Two navigators will alternately place energy beacons on the 3×3 StarGrid.\n"
            "Your mission is to align three of your beacons in a line before your rival."
        )
        self.state.add_observation(onboarding_msg, ta.ObservationType.GAME_MESSAGE)

        board_msg = self._generate_board_str(empty_board)
        self.state.add_observation(board_msg, ta.ObservationType.GAME_BOARD)

        return self.state

    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw action is logged, the boxed content is parsed, and the move is
        validated (format, coordinate, occupancy). A valid move places the
        player's beacon, after which victory and draw are checked in that order.

        Args:
            action: The action text submitted by the current player.

        Returns:
            A tuple (done, info) where:
                done: True if the episode has concluded
                info: A ta.Info object with auxiliary details
        """
        player_id = self.state.current_player_id
        player_key = "A" if player_id == 0 else "B"
        player_color = self.state.game_state["player_symbols"][player_key]

        # 1. Log the raw player action
        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)

        # 2. Extract the boxed content
        extracted = self._extract_answer_content(action)

        # 3. Validate the action pattern
        match = self.place_pattern.match(extracted)
        if not match:
            self.state.set_invalid_move(reason="MalformedAction: The action must strictly follow '[Place: <cell_id>]' format.")
            return self.state.step()

        cell_id = f"{match.group(1)}{match.group(2)}"

        # Defensive: the pattern above already restricts the coordinate to
        # A-C / 1-3, so this only fires if the pattern and grid ever diverge.
        if not self._cell_valid(cell_id):
            self.state.set_invalid_move(reason=f"CellOutOfRange: {cell_id} is not a valid StarGrid coordinate.")
            return self.state.step()

        board = self.state.game_state["board"]
        if board[cell_id] is not None:
            self.state.set_invalid_move(reason=f"CellOccupied: {cell_id} is already occupied.")
            return self.state.step()

        # 4. Execute valid action: place beacon. The board dict is mutated in
        # place, so no re-assignment into game_state is needed.
        board[cell_id] = player_color

        # Record move
        self.state.game_state["move_history"].append(
            {"player": player_key, "action": extracted}
        )

        # Increment turn index (active player is rotated below unless terminal)
        self.state.game_state["turn_index"] += 1

        # 5. Check for victory
        if self._check_victory(board, player_color):
            self.state.game_state["winner"] = player_key
            winner_str = self._get_active_player_label(player_id)
            self.state.set_winner(player_id=player_id, reason=f"{winner_str} aligned three energy beacons in a line.")
            self._broadcast_board(board)
            return self.state.step()

        # 6. Check for draw (grid filled, no winner)
        if all(v is not None for v in board.values()):
            self.state.game_state["is_draw"] = True
            self.state.set_draw(reason="All cells filled with no aligned beacons—a balanced standoff.")
            self._broadcast_board(board)
            return self.state.step()

        # 7. Update board observation for next player
        self._broadcast_board(board)

        # 8. Rotate turn
        self.state.game_state["active_player"] = "B" if player_key == "A" else "A"

        # Proceed to next step
        return self.state.step()

    # -------------------- Player Prompt Generation --------------------

    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        r"""
        Generate a detailed prompt for each player at the start or on every turn.

        Args:
            player_id: Id of the player the prompt is addressed to (0 or 1).
            game_state: The game-state dict created in ``reset``; the keys
                ``player_symbols``, ``active_player`` and ``board`` are read.

        Returns:
            The prompt text: role, goal, current board, open cells and the
            required ``\boxed{[Place: <cell_id>]}`` response format.
        """
        role = self._get_active_player_label(player_id)
        color = game_state["player_symbols"]["A" if player_id == 0 else "B"]
        active_pid = game_state["active_player"]
        active_label = "Navigator Alpha" if active_pid == "A" else "Navigator Beta"

        board_repr = self._generate_board_str(game_state["board"])
        open_cells = [cell for cell, val in game_state["board"].items() if val is None]

        # NOTE: only the f-string pieces below need "{{"/"}}" escaping. The
        # plain literals must use single braces, or the doubled braces would be
        # shown to the player verbatim.
        prompt = (
            f"You are {role}, commanding the {color} energy.\n"
            "Your goal: deploy energy beacons across the StarGrid to align three of your own in a straight line before your opponent.\n\n"
            f"Current Board:\n{board_repr}\n\n"
            f"Your Color: {color}\nActive Navigator: {active_label}\n\n"
            f"Allowed Actions:\nFormat: [Place: <cell_id>]\nAvailable cells: {', '.join(open_cells) if open_cells else 'None'}\n\n"
            "Response Format:\n"
            "You may describe your reasoning, then finalize your move as:\n\n"
            "Example valid response:\n"
            "I will claim the center of the grid to control diagonals.\n"
            "\\boxed{[Place: B2]}\n\n"
            "Example invalid response:\n"
            "I think I'll move now.\n"
            "\\boxed{[Move: B2]}\n\n"
            "Put your final answer within \\boxed{} at the end of your response."
        )
        return prompt
|
||||||
|
```
|
||||||
Reference in New Issue
Block a user