Files
testtest8/env.py

332 lines
14 KiB
Python

```python
import re
import random
from typing import Any, Dict, List, Optional, Tuple
import textarena as ta
class EchoMazeEnv(ta.Env):
    """
    EchoMaze: The Labyrinth Duel.

    Deterministic, two-player, turn-based maze exploration game. Player 0 is
    "Sun" and player 1 is "Moon"; the first one standing on the Exit Glyph
    wins. When the turn limit is hit, the player with the smaller Manhattan
    distance to the exit wins (equal distance is a draw).
    """

    def __init__(self, max_turns: int = 60, maze_size: int = 9):
        """
        Initialize environment config (not game-state).

        Args:
            max_turns: number of processed actions after which the game is
                decided by distance to the exit.
            maze_size: side length of the square maze grid, including the
                one-cell wall ring around the border.

        Raises:
            ValueError: if maze_size is smaller than 3 (no interior cell
                would exist to hold the exit or the players).
        """
        if maze_size < 3:
            raise ValueError(
                "maze_size must be at least 3 to leave room inside the outer walls."
            )
        self.max_turns = max_turns
        self.maze_size = maze_size
        self.valid_actions = [
            "[Scan]",
            "[Mark]",
            "[Rest]",
            "[Move: North]",
            "[Move: South]",
            "[Move: East]",
            "[Move: West]",
        ]

    # ----------------------------------------------------------------------
    # Reset
    # ----------------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: Must be 2
            seed: random seed for determinism

        Returns:
            The freshly initialised state object.

        Raises:
            ValueError: if num_players is not 2.
        """
        if num_players != 2:
            raise ValueError("EchoMaze requires exactly 2 players (Sun and Moon).")
        self.state = ta.TwoPlayerState(
            num_players=num_players, seed=seed, max_turns=self.max_turns
        )
        random.seed(seed)

        # Generate base maze using seed for deterministic layout
        maze_layout, exit_location, sun_start, moon_start = self._generate_maze(seed)

        def fresh_player(start: List[int]) -> Dict[str, Any]:
            # Both players begin with identical stats; only the start cell differs.
            return {
                "position": start,
                "markers": [],
                "focus": 5,
                "observations": [f"Turn 1: Started at {tuple(start)}."],
                "last_action": None,
            }

        # Build game_state following Stage 1 schema
        game_state: Dict[str, Any] = {
            "maze_seed": seed,
            "turn_count": 0,
            "max_turns": self.max_turns,
            "maze_layout": maze_layout,
            "exit_location": exit_location,
            "players": {
                "Sun": fresh_player(sun_start),
                "Moon": fresh_player(moon_start),
            },
            "public_transcript": [],
            "winner": None,
            "is_terminal": False,
            "invalid_move_reason": None,
        }

        # Reset game state
        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping={0: "Sun", 1: "Moon"},
        )

        # Announce
        self.state.add_observation(
            "Welcome to EchoMaze: The Labyrinth Duel!", ta.ObservationType.GAME_MESSAGE
        )
        # NOTE(review): this message carries the exit coordinates and is added
        # without a recipient, so it presumably reaches both players even
        # though the text calls the location secret - confirm that is intended.
        self.state.add_observation(
            f"Exit Glyph hidden at {tuple(exit_location)} (secretly known to system).",
            ta.ObservationType.GAME_MESSAGE,
        )
        return self.state

    # ----------------------------------------------------------------------
    # Step
    # ----------------------------------------------------------------------
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The raw action is logged, validated and applied. A rejected action
        (bad syntax, no focus, blocked move) is reported to the framework and
        does NOT count as a turn, so the win / turn-limit checks only run
        after a legal action.

        Args:
            action: raw text submitted by the current player.

        Returns:
            Whatever ``self.state.step()`` returns.
        """
        player_id = self.state.current_player_id
        player_name = "Sun" if player_id == 0 else "Moon"
        self.state.add_observation(
            action,
            ta.ObservationType.PLAYER_ACTION,
            from_id=player_id,
            to_id=-1,
        )
        extracted_action = self._extract_answer_content(action)
        current_state = self.state.game_state
        player_data = current_state["players"][player_name]

        # If game already terminal
        if current_state["winner"] or current_state["is_terminal"]:
            return self.state.step()

        # A stale reason from an earlier turn must not be mistaken for a
        # rejection of the action being processed now.
        current_state["invalid_move_reason"] = None

        # --- Validation ---
        if extracted_action not in self.valid_actions:
            self._flag_invalid_move(current_state, "Unrecognized action syntax.")
            return self.state.step()
        if player_data["focus"] <= 0 and extracted_action != "[Rest]":
            self._flag_invalid_move(current_state, "Insufficient focus to perform action.")
            return self.state.step()

        # Execute effect
        result_message = ""
        if extracted_action.startswith("[Move:"):
            direction = extracted_action.split(":")[1].strip(" ]")
            result_message = self._process_move(player_name, direction, current_state)
            if current_state["invalid_move_reason"] is not None:
                # The move was rejected: do not consume a turn or evaluate
                # terminal conditions on top of the invalid-move report.
                return self.state.step()
        elif extracted_action == "[Scan]":
            result_message = self._process_scan(player_name, current_state)
            player_data["focus"] -= 1
        elif extracted_action == "[Mark]":
            result_message = self._process_mark(player_name, current_state)
            player_data["focus"] -= 1
        elif extracted_action == "[Rest]":
            result_message = self._process_rest(player_name, current_state)

        player_data["last_action"] = extracted_action
        current_state["public_transcript"].append(f"{player_name}: {extracted_action}")
        current_state["turn_count"] += 1

        # --- Check Terminal Conditions after action ---
        exit_loc = current_state["exit_location"]
        sun_pos = current_state["players"]["Sun"]["position"]
        moon_pos = current_state["players"]["Moon"]["position"]
        if sun_pos == exit_loc and moon_pos == exit_loc:
            self.state.set_draw("Both players reached the Exit Glyph simultaneously.")
            current_state["winner"] = "Draw"
            current_state["is_terminal"] = True
        elif sun_pos == exit_loc:
            self.state.set_winner(0, "Sun reached the Exit Glyph.")
            current_state["winner"] = "Sun"
            current_state["is_terminal"] = True
        elif moon_pos == exit_loc:
            self.state.set_winner(1, "Moon reached the Exit Glyph.")
            current_state["winner"] = "Moon"
            current_state["is_terminal"] = True
        elif current_state["turn_count"] >= self.max_turns:
            # Turn limit: whoever is closer (Manhattan distance) wins.
            sun_dist = self._manhattan_distance(sun_pos, exit_loc)
            moon_dist = self._manhattan_distance(moon_pos, exit_loc)
            if sun_dist < moon_dist:
                self.state.set_winner(0, "Sun is closer to the Exit Glyph after max turns.")
                current_state["winner"] = "Sun"
            elif moon_dist < sun_dist:
                self.state.set_winner(1, "Moon is closer to the Exit Glyph after max turns.")
                current_state["winner"] = "Moon"
            else:
                self.state.set_draw("Equal distance to Exit Glyph after max turns.")
                current_state["winner"] = "Draw"
            current_state["is_terminal"] = True

        # Log observation message
        self.state.add_observation(result_message, ta.ObservationType.GAME_MESSAGE)
        return self.state.step()

    # ----------------------------------------------------------------------
    # Helpers
    # ----------------------------------------------------------------------
    def _flag_invalid_move(self, game_state: Dict[str, Any], reason: str) -> None:
        """
        Report a rejected action to the framework and record the reason.

        The game is marked terminal only when the framework itself ended it;
        otherwise the player must remain able to submit another action.
        """
        self.state.set_invalid_move(reason)
        game_state["invalid_move_reason"] = reason
        # Assumes the state object exposes a `done` flag once the framework
        # ends the game (TODO confirm); treated as "not done" when absent.
        game_state["is_terminal"] = bool(getattr(self.state, "done", False))

    def _generate_maze(self, seed: Optional[int]):
        """
        Produces deterministic maze layout with walls (#), open cells (.), Exit (E).
        Ensures reproducibility.

        Args:
            seed: value passed to ``random.seed``; the same seed yields the
                same layout.

        Returns:
            Tuple ``(maze, [exit_x, exit_y], sun_start, moon_start)`` where
            ``maze`` is a list of rows of single-character cells and the
            start cells are marked "S" and "M" in it.
        """
        size = self.maze_size
        random.seed(seed)
        # Start fully walled; the border ring is never opened.
        maze = [["#" for _ in range(size)] for _ in range(size)]
        # Create random open cells (about 75% of the interior)
        for i in range(1, size - 1):
            for j in range(1, size - 1):
                maze[i][j] = "." if random.random() > 0.25 else "#"
        # Place exit
        exit_x, exit_y = random.randint(1, size - 2), random.randint(1, size - 2)
        maze[exit_x][exit_y] = "E"
        # Sun gets the first open cell scanning from the top-left, Moon the
        # first one scanning from the bottom-right. No path between the start
        # cells and the exit is guaranteed by this generator.
        sun_start = self._find_open_cell(maze, from_top=True)
        moon_start = self._find_open_cell(maze, from_top=False)
        maze[sun_start[0]][sun_start[1]] = "S"  # Mark starting
        maze[moon_start[0]][moon_start[1]] = "M"
        return maze, [exit_x, exit_y], sun_start, moon_start

    def _find_open_cell(self, maze: List[List[str]], from_top: bool = True) -> List[int]:
        """
        Return ``[row, col]`` of the first "." cell in scan order.

        Rows and columns are both walked ascending when ``from_top`` is True
        and descending otherwise. If the maze has no open cell, an interior
        corner is returned instead.
        """
        size = len(maze)
        index_order = range(size) if from_top else range(size - 1, -1, -1)
        for i in index_order:
            for j in index_order:
                if maze[i][j] == ".":
                    return [i, j]
        # Fallback if none open
        return [1, 1] if from_top else [size - 2, size - 2]

    def _extract_answer_content(self, action: str) -> str:
        """Extract content from \\boxed{}; fall back to the stripped raw text."""
        match = re.search(r"\\boxed\{([^}]*)\}", action, re.DOTALL)
        if match:
            return match.group(1).strip()
        return action.strip()

    def _manhattan_distance(self, a: List[int], b: List[int]) -> int:
        """Return the Manhattan (grid) distance between cells ``a`` and ``b``."""
        return abs(a[0] - b[0]) + abs(a[1] - b[1])

    def _process_move(self, player: str, direction: str, game_state: Dict[str, Any]) -> str:
        """
        Try to move ``player`` one cell in ``direction``.

        A legal move updates the position and costs 1 Focus. A move out of
        the grid or into a wall is reported as an invalid move (recorded in
        ``game_state["invalid_move_reason"]``) and changes nothing else.

        Returns:
            A human-readable description of what happened.
        """
        # Row index grows to the South, column index grows to the East.
        deltas = {
            "North": (-1, 0),
            "South": (1, 0),
            "East": (0, 1),
            "West": (0, -1),
        }
        x, y = game_state["players"][player]["position"]
        dx, dy = deltas.get(direction, (0, 0))
        new_x, new_y = x + dx, y + dy
        maze = game_state["maze_layout"]
        if not (0 <= new_x < len(maze) and 0 <= new_y < len(maze[0])):
            self._flag_invalid_move(game_state, "Cannot move outside bounds.")
            return f"{player} attempted to move outside bounds."
        if maze[new_x][new_y] == "#":
            self._flag_invalid_move(game_state, "Cannot move through wall.")
            return f"{player} tried to move into a wall."
        game_state["players"][player]["position"] = [new_x, new_y]
        game_state["players"][player]["focus"] -= 1
        return f"{player} moved {direction} to {(new_x, new_y)}."

    def _process_scan(self, player: str, game_state: Dict[str, Any]) -> str:
        """
        Describe the four cells adjacent to ``player``.

        Each neighbour is reported as "Wall", "Open" (anything that is not
        "#") or "Out of bounds". The Focus cost is charged by the caller.
        """
        pos = game_state["players"][player]["position"]
        maze = game_state["maze_layout"]
        dirs = {
            "North": (pos[0] - 1, pos[1]),
            "South": (pos[0] + 1, pos[1]),
            "East": (pos[0], pos[1] + 1),
            "West": (pos[0], pos[1] - 1),
        }
        result = {}
        for dir_name, (x, y) in dirs.items():
            if 0 <= x < len(maze) and 0 <= y < len(maze[0]):
                result[dir_name] = "Wall" if maze[x][y] == "#" else "Open"
            else:
                result[dir_name] = "Out of bounds"
        obs_msg = ", ".join(f"{k}: {v}" for k, v in result.items())
        return f"{player} scanned surroundings. {obs_msg}"

    def _process_mark(self, player: str, game_state: Dict[str, Any]) -> str:
        """
        Record the player's current cell in their marker list (no duplicates).

        The Focus cost is charged by the caller.
        """
        pos = game_state["players"][player]["position"]
        markers = game_state["players"][player]["markers"]
        if pos not in markers:
            # Store a copy so later position changes cannot alter the marker.
            markers.append(pos.copy())
        return f"{player} marked the cell at {tuple(pos)}."

    def _process_rest(self, player: str, game_state: Dict[str, Any]) -> str:
        """Give ``player`` 1 Focus and return a message with the new total."""
        game_state["players"][player]["focus"] += 1
        return f"{player} rested and recovered 1 Focus (now {game_state['players'][player]['focus']})."

    # ----------------------------------------------------------------------
    # Prompt
    # ----------------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Generates player prompt at start of game.

        Args:
            player_id: 0 for Sun, anything else for Moon.
            game_state: the game-state dict built in ``reset``.

        Returns:
            The instruction text, including the player's position, Focus and
            the list of legal actions.
        """
        player_name = "Sun" if player_id == 0 else "Moon"
        pos = tuple(game_state["players"][player_name]["position"])
        focus = game_state["players"][player_name]["focus"]
        intro = (
            f"You are **Player {player_name}**, an explorer within the mystic underground labyrinth of EchoMaze.\n"
            f"Your current position is {pos} with Focus = {focus}.\n"
            "Your objective is to reach the Exit Glyph before your rival.\n"
            "Actions must be exactly one of:\n"
            " - [Move: North], [Move: South], [Move: East], [Move: West]\n"
            " - [Scan] — Reveal walls around you.\n"
            " - [Mark] — Leave a marker in this cell.\n"
            " - [Rest] — Skip turn, regain 1 Focus.\n\n"
            "Only one action per turn. Place it inside \\boxed{} like so:\n"
            "Example valid response:\n"
            "I decide to move north.\n"
            "\\boxed{[Move: North]}\n\n"
            "Example invalid response:\n"
            "I will move upward.\n"
            "\\boxed{[Move: Up]} <-- invalid action\n"
        )
        return intro

    # ----------------------------------------------------------------------
    # Framework helpers
    # ----------------------------------------------------------------------
    def get_observation(self) -> Tuple[int, Dict[str, Any]]:
        """Return the current player id together with the game-state dict."""
        # NOTE(review): this hands back the full game state (maze layout and
        # exit included) rather than a list of observations - confirm that
        # this is what the caller expects.
        return self.state.current_player_id, self.state.game_state

    def close(self) -> Tuple[Dict, Dict]:
        """Return final info: the state's rewards and the game-state dict."""
        return self.state.rewards, self.state.game_state
```