Initial commit from Openverse UI

2025-11-22 02:56:57 +00:00
commit 4807acee95
3 changed files with 557 additions and 0 deletions
--- a/env.py
+++ b/env.py
@@ -0,0 +1,332 @@
+```python
+import re
+import random
+from typing import Any, Dict, List, Optional, Tuple
+
+import textarena as ta
+
+
+class EchoMazeEnv(ta.Env):
+    """
+    EchoMaze: The Labyrinth Duel
+    Deterministic, two-player, turn-based maze exploration game.
+    """
+
+    def __init__(self, max_turns: int = 60, maze_size: int = 9):
+        """
+        Store the static configuration of the environment.
+
+        No per-game state is created here; that happens in ``reset``.
+
+        Args:
+            max_turns: Turn budget; once reached, ``step`` decides the game by
+                distance to the exit.
+            maze_size: Side length of the square maze grid.
+        """
+        compass = ("North", "South", "East", "West")
+        stationary_actions = ["[Scan]", "[Mark]", "[Rest]"]
+        movement_actions = [f"[Move: {heading}]" for heading in compass]
+
+        self.maze_size = maze_size
+        self.max_turns = max_turns
+        # Accepted commands: the three stationary actions first, then the four moves.
+        self.valid_actions = stationary_actions + movement_actions
+
+    # ----------------------------------------------------------------------
+    # Reset
+    # ----------------------------------------------------------------------
+    def reset(self, num_players: int, seed: Optional[int] = None):
+        """
+        Start a fresh game: build the maze, place both explorers, announce the game.
+
+        Args:
+            num_players: Number of participants; anything other than 2 is rejected.
+            seed: Seed forwarded to the state object, the global RNG and maze
+                generation.
+
+        Returns:
+            The freshly initialised state object.
+
+        Raises:
+            ValueError: If ``num_players`` is not 2.
+        """
+        if num_players != 2:
+            raise ValueError("EchoMaze requires exactly 2 players (Sun and Moon).")
+
+        self.state = ta.TwoPlayerState(
+            num_players=num_players,
+            seed=seed,
+            max_turns=self.max_turns,
+        )
+        random.seed(seed)
+
+        # The layout, exit cell and both start cells all derive from the seed.
+        layout, exit_cell, sun_cell, moon_cell = self._generate_maze(seed)
+
+        def fresh_player(start_cell):
+            # Per-player record: both explorers begin with 5 Focus and no markers.
+            return {
+                "position": start_cell,
+                "markers": [],
+                "focus": 5,
+                "observations": [f"Turn 1: Started at {tuple(start_cell)}."],
+                "last_action": None,
+            }
+
+        game_state: Dict[str, Any] = {
+            "maze_seed": seed,
+            "turn_count": 0,
+            "max_turns": self.max_turns,
+            "maze_layout": layout,
+            "exit_location": exit_cell,
+            "players": {
+                "Sun": fresh_player(sun_cell),
+                "Moon": fresh_player(moon_cell),
+            },
+            "public_transcript": [],
+            "winner": None,
+            "is_terminal": False,
+            "invalid_move_reason": None,
+        }
+
+        self.state.reset(
+            game_state=game_state,
+            player_prompt_function=self._generate_player_prompt,
+            role_mapping={0: "Sun", 1: "Moon"},
+        )
+
+        # Opening announcements.
+        # NOTE(review): the second message contains the exit coordinates although
+        # its text calls them secret -- confirm who receives GAME_MESSAGE
+        # observations added without an explicit recipient.
+        announcements = (
+            "Welcome to EchoMaze: The Labyrinth Duel!",
+            f"Exit Glyph hidden at {tuple(exit_cell)} (secretly known to system).",
+        )
+        for line in announcements:
+            self.state.add_observation(line, ta.ObservationType.GAME_MESSAGE)
+        return self.state
+
+    # ----------------------------------------------------------------------
+    # Step
+    # ----------------------------------------------------------------------
+    def step(self, action: str) -> Tuple[bool, ta.Info]:
+        """
+        Apply the current player's action and hand the turn to the state object.
+
+        The raw action is logged, the command inside ``\\boxed{}`` is extracted
+        and validated, its effect is applied, and the win / draw / turn-limit
+        conditions are evaluated.
+
+        A rejected action (unknown syntax, no Focus left, or a move that
+        ``_process_move`` refuses) is reported through ``set_invalid_move`` and
+        returns immediately: it is not appended to the transcript, does not
+        advance ``turn_count`` and never triggers win/draw evaluation.
+
+        Args:
+            action: Raw text submitted by the current player.
+
+        Returns:
+            Whatever ``self.state.step()`` returns.
+        """
+        player_id = self.state.current_player_id
+        player_name = "Sun" if player_id == 0 else "Moon"
+
+        self.state.add_observation(
+            action,
+            ta.ObservationType.PLAYER_ACTION,
+            from_id=player_id,
+            to_id=-1,
+        )
+
+        extracted_action = self._extract_answer_content(action)
+        current_state = self.state.game_state
+        player_data = current_state["players"][player_name]
+
+        # Nothing more to process once the game has been decided or frozen.
+        if current_state["winner"] or current_state["is_terminal"]:
+            return self.state.step()
+
+        # --- Validation ---
+        invalid_reason = None
+        if extracted_action not in self.valid_actions:
+            invalid_reason = "Unrecognized action syntax."
+        elif player_data["focus"] <= 0 and extracted_action != "[Rest]":
+            invalid_reason = "Insufficient focus to perform action."
+
+        if invalid_reason is not None:
+            self.state.set_invalid_move(invalid_reason)
+            current_state["invalid_move_reason"] = invalid_reason
+            current_state["is_terminal"] = True
+            return self.state.step()
+
+        # --- Execute effect ---
+        result_message = ""
+        if extracted_action.startswith("[Move:"):
+            direction = extracted_action.split(":")[1].strip(" ]")
+            result_message = self._process_move(player_name, direction, current_state)
+            # is_terminal was falsy a few lines above, so a truthy value here can
+            # only mean _process_move rejected the move (wall / out of bounds).
+            # Treat it like the other invalid actions: no bookkeeping, no win
+            # evaluation on top of an already-flagged invalid move.
+            if current_state["is_terminal"]:
+                return self.state.step()
+        elif extracted_action == "[Scan]":
+            result_message = self._process_scan(player_name, current_state)
+            player_data["focus"] -= 1
+        elif extracted_action == "[Mark]":
+            result_message = self._process_mark(player_name, current_state)
+            player_data["focus"] -= 1
+        elif extracted_action == "[Rest]":
+            result_message = self._process_rest(player_name, current_state)
+
+        player_data["last_action"] = extracted_action
+        current_state["public_transcript"].append(f"{player_name}: {extracted_action}")
+        current_state["turn_count"] += 1
+
+        # --- Check terminal conditions after the action ---
+        exit_loc = current_state["exit_location"]
+        sun_pos = current_state["players"]["Sun"]["position"]
+        moon_pos = current_state["players"]["Moon"]["position"]
+
+        if sun_pos == exit_loc and moon_pos == exit_loc:
+            self.state.set_draw("Both players reached the Exit Glyph simultaneously.")
+            current_state["winner"] = "Draw"
+            current_state["is_terminal"] = True
+        elif sun_pos == exit_loc:
+            self.state.set_winner(0, "Sun reached the Exit Glyph.")
+            current_state["winner"] = "Sun"
+            current_state["is_terminal"] = True
+        elif moon_pos == exit_loc:
+            self.state.set_winner(1, "Moon reached the Exit Glyph.")
+            current_state["winner"] = "Moon"
+            current_state["is_terminal"] = True
+        elif current_state["turn_count"] >= self.max_turns:
+            # Turn budget exhausted: whoever is closer to the exit wins.
+            sun_dist = self._manhattan_distance(sun_pos, exit_loc)
+            moon_dist = self._manhattan_distance(moon_pos, exit_loc)
+            if sun_dist < moon_dist:
+                self.state.set_winner(0, "Sun is closer to the Exit Glyph after max turns.")
+                current_state["winner"] = "Sun"
+            elif moon_dist < sun_dist:
+                self.state.set_winner(1, "Moon is closer to the Exit Glyph after max turns.")
+                current_state["winner"] = "Moon"
+            else:
+                self.state.set_draw("Equal distance to Exit Glyph after max turns.")
+                current_state["winner"] = "Draw"
+            current_state["is_terminal"] = True
+
+        # Publish the outcome of the action.
+        self.state.add_observation(result_message, ta.ObservationType.GAME_MESSAGE)
+        return self.state.step()
+
+    # ----------------------------------------------------------------------
+    # Helpers
+    # ----------------------------------------------------------------------
+    def _generate_maze(self, seed: Optional[int]):
+        """
+        Build the maze grid, the exit cell and both start cells from ``seed``.
+
+        The grid is ``maze_size`` x ``maze_size``. The outer ring is always wall
+        (``#``); each interior cell is open (``.``) unless a random draw falls
+        at or below 0.25. One interior cell becomes the exit (``E``), and the
+        start cells are marked ``S`` (Sun) and ``M`` (Moon).
+
+        A private ``random.Random(seed)`` is used instead of reseeding the
+        module-level generator, so calling this helper does not disturb other
+        users of ``random``. For a given seed the draw sequence is the same as
+        with ``random.seed(seed)``; ``None`` seeds from system entropy.
+
+        Args:
+            seed: Seed for the layout, or ``None`` for a non-reproducible maze.
+
+        Returns:
+            ``(maze, [exit_row, exit_col], sun_start, moon_start)``.
+        """
+        size = self.maze_size
+        rng = random.Random(seed)
+        maze = [["#" for _ in range(size)] for _ in range(size)]
+
+        # Open up interior cells at random; the border stays wall.
+        for i in range(1, size - 1):
+            for j in range(1, size - 1):
+                maze[i][j] = "." if rng.random() > 0.25 else "#"
+
+        # Place the exit on an interior cell (it may replace a wall).
+        exit_x, exit_y = rng.randint(1, size - 2), rng.randint(1, size - 2)
+        maze[exit_x][exit_y] = "E"
+
+        # Sun starts at the first open cell from the top, Moon from the bottom.
+        sun_start = self._find_open_cell(maze, from_top=True)
+        moon_start = self._find_open_cell(maze, from_top=False)
+        maze[sun_start[0]][sun_start[1]] = "S"
+        maze[moon_start[0]][moon_start[1]] = "M"
+
+        return maze, [exit_x, exit_y], sun_start, moon_start
+
+    def _find_open_cell(self, maze: List[List[str]], from_top: bool = True) -> List[int]:
+        """
+        Locate the first open (``.``) cell in scan order.
+
+        With ``from_top`` the grid is scanned top-to-bottom and left-to-right;
+        otherwise bottom-to-top and right-to-left. The same index sequence
+        (derived from the row count) is used for rows and columns.
+
+        Returns:
+            ``[row, col]`` of the first open cell, or the near-corner interior
+            cell (``[1, 1]`` / ``[size - 2, size - 2]``) when no cell is open.
+        """
+        size = len(maze)
+        if from_top:
+            scan_order = range(size)
+            fallback = [1, 1]
+        else:
+            scan_order = range(size - 1, -1, -1)
+            fallback = [size - 2, size - 2]
+
+        open_cells = (
+            [r, c]
+            for r in scan_order
+            for c in scan_order
+            if maze[r][c] == "."
+        )
+        return next(open_cells, fallback)
+
+    def _extract_answer_content(self, action: str) -> str:
+        """Return the stripped text of the first \\boxed{...}, or the stripped action if none."""
+        boxed = re.compile(r"\\boxed\{([^}]*)\}", re.DOTALL).search(action)
+        raw = action if boxed is None else boxed.group(1)
+        return raw.strip()
+
+    def _manhattan_distance(self, a: List[int], b: List[int]) -> int:
+        """Return the grid distance between two cells: row gap plus column gap."""
+        row_gap = abs(a[0] - b[0])
+        col_gap = abs(a[1] - b[1])
+        return row_gap + col_gap
+
+    def _process_move(self, player: str, direction: str, game_state: Dict[str, Any]) -> str:
+        """
+        Try to move ``player`` one cell in ``direction``.
+
+        North/South change the row index, East/West the column index. A move
+        that leaves the grid or targets a wall (``#``) is rejected: the reason
+        is passed to ``set_invalid_move``, the same reason is stored in
+        ``game_state["invalid_move_reason"]``, and ``is_terminal`` is set. A
+        successful move updates the position and costs 1 Focus.
+
+        Args:
+            player: Key of the moving player in ``game_state["players"]``.
+            direction: ``"North"``, ``"South"``, ``"East"`` or ``"West"``; any
+                other value leaves the offset at zero.
+            game_state: Mutable game-state dict, updated in place.
+
+        Returns:
+            A one-line description of what happened.
+        """
+        offsets = {
+            "North": (-1, 0),
+            "South": (1, 0),
+            "East": (0, 1),
+            "West": (0, -1),
+        }
+        player_data = game_state["players"][player]
+        x, y = player_data["position"]
+        dx, dy = offsets.get(direction, (0, 0))
+        new_x, new_y = x + dx, y + dy
+        maze = game_state["maze_layout"]
+
+        reason = None
+        outcome = ""
+        if not (0 <= new_x < len(maze) and 0 <= new_y < len(maze[0])):
+            reason = "Cannot move outside bounds."
+            outcome = f"{player} attempted to move outside bounds."
+        elif maze[new_x][new_y] == "#":
+            reason = "Cannot move through wall."
+            outcome = f"{player} tried to move into a wall."
+
+        if reason is not None:
+            # One reason string feeds both the framework and the stored state so
+            # the two can never disagree.
+            self.state.set_invalid_move(reason)
+            game_state["invalid_move_reason"] = reason
+            game_state["is_terminal"] = True
+            return outcome
+
+        player_data["position"] = [new_x, new_y]
+        player_data["focus"] -= 1
+        return f"{player} moved {direction} to {(new_x, new_y)}."
+
+    def _process_scan(self, player: str, game_state: Dict[str, Any]) -> str:
+        """
+        Describe the four cells adjacent to ``player``.
+
+        Each neighbour is reported as ``Wall`` (``#``), ``Open`` (anything else
+        inside the grid) or ``Out of bounds``, in the order North, South, East,
+        West. Nothing in ``game_state`` is modified.
+
+        Returns:
+            The scan report as a single sentence.
+        """
+        here = game_state["players"][player]["position"]
+        row, col = here[0], here[1]
+        grid = game_state["maze_layout"]
+        neighbours = (
+            ("North", row - 1, col),
+            ("South", row + 1, col),
+            ("East", row, col + 1),
+            ("West", row, col - 1),
+        )
+
+        findings = []
+        for heading, r, c in neighbours:
+            if not (0 <= r < len(grid) and 0 <= c < len(grid[0])):
+                label = "Out of bounds"
+            elif grid[r][c] == "#":
+                label = "Wall"
+            else:
+                label = "Open"
+            findings.append(f"{heading}: {label}")
+
+        return f"{player} scanned surroundings. " + ", ".join(findings)
+
+    def _process_mark(self, player: str, game_state: Dict[str, Any]) -> str:
+        """
+        Record the player's current cell in their marker list (once per cell).
+
+        A copy of the position is stored so later moves cannot alter the marker.
+
+        Returns:
+            A confirmation sentence naming the marked cell.
+        """
+        record = game_state["players"][player]
+        cell = record["position"]
+        placed = record["markers"]
+        already_marked = cell in placed
+        if not already_marked:
+            placed.append(cell.copy())
+        return f"{player} marked the cell at {tuple(cell)}."
+
+    def _process_rest(self, player: str, game_state: Dict[str, Any]) -> str:
+        """Give ``player`` one point of Focus and report the new total."""
+        stats = game_state["players"][player]
+        stats["focus"] = stats["focus"] + 1
+        return f"{player} rested and recovered 1 Focus (now {stats['focus']})."
+
+    # ----------------------------------------------------------------------
+    # Prompt
+    # ----------------------------------------------------------------------
+    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
+        """
+        Build the instruction text shown to one player.
+
+        Player 0 is addressed as Sun, any other id as Moon. The text states the
+        player's position and Focus, lists the accepted actions and shows one
+        valid and one invalid example of the ``\\boxed{}`` answer format.
+        """
+        player_name = "Moon" if player_id != 0 else "Sun"
+        record = game_state["players"][player_name]
+        pos = tuple(record["position"])
+        focus = record["focus"]
+
+        # An empty entry yields a blank line between sections once joined.
+        lines = [
+            f"You are **Player {player_name}**, an explorer within the mystic underground labyrinth of EchoMaze.",
+            f"Your current position is {pos} with Focus = {focus}.",
+            "Your objective is to reach the Exit Glyph before your rival.",
+            "Actions must be exactly one of:",
+            "  - [Move: North], [Move: South], [Move: East], [Move: West]",
+            "  - [Scan]  — Reveal walls around you.",
+            "  - [Mark]  — Leave a marker in this cell.",
+            "  - [Rest]  — Skip turn, regain 1 Focus.",
+            "",
+            "Only one action per turn. Place it inside \\boxed{} like so:",
+            "Example valid response:",
+            "I decide to move north.",
+            "\\boxed{[Move: North]}",
+            "",
+            "Example invalid response:",
+            "I will move upward.",
+            "\\boxed{[Move: Up]}    <-- invalid action",
+        ]
+        return "\n".join(lines) + "\n"
+
+    # ----------------------------------------------------------------------
+    # Framework helpers
+    # ----------------------------------------------------------------------
+    def get_observation(self) -> Tuple[int, List]:
+        """
+        Return the id of the player to act together with the game-state object.
+
+        NOTE(review): the annotation says ``List``, but ``reset`` fills
+        ``game_state`` with a dict that includes the exit location and both
+        players' data -- confirm this is what callers expect to receive.
+        """
+        state = self.state
+        acting_player = state.current_player_id
+        return acting_player, state.game_state
+
+    def close(self) -> Tuple[Dict, Dict]:
+        """Return the state's ``rewards`` and ``game_state`` as a pair."""
+        state = self.state
+        final_rewards = state.rewards
+        return final_rewards, state.game_state
+```