From 7551dfaf12b069d716f0387c6e4a503a38fb972c Mon Sep 17 00:00:00 2001 From: bobbycxy Date: Fri, 21 Nov 2025 08:04:39 +0000 Subject: [PATCH] Initial commit from Openverse UI --- README.md | 240 +++++++++++++++++++++++++++++++++++++++++++++++++ env.py | 229 ++++++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 0 3 files changed, 469 insertions(+) create mode 100644 README.md create mode 100644 env.py create mode 100644 pyproject.toml diff --git a/README.md b/README.md new file mode 100644 index 0000000..ff372ad --- /dev/null +++ b/README.md @@ -0,0 +1,240 @@ +# Game Design Document: **Labyrinth Conquest** + +--- + +## 1. Concept Paragraph + +**Game Concept:** +*Labyrinth Conquest* is a **turn-based, deterministic grid-navigation strategy game** for two players competing to retrieve a relic hidden within a shifting labyrinth. Each player commands an **Explorer**, represented by a marker on a square grid of tiles. The labyrinth contains walls, traps, and hazards that limit movement but are fully known to both players. Players alternate turns choosing actions to **Move**, **Rotate Tiles**, or **Activate Gadgets** in order to reach the central **Relic Tile** first. This design is **entirely original and unrelated to negotiation or trade-based gameplay**. The environment's challenge lies in spatial reasoning and path optimization. + +--- + +## 2. Roles and Win Condition + +**Roles:** +- **Player A** and **Player B** each control a distinct Explorer starting from opposite corners of the labyrinth. +- Both can observe the entire labyrinth state at all times. + +**Win Condition:** +- The first player to move their Explorer onto the **Relic Tile** wins the game immediately (`winner = current_player`). +- If neither player reaches the relic after a fixed number of turns (e.g., 40), the winner is the player **closest (by Manhattan distance)** to the relic. +- If both are equidistant, the result is declared a **Draw**. + +--- + +## 3. 
Turn Structure and Determinism + +- Players alternate turns strictly: Player A → Player B → Player A → … +- Each turn consists of **one valid action**. +- Determinism is ensured by: + - Fixed grid layout and trap positions controlled by RNG seed. + - Any randomized initial layout generation uses the provided `seed` for exact reproducibility. +- Maximum turn limit: **40 turns per player** (80 total). +- Game ends immediately if a terminal condition is met. + +--- + +## 4. Action Grammar (Machine-Parseable) + +### Action Types: +Players may issue exactly one of the following tokens per turn, enclosed in `\boxed{{}}` during play. + +--- + +#### 1. **`[Move: <direction>]`** +- Moves the player’s Explorer one tile in a cardinal direction if no wall blocks the path. +- `<direction>` ∈ {`N`, `S`, `E`, `W`} + +**Regex:** +`^\[Move: (N|S|E|W)\]$` + +**Example valid:** `[Move: N]` +**Example invalid:** `[Move: north]` → Invalid because lowercase direction not allowed. + +--- + +#### 2. **`[Rotate: <x>,<y>,<direction>]`** +- Rotates a specified tile at coordinates `(x,y)` one quarter-turn clockwise or counterclockwise. +- `<direction>` ∈ {`CW`, `CCW`} + +**Regex:** +`^\[Rotate: [0-9]+,[0-9]+,(CW|CCW)\]$` + +**Example valid:** `[Rotate: 2,3,CW]` +**Example invalid:** `[Rotate: x2,3,CW]` → Invalid because coordinate must be numeric. + +--- + +#### 3. **`[Activate: <gadget>]`** +- Triggers one of the special gadgets: opening traps or shifting a row. +- `<gadget>` ∈ {`Bridge`, `TrapDisarm`, `RowShift`} + +**Regex:** +`^\[Activate: (Bridge|TrapDisarm|RowShift)\]$` + +**Example valid:** `[Activate: Bridge]` +**Example invalid:** `[Activate: Fly]` → Invalid gadget keyword. + +--- + +### Validation Notes: +Only one token per turn is permitted. Spacing, capitalization, and punctuation must **exactly** match these predefined grammars. + +--- + +## 5. 
Game State Schema + +```json +{ + "grid_size": 5, + "tiles": [ + ["floor", "wall", "trap", "floor", "floor"], + ["floor", "floor", "wall", "trap", "floor"], + ["floor", "wall", "relic", "floor", "floor"], + ["floor", "trap", "floor", "wall", "floor"], + ["startA", "floor", "floor", "floor", "startB"] + ], + "player_states": { + "A": { + "position": [0, 0], + "gadgets": ["Bridge", "TrapDisarm"], + "moves_taken": 5, + "distance_to_relic": 6 + }, + "B": { + "position": [4, 4], + "gadgets": ["RowShift"], + "moves_taken": 4, + "distance_to_relic": 8 + } + }, + "turn_number": 9, + "current_player": "A", + "seed": 42, + "action_history": [ + "A: [Move: E]", + "B: [Rotate: 3,3,CW]", + "A: [Activate: Bridge]" + ], + "winner": null, + "terminated": false, + "invalid_reason": null, + "observations": [ + "Game begins. Players start in opposite corners.", + "A moved east.", + "B rotated tile (3,3) clockwise." + ] +} +``` + +--- + +## 6. Initialization Rules + +- A seeded RNG (`seed` input at `reset`) controls: + - Tile placement (`wall`, `trap`, `floor`, `relic`) + - Starting gadget distributions. +- Starting layout: + - `startA` at `(0,0)`, `startB` at `(grid_size-1, grid_size-1)`, `relic` at center. + - Each player begins with **2 random gadgets**. +- The first observation announces the initial labyrinth map and coordinates. +- No random movement during play ensures full determinism post-reset. + +--- + +## 7. 
Validation and Error Handling + +**Illegal Actions Detected If:** +- The unboxed action string does not match any defined regex pattern → `Reason: "Invalid action format"` +- The target coordinate `(x,y)` is outside the grid → `Reason: "Tile out of bounds"` +- Attempted movement blocked by a wall → `Reason: "Wall blocks path"` +- Gadget already used → `Reason: "Gadget unavailable"` +- Player issues multiple actions or malformed tokens → `Reason: "Multiple or malformed commands"` + +When detected, the environment will call `set_invalid_move(player, reason)` and the opponent automatically wins unless `training_mode` allows retry. + +--- + +## 8. Terminal Conditions and Scoring + +**Terminal Checks Each Turn:** +1. If a player’s new position contains `"relic"`, `winner = current_player`. +2. If `turn_number >= max_turns`, compute `distance_to_relic` for both. + - Shorter distance → winner. + - Equal distance → `winner = null`, `draw = True`. +3. If an invalid move occurs, `winner = opponent`. + +**Scoring:** +- `Winner`: +1 point +- `Loser`: 0 points +- `Draw`: both get 0.5 points + +--- + +## 9. Player Prompt Specification + +Each `_generate_player_prompt` presents the labyrinth, Explorer positions, remaining gadgets, turn count, and explicit action grammar. + +**Prompt Outline:** + +``` +You are an Explorer navigating a shifting labyrinth. +Your goal is to reach the Relic Tile before your opponent by issuing one of the allowed commands. + +Available actions (case-sensitive): +- [Move: N|S|E|W] — Move one tile in a direction if no wall blocks the way. +- [Rotate: x,y,CW|CCW] — Rotate the tile at coordinates (x,y). +- [Activate: Bridge|TrapDisarm|RowShift] — Use one of your gadgets (if available). + +Current Turn: 9 +You are Player A. Opponent is Player B. +Your position: (0,0) +Relic position: (2,2) +Available gadgets: Bridge, TrapDisarm + +Respond with exactly one valid action token. +Put your final answer within \boxed{{}} at the end of your response. 
+ +Example valid response: +I will move north to progress toward the relic. +\boxed{{[Move: N]}} + +Example invalid response: +\boxed{{Move north}} ← Invalid format; must include brackets and colon. +``` + +--- + +## 10. API Mapping Plan + +### `reset(seed=None)` +- Creates a deterministic labyrinth with walls, traps, relic, and player starts. +- Initializes `game_state` following schema. +- Adds initial observations describing layout and objectives. +- Returns `obs` for both players. + +### `step(player_id, action)` +- Extracts content using `_extract_answer_content`. +- Validates action format and feasibility. +- Updates positions, tile orientations, and available gadgets deterministically. +- Appends the action to `action_history` and `observations`. +- Checks terminal conditions; sets `terminated` and `winner` when satisfied. +- Returns updated observation and reward outcomes. + +### `_generate_player_prompt(player_id)` +- Builds the full text prompt described above, tailored to the player’s view of current state. +- Queries `game_state` for position, gadgets, current turn, and visible grid. +- Appends example output section. + +--- + +## 11. Copy-Check Against the Example + +This design features a **completely unique environment**: +- **Theme:** Spatial navigation and puzzle solving (not negotiation or economy). +- **Terminology:** Explorers, relic, labyrinth, tiles, gadgets — none appear in the example. +- **Game mechanics:** Grid movement and tile transformation — unrelated to offers, deals, or trade. +- **State keys:** (`tiles`, `gadgets`, `relic`, `turn_number`, etc.) are original. +- **Prompt text** describes an exploration challenge, not an agreement or exchange. + +Hence, *Labyrinth Conquest* satisfies the requirement to be a distinct, self-contained, deterministic, turn-based navigation environment. 
import re
import random
from typing import Any, Dict, List, Optional, Tuple

import textarena as ta


class LabyrinthConquestEnv(ta.Env):
    """
    Environment implementation for the Labyrinth Conquest game (Stage 1 design).

    Two-player, turn-based grid navigation game. Player A (id 0) starts at the
    top-left corner, player B (id 1) at the bottom-right corner, and the relic
    sits on the centre tile. All randomness is confined to ``reset`` and is
    driven by the supplied seed; ``step`` itself is deterministic.

    Positions are ``[row, col]`` lists with row 0 at the top of the grid.
    """

    # Row/column offsets per compass direction ("N" decreases the row index).
    _MOVE_DELTAS = {"N": (-1, 0), "S": (1, 0), "E": (0, 1), "W": (0, -1)}

    # Accepts both \boxed{...} and the doubled-brace \boxed{{...}} form that
    # the player prompt shows in its examples.
    _BOXED_PATTERN = re.compile(r'\\boxed\{\{?([^}]*)\}?\}', re.DOTALL)

    def __init__(self, grid_size: int = 5, max_turns: int = 80):
        """
        Args:
            grid_size: Side length of the square labyrinth. Must be at least 3,
                otherwise a start corner coincides with the relic tile.
            max_turns: Total number of accepted actions (both players
                combined) before the game is decided by distance to the relic.

        Raises:
            ValueError: If ``grid_size`` is smaller than 3.
        """
        if grid_size < 3:
            raise ValueError("grid_size must be at least 3 so that the starts and the relic are distinct tiles.")
        self.grid_size = grid_size
        self.max_turns = max_turns
        self.move_pattern = re.compile(r'^\[Move: (N|S|E|W)\]$')
        self.rotate_pattern = re.compile(r'^\[Rotate: ([0-9]+),([0-9]+),(CW|CCW)\]$')
        self.activate_pattern = re.compile(r'^\[Activate: (Bridge|TrapDisarm|RowShift)\]$')

    # === Helper to extract boxed command ======================================
    def _extract_answer_content(self, action: str) -> str:
        """
        Extract the content of the last ``\\boxed{...}`` in ``action``.

        The prompt asks for the final answer at the end of the response, so
        when several boxed spans are present the last one is used. If no boxed
        span is found, the whole (stripped) action text is returned.
        """
        boxed = self._BOXED_PATTERN.findall(action)
        if boxed:
            return boxed[-1].strip()
        return action.strip()

    # === Reset ===============================================================
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players (int): Must be 2.
            seed (Optional[int]): Optional seed for deterministic setup.

        Returns:
            The freshly initialised ``self.state``.

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Labyrinth Conquest is a two-player game.")

        self.random = random.Random(seed)
        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

        size = self.grid_size
        relic_pos = self._relic_pos()
        start_a = [0, 0]
        start_b = [size - 1, size - 1]
        fixed_tiles = {
            tuple(start_a): "startA",
            tuple(start_b): "startB",
            tuple(relic_pos): "relic",
        }

        tiles = [["floor" for _ in range(size)] for _ in range(size)]
        for i in range(size):
            for j in range(size):
                marker = fixed_tiles.get((i, j))
                if marker is not None:
                    tiles[i][j] = marker
                    continue
                # One RNG draw per ordinary tile, in row-major order, so a
                # given seed always yields the same layout.
                # NOTE(review): walls are placed without a connectivity check,
                # so a seed may produce a relic that cannot be reached.
                r = self.random.random()
                if r < 0.1:
                    tiles[i][j] = "wall"
                elif r < 0.2:
                    tiles[i][j] = "trap"

        all_gadgets = ["Bridge", "TrapDisarm", "RowShift"]
        gadgets_a = self.random.sample(all_gadgets, k=2)
        gadgets_b = self.random.sample(all_gadgets, k=2)

        player_states = {
            "A": {
                "position": start_a,
                "gadgets": gadgets_a,
                "moves_taken": 0,
                "distance_to_relic": self._manhattan(start_a, relic_pos),
            },
            "B": {
                "position": start_b,
                "gadgets": gadgets_b,
                "moves_taken": 0,
                "distance_to_relic": self._manhattan(start_b, relic_pos),
            },
        }

        game_state = {
            "grid_size": size,
            "tiles": tiles,
            "player_states": player_states,
            "turn_number": 0,
            "current_player": "A",
            "seed": seed,
            "action_history": [],
            "winner": None,
            "terminated": False,
            "invalid_reason": None,
            "observations": ["Game begins. Players start in opposite corners."],
        }

        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt)

        layout_str = "\n".join(" ".join(row) for row in tiles)
        self.state.add_observation(f"Initial labyrinth layout:\n{layout_str}", ta.ObservationType.GAME_BOARD)

        return self.state

    # === Step ================================================================
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single environment step for the current player.

        The action text is logged, its boxed command is parsed against the
        three action grammars, and the matching handler is applied. A rejected
        action records its reason in ``game_state["invalid_reason"]`` and is
        reported through ``state.set_invalid_move``; an accepted action is
        appended to the history, advances the turn counter, and is followed by
        the terminal checks (relic reached, then turn limit).

        Args:
            action (str): The action text submitted by the current player.

        Returns:
            Tuple[bool, ta.Info]: done flag and info object from the state.
        """
        pid = self.state.current_player_id
        player_key = "A" if pid == 0 else "B"
        opp_key = "B" if player_key == "A" else "A"
        game_state = self.state.game_state
        player_state = game_state["player_states"][player_key]
        relic_pos = self._relic_pos()

        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=pid, to_id=-1)
        content = self._extract_answer_content(action)

        # Match each grammar once and reuse the match objects below.
        move = self.move_pattern.match(content)
        rotate = self.rotate_pattern.match(content)
        activate = self.activate_pattern.match(content)

        if move:
            desc, error = self._apply_move(player_key, player_state, game_state["tiles"], move.group(1))
        elif rotate:
            x, y, dir_rot = rotate.groups()
            desc, error = self._apply_rotate(player_key, int(x), int(y), dir_rot)
        elif activate:
            desc, error = self._apply_activate(player_key, player_state, activate.group(1))
        else:
            desc, error = None, "Invalid action format"

        if error is not None:
            return self._reject(game_state, error)

        # Accepted action: clear any stale rejection reason and log it.
        game_state["invalid_reason"] = None
        self.state.add_observation(desc, ta.ObservationType.GAME_MESSAGE)
        game_state["action_history"].append(f"{player_key}: {content}")
        game_state["observations"].append(desc)

        player_state["distance_to_relic"] = self._manhattan(player_state["position"], relic_pos)
        game_state["turn_number"] += 1
        game_state["current_player"] = opp_key

        if self._same_pos(player_state["position"], relic_pos):
            game_state["winner"] = player_key
            self.state.set_winner(player_id=pid, reason=f"{player_key} reached the relic first.")
            game_state["terminated"] = True
            return self.state.step()

        if game_state["turn_number"] >= self.max_turns:
            dist_a = game_state["player_states"]["A"]["distance_to_relic"]
            dist_b = game_state["player_states"]["B"]["distance_to_relic"]
            if dist_a < dist_b:
                self.state.set_winner(player_id=0, reason="Player A closer to the relic.")
                game_state["winner"] = "A"
            elif dist_b < dist_a:
                self.state.set_winner(player_id=1, reason="Player B closer to the relic.")
                game_state["winner"] = "B"
            else:
                self.state.set_draw(reason="Equal distance to the relic.")
                game_state["winner"] = None
            game_state["terminated"] = True
            return self.state.step()

        return self.state.step()

    def _reject(self, game_state: Dict[str, Any], reason: str) -> Tuple[bool, ta.Info]:
        """Record ``reason`` in the game state, flag the invalid move and advance the state."""
        game_state["invalid_reason"] = reason
        self.state.set_invalid_move(reason=reason)
        return self.state.step()

    def _apply_move(
        self,
        player_key: str,
        player_state: Dict[str, Any],
        tiles: List[List[str]],
        direction: str,
    ) -> Tuple[Optional[str], Optional[str]]:
        """Move the explorer one tile; return ``(description, None)`` or ``(None, reason)``."""
        d_row, d_col = self._MOVE_DELTAS[direction]
        row = player_state["position"][0] + d_row
        col = player_state["position"][1] + d_col

        if not (0 <= row < self.grid_size and 0 <= col < self.grid_size):
            return None, "Tile out of bounds"
        if tiles[row][col] == "wall":
            return None, "Wall blocks path"

        # NOTE: trap tiles currently have no effect on movement.
        player_state["position"] = [row, col]
        player_state["moves_taken"] += 1
        return f"{player_key} moved {direction}.", None

    def _apply_rotate(
        self, player_key: str, x: int, y: int, dir_rot: str
    ) -> Tuple[Optional[str], Optional[str]]:
        """Validate a rotate command; return ``(description, None)`` or ``(None, reason)``.

        NOTE: tiles carry no orientation, so a rotation is only logged and
        does not alter the grid.
        """
        if not (0 <= x < self.grid_size and 0 <= y < self.grid_size):
            return None, "Tile out of bounds"
        return f"{player_key} rotated tile ({x},{y}) {dir_rot}.", None

    def _apply_activate(
        self, player_key: str, player_state: Dict[str, Any], gadget: str
    ) -> Tuple[Optional[str], Optional[str]]:
        """Consume a gadget; return ``(description, None)`` or ``(None, reason)``.

        NOTE: the gadget is removed from the player's inventory and logged; no
        further board effect is applied.
        """
        if gadget not in player_state["gadgets"]:
            return None, "Gadget unavailable"
        player_state["gadgets"].remove(gadget)
        return f"{player_key} activated {gadget}.", None

    # === Prompt ==============================================================
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """Build the instruction prompt for ``player_id`` from the current game state."""
        player_key = "A" if player_id == 0 else "B"
        player_info = game_state["player_states"][player_key]
        relic_pos = tuple(self._relic_pos())
        return (
            "You are an Explorer navigating a shifting labyrinth.\n"
            "Your goal is to reach the Relic Tile before your opponent by issuing one of the allowed commands.\n\n"
            "Available actions (case-sensitive):\n"
            "- [Move: N|S|E|W] — Move one tile in a direction if no wall blocks the way.\n"
            "- [Rotate: x,y,CW|CCW] — Rotate the tile at coordinates (x,y).\n"
            "- [Activate: Bridge|TrapDisarm|RowShift] — Use one of your gadgets (if available).\n\n"
            f"Current Turn: {game_state['turn_number']}\n"
            f"You are Player {player_key}. Opponent is Player {'B' if player_key == 'A' else 'A'}.\n"
            f"Your position: {tuple(player_info['position'])}\n"
            f"Relic position: {relic_pos}\n"
            f"Available gadgets: {', '.join(player_info['gadgets']) if player_info['gadgets'] else 'None'}\n\n"
            "Respond with exactly one valid action token.\n"
            "Put your final answer within \\boxed{{}} at the end of your response.\n\n"
            "Example valid response:\n"
            "I will move north to progress toward the relic.\n"
            "\\boxed{{[Move: N]}}\n\n"
            "Example invalid response:\n"
            "\\boxed{{Move north}} ← Invalid format; must include brackets and colon."
        )

    # === Utility =============================================================
    def _relic_pos(self) -> List[int]:
        """Return the ``[row, col]`` of the relic (the centre tile)."""
        centre = self.grid_size // 2
        return [centre, centre]

    def _manhattan(self, a: List[int], b: List[int]) -> int:
        """Return the Manhattan distance between two ``[row, col]`` positions."""
        return abs(a[0] - b[0]) + abs(a[1] - b[1])

    def _same_pos(self, a: List[int], b: List[int]) -> bool:
        """Return True when both positions share the same row and column."""
        return a[0] == b[0] and a[1] == b[1]