From d6f8ed4bf86b0811042f725df68cf12e226bf8cf Mon Sep 17 00:00:00 2001
From: bobbycxy
Date: Fri, 21 Nov 2025 06:58:15 +0000
Subject: [PATCH] Initial commit from Openverse UI (via Openverse CLI)

---
 README.md      |   2 +
 env.py         | 336 +++++++++++++++++++++++++++++++++++++++++++++++++
 environment.md | 215 +++++++++++++++++++++++++++++++
 3 files changed, 553 insertions(+)
 create mode 100644 README.md
 create mode 100644 env.py
 create mode 100644 environment.md

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..2d8c0d2
--- /dev/null
+++ b/README.md
@@ -0,0 +1,2 @@
+# testtest2
+Generated via Openverse
\ No newline at end of file
diff --git a/env.py b/env.py
new file mode 100644
index 0000000..f184066
--- /dev/null
+++ b/env.py
@@ -0,0 +1,336 @@
+import random
+import re
+from collections import deque
+from typing import Any, Dict, List, Optional, Tuple
+
+import textarena as ta
+
+
+class LabyrinthCommandEnv(ta.Env):
+    """
+    Deterministic, turn-based two-player tactical maze environment: "Labyrinth Command"
+
+    Two players (Explorer A and B) move through a seeded maze to reach the
+    Central Beacon. Cells are addressed as (x, y) with (0, 0) in the top-left
+    corner: North decreases y and East increases x.
+    """
+
+    # Unit step (dx, dy) applied for each direction keyword.
+    _DIRECTION_DELTAS = {"North": (0, -1), "South": (0, 1), "West": (-1, 0), "East": (1, 0)}
+    # Accepts the template-style \boxed{{...}} as well as the plain \boxed{...}.
+    _BOXED_PATTERN = re.compile(r"\\boxed\{\{(.*?)\}\}|\\boxed\{(.*?)\}", re.DOTALL)
+
+    def __init__(self, max_turns: int = 40, maze_width: int = 7, maze_height: int = 7):
+        """
+        Store the configuration; the maze itself is built in ``reset``.
+
+        Args:
+            max_turns: turn limit handed to the textarena state.
+            maze_width: number of columns in the maze grid.
+            maze_height: number of rows in the maze grid.
+
+        Raises:
+            ValueError: if the grid is too small for the beacon and the two
+                start corners to occupy distinct cells.
+        """
+        if maze_width < 1 or maze_height < 1 or (maze_width <= 2 and maze_height <= 2):
+            raise ValueError("Maze must have at least 3 cells along one dimension.")
+        self.max_turns = max_turns
+        self.maze_width = maze_width
+        self.maze_height = maze_height
+        self.move_pattern = re.compile(r"^\[Move:(North|South|East|West)\]$")
+        self.scan_pattern = re.compile(r"^\[Scan\]$")
+        self.wait_pattern = re.compile(r"^\[Wait\]$")
+
+    # -------------------------------------------------------------------------
+    # ========== Helper: Extract boxed command ==========
+    def _extract_answer_content(self, action: str) -> str:
+        """
+        Return the command inside the last boxed segment of ``action``.
+
+        The last ``\\boxed{...}`` wins because players are told to put their
+        final answer at the end of the response. When nothing is boxed, the
+        stripped raw text is returned unchanged.
+        """
+        matches = list(self._BOXED_PATTERN.finditer(action))
+        if not matches:
+            return action.strip()
+        last = matches[-1]
+        content = last.group(1) if last.group(1) is not None else last.group(2)
+        return content.strip()
+
+    # -------------------------------------------------------------------------
+    # ========== Maze and visibility helpers ==========
+    def _generate_deterministic_maze(self, seed: int) -> List[List[str]]:
+        """
+        Build the maze grid for ``seed``: "." is open and "X" is blocked.
+
+        About 10% of the cells are drawn as wall candidates. The start corners
+        and the beacon cell are never blocked, and a wall is dropped when it
+        would cut a start corner off from the beacon.
+        """
+        # A private generator yields the same layout per seed without reseeding
+        # the process-wide ``random`` state.
+        rng = random.Random(seed)
+        width, height = self.maze_width, self.maze_height
+        starts = ((0, 0), (width - 1, height - 1))
+        beacon = (width // 2, height // 2)
+        maze = [["." for _ in range(width)] for _ in range(height)]
+        num_blocks = (width * height) // 10  # about 10% blocked
+        for _ in range(num_blocks):
+            x = rng.randint(0, width - 1)
+            y = rng.randint(0, height - 1)
+            if (x, y) in starts or (x, y) == beacon or maze[y][x] == "X":
+                continue
+            maze[y][x] = "X"
+            if not all(self._is_reachable(maze, start, beacon) for start in starts):
+                maze[y][x] = "."  # this wall would seal a player off
+        return maze
+
+    def _is_reachable(self, maze: List[List[str]], start: Tuple[int, int], goal: Tuple[int, int]) -> bool:
+        """Return True when ``goal`` can be reached from ``start`` without crossing "X" cells."""
+        height = len(maze)
+        width = len(maze[0]) if maze else 0
+        seen = {start}
+        queue = deque([start])
+        while queue:
+            x, y = queue.popleft()
+            if (x, y) == goal:
+                return True
+            for nx, ny in ((x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1)):
+                if not (0 <= nx < width and 0 <= ny < height):
+                    continue
+                if maze[ny][nx] != "X" and (nx, ny) not in seen:
+                    seen.add((nx, ny))
+                    queue.append((nx, ny))
+        return False
+
+    def _render_maze(self, game_state: Dict[str, Any]) -> List[List[str]]:
+        """Rebuild the full grid, walls and beacon included, from ``game_state``."""
+        blocked = {tuple(cell) for cell in game_state["cells_blocked"]}
+        grid = [
+            ["X" if (x, y) in blocked else "." for x in range(game_state["maze_width"])]
+            for y in range(game_state["maze_height"])
+        ]
+        beacon_x, beacon_y = game_state["beacon_position"]
+        grid[beacon_y][beacon_x] = "B"
+        return grid
+
+    def _compute_visible_map(self, maze: List[List[str]], pos: Tuple[int, int]) -> List[List[str]]:
+        """
+        Compute the 3x3 window of ``maze`` centered on ``pos``.
+
+        Rows run north to south and columns west to east; cells outside the
+        maze are reported as "?".
+        """
+        visible = []
+        for dy in range(-1, 2):
+            row = []
+            for dx in range(-1, 2):
+                nx, ny = pos[0] + dx, pos[1] + dy
+                if 0 <= nx < self.maze_width and 0 <= ny < self.maze_height:
+                    row.append(maze[ny][nx])
+                else:
+                    row.append("?")
+            visible.append(row)
+        return visible
+
+    def _distance(self, a: Tuple[int, int], b: Tuple[int, int]) -> int:
+        """Return the Manhattan distance between cells ``a`` and ``b``."""
+        return abs(a[0] - b[0]) + abs(a[1] - b[1])
+
+    # -------------------------------------------------------------------------
+    # ========== Reset ==========
+    def reset(self, num_players: int, seed: Optional[int] = None):
+        """
+        Resets the environment to an initial state.
+
+        Args:
+            num_players: must be 2.
+            seed: optional deterministic seed.
+
+        Returns:
+            The textarena state object created for this game.
+
+        Raises:
+            ValueError: if ``num_players`` is not 2.
+        """
+        if num_players != 2:
+            raise ValueError("Labyrinth Command requires exactly 2 players.")
+
+        seed = seed if seed is not None else random.randint(1, 999999)
+        self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
+        maze = self._generate_deterministic_maze(seed)
+        beacon_pos = (self.maze_width // 2, self.maze_height // 2)
+        maze[beacon_pos[1]][beacon_pos[0]] = "B"
+
+        start_A = (0, 0)
+        start_B = (self.maze_width - 1, self.maze_height - 1)
+
+        player_states = {
+            label: {
+                # Stored as a list so it has the same shape ``step`` writes later.
+                "position": list(start),
+                "visible_map": self._compute_visible_map(maze, start),
+                "visited_cells": [list(start)],
+                "last_action": None,
+            }
+            for label, start in (("A", start_A), ("B", start_B))
+        }
+
+        cells_blocked = [
+            [x, y]
+            for y in range(self.maze_height)
+            for x in range(self.maze_width)
+            if maze[y][x] == "X"
+        ]
+
+        game_state = {
+            "seed": seed,
+            "turn_index": 0,
+            "max_turns": self.max_turns,
+            "maze_width": self.maze_width,
+            "maze_height": self.maze_height,
+            "beacon_position": list(beacon_pos),
+            "cells_blocked": cells_blocked,
+            "player_states": player_states,
+            "transcript": [],
+            "winner": None,
+            "terminated": False,
+        }
+
+        self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt)
+        self.state.add_observation(message="Welcome to Labyrinth Command!", observation_type=ta.ObservationType.GAME_MESSAGE)
+        self.state.add_observation(message=f"Seed: {seed} ensures deterministic maze generation.", observation_type=ta.ObservationType.GAME_MESSAGE)
+        return self.state
+
+    # -------------------------------------------------------------------------
+    # ========== Step ==========
+    def step(self, action: str) -> Tuple[bool, ta.Info]:
+        """
+        Perform a single environment step for the current player.
+
+        Args:
+            action: raw player response; the command is taken from its boxed segment.
+
+        Returns:
+            The result of ``self.state.step()``.
+        """
+        player_id = self.state.current_player_id
+        # log the player action
+        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)
+
+        if self.state.done:
+            self.state.set_invalid_move("Game already finished.")
+            return self.state.step()
+
+        player_label = "A" if player_id == 0 else "B"
+        gs = self.state.game_state
+        player_state = gs["player_states"][player_label]
+        current_pos = tuple(player_state["position"])
+        beacon = tuple(gs["beacon_position"])
+        answer = self._extract_answer_content(action)
+
+        # Validate action syntax
+        move = self.move_pattern.match(answer)
+        if not (move or self.scan_pattern.match(answer) or self.wait_pattern.match(answer)):
+            self.state.set_invalid_move(reason="Invalid token format.")
+            return self.state.step()
+
+        if move:
+            dx, dy = self._DIRECTION_DELTAS[move.group(1)]
+            nx, ny = current_pos[0] + dx, current_pos[1] + dy
+            if not (0 <= nx < gs["maze_width"] and 0 <= ny < gs["maze_height"]):
+                self.state.set_invalid_move("Move out of bounds")
+                return self.state.step()
+            if (nx, ny) in {tuple(cell) for cell in gs["cells_blocked"]}:
+                self.state.set_invalid_move("Cell blocked")
+                return self.state.step()
+            player_state["position"] = [nx, ny]
+            player_state["visited_cells"].append([nx, ny])
+            player_state["visible_map"] = self._compute_visible_map(self._render_maze(gs), (nx, ny))
+        elif answer == "[Scan]":
+            # Refresh the 3x3 view around the current cell.
+            player_state["visible_map"] = self._compute_visible_map(self._render_maze(gs), current_pos)
+        # "[Wait]" changes nothing on the board.
+
+        player_state["last_action"] = answer
+        gs["transcript"].append({"player": player_label, "action": answer})
+        gs["turn_index"] += 1
+
+        # ===== Check terminal conditions =====
+        reached_A = tuple(gs["player_states"]["A"]["position"]) == beacon
+        reached_B = tuple(gs["player_states"]["B"]["position"]) == beacon
+        if reached_A and reached_B:
+            self._finish(gs, None, "Both players reached the Beacon simultaneously.")
+            return self.state.step()
+        if reached_A or reached_B:
+            label = "A" if reached_A else "B"
+            self._finish(gs, label, f"Explorer {label} reached the Beacon first.")
+            return self.state.step()
+
+        # Check turn limit
+        if self.state.check_turn_limit():
+            dist_a = self._distance(tuple(gs["player_states"]["A"]["position"]), beacon)
+            dist_b = self._distance(tuple(gs["player_states"]["B"]["position"]), beacon)
+            if dist_a == dist_b:
+                self._finish(gs, None, "Both explorers equally distant at turn limit.")
+            else:
+                label = "A" if dist_a < dist_b else "B"
+                self._finish(gs, label, f"Explorer {label} is closer to Beacon at turn limit.")
+
+        return self.state.step()
+
+    def _finish(self, gs: Dict[str, Any], winner_label: Optional[str], reason: str) -> None:
+        """Record the outcome (``None`` means draw) on the textarena state and in ``gs``."""
+        if winner_label is None:
+            self.state.set_draw(reason=reason)
+        else:
+            self.state.set_winner(player_id=0 if winner_label == "A" else 1, reason=reason)
+        gs["winner"] = winner_label or "Draw"
+        gs["terminated"] = True
+
+    # -------------------------------------------------------------------------
+    # ========== Prompt ==========
+    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
+        """Build the per-turn prompt shown to ``player_id`` from ``game_state``."""
+        player_label = "A" if player_id == 0 else "B"
+        opponent_label = "B" if player_label == "A" else "A"
+        state = game_state["player_states"][player_label]
+        pos = state["position"]
+        visible_map = "\n".join(" ".join(row) for row in state["visible_map"])
+        turn_index = game_state["turn_index"]
+        max_turns = game_state["max_turns"]
+        last_opp_action = game_state["player_states"][opponent_label]["last_action"] or "None yet"
+
+        prompt = f"""
+You are Explorer {player_label} navigating the labyrinth. Your goal is to reach the Central Beacon before your rival.
+Each turn you may issue one command from this action grammar:
+
+[Move:North] | [Move:South] | [Move:East] | [Move:West] | [Scan] | [Wait]
+
+Remember:
+- Maze bounds are 0 ≤ x < {game_state['maze_width']}, 0 ≤ y < {game_state['maze_height']}.
+- North decreases y, South increases y, West decreases x, East increases x.
+- Moving into blocked walls ('X') or out of bounds is invalid.
+- The beacon lies at the labyrinth’s center at {game_state['beacon_position']}.
+- You must wrap your command inside \\boxed{{}}.
+
+Current turn: {turn_index}/{max_turns}
+Your current position: {pos}
+Your visible 3×3 map (top row is north; '.' open, 'X' wall, 'B' beacon, '?' outside the maze):
+{visible_map}
+
+Your opponent’s last known action: {last_opp_action}
+
+Example valid response:
+I want to go north toward the Beacon.
+\\boxed{{[Move:North]}}
+
+Example invalid response:
+Let's go northeast! ← invalid direction keyword
+
+Now choose your next command carefully.
+Put your final answer within \\boxed{{}} at the end of your response.
+""".strip()
+        return prompt
diff --git a/environment.md b/environment.md
new file mode 100644
index 0000000..79526d7
--- /dev/null
+++ b/environment.md
@@ -0,0 +1,215 @@
+# **Game Design Document: “Labyrinth Command”**
+
+---
+
+## 1. Concept Paragraph
+
+**“Labyrinth Command”** is a deterministic, turn-based two-player tactical maze exploration game.
 Two rival explorers are trapped inside a grid-shaped labyrinth and must reach the **Central Beacon** at the maze’s heart before their opponent. Each turn, players issue one command from a fixed grammar of movement and interaction tokens (e.g., `[Move:North]`, `[Scan]`, `[Wait]`). The maze layout, beacon position, and obstacles are generated deterministically from a single seed, ensuring reproducibility. The game is **not** related to any economic, negotiation, or resource-trading example—its theme focuses purely on spatial logic and exploration within a confined environment.
+
+---
+
+## 2. Roles and Win Condition
+
+**Roles**
+- **Explorer A** and **Explorer B** are rival adventurers in identical labyrinth conditions.
+- Both start at distinct, opposite corners of the maze.
+
+**Objectives**
+- Reach the **Central Beacon Cell (B)** before the opponent.
+- A secondary scoring system tracks proximity to the Beacon at game end if neither player reaches it within the turn limit.
+
+**Win Rule**
+1. A player *wins immediately* if they enter the Beacon cell first.
+2. If both reach simultaneously on the same turn: **Draw**.
+3. If the turn limit expires with no beacon reached: the player closer (Manhattan distance) to the Beacon **wins**.
+4. If both are equally distant: **Draw**.
+
+---
+
+## 3. Turn Structure and Determinism
+
+- The game proceeds in **alternating turns**, starting with Explorer A.
+- Each turn consists of one player action, followed by an environment update and the opponent’s observation.
+- **Turn limit:** 20 turns per player (40 total).
+- Maze generation and beacon placement use a **seed** value set at `reset`, guaranteeing fully deterministic structure and outcomes for identical seeds.
+- All elements of randomness (e.g., obstacle positions) derive from this same seed.
+
+---
+
+## 4.
 Action Grammar (Machine-Parseable)
+
+**Allowed Action Tokens (case-sensitive):**
+
+| Token Pattern | Meaning |
+|----------------|----------|
+| `[Move:Direction]` | Move one cell in a cardinal direction (`North`, `South`, `East`, `West`) if not blocked. |
+| `[Scan]` | Reveal contents of adjacent cells to update the player’s visible map. |
+| `[Wait]` | Skip the move, useful for strategic timing. |
+
+**Formal Patterns (Regex-style):**
+1. `^\[Move:(North|South|East|West)\]$`
+2. `^\[Scan\]$`
+3. `^\[Wait\]$`
+
+**Examples**
+
+| Action | Validity | Explanation |
+|--------|-----------|-------------|
+| `[Move:North]` | ✅ Valid | Matches move pattern |
+| `[Scan]` | ✅ Valid | Matches scan pattern |
+| `[Wait]` | ✅ Valid | Matches wait pattern |
+| `[Move:Northeast]` | ❌ Invalid | Direction not allowed |
+| `[move:North]` | ❌ Invalid | Case-sensitive mismatch |
+| `[Attack]` | ❌ Invalid | Unsupported token |
+
+---
+
+## 5. Game State Schema
+
+```json
+{
+  "seed": 18457,
+  "turn_index": 2,
+  "max_turns": 40,
+  "maze_width": 7,
+  "maze_height": 7,
+  "beacon_position": [3, 3],
+  "cells_blocked": [[0,1],[2,2],[4,5]],
+  "player_states": {
+    "A": {
+      "position": [1,0],
+      "visible_map": [["?", "?", "?"],[".", ".", "."],["X", ".", "."]],
+      "visited_cells": [[0,0],[1,0]],
+      "last_action": "[Move:East]"
+    },
+    "B": {
+      "position": [6,6],
+      "visible_map": [[".", ".", "?"],[".", ".", "?"],["?", "?", "?"]],
+      "visited_cells": [[6,6]],
+      "last_action": "[Scan]"
+    }
+  },
+  "transcript": [
+    {"player":"A", "action":"[Move:East]"},
+    {"player":"B", "action":"[Scan]"}
+  ],
+  "winner": null,
+  "terminated": false
+}
+```
+
+---
+
+## 6. Initialization Rules
+
+- Maze layout generated through seeded deterministic algorithm (`seed` provided or auto-generated).
+- Both players placed:
+  - Explorer A → top-left corner `[0,0]`
+  - Explorer B → bottom-right corner `[width-1,height-1]`
+- Beacon placed at center `(width//2, height//2)`.
+- `visible_map` is initialized with limited visibility: only the 3×3 region around the player is revealed, and cells outside the maze bounds are marked `?`.
+- At `reset`, each player receives:
+  - Maze dimensions
+  - Starting coordinates
+  - Number of turns and win condition summary
+
+---
+
+## 7. Validation and Error Handling
+
+**Invalid Move Detection Rules**
+- Action not matching one of the defined regex patterns → `Invalid token format`
+- Action would move explorer outside maze bounds → `Move out of bounds`
+- Action would move explorer into blocked cell → `Cell blocked`
+- Any attempt made after terminal state → `Game already finished`
+
+The environment calls `set_invalid_move(reason)` upon detection.
+
+---
+
+## 8. Terminal Conditions and Scoring
+
+**Terminal Triggers**
+1. Player enters the Beacon cell → Win for that player.
+2. Both reach Beacon simultaneously → Draw.
+3. Turn limit reached → Compare distance to Beacon.
+   - Smaller Manhattan distance → Win.
+   - Equal → Draw.
+
+**Scoring Computation**
+- Winner gets `1`, loser `0`, draw `0.5`.
+- The outcome is stored in the `winner` key as `"A"`, `"B"`, or `"Draw"`.
+
+---
+
+## 9. Player Prompt Specification
+
+**Prompt Content Outline**
+- Game title and theme summary
+- Player’s identity (Explorer A or B)
+- Current turn number and limits
+- Player’s current position, visible map grid, and last known opponent action
+- List of allowable command formats
+- Reminder to place final command inside `\boxed{{}}`
+- Examples of valid vs invalid formatting
+
+**Prompt Example**
+```
+You are Explorer A navigating the labyrinth. Your goal is to reach the Central Beacon before your rival.
+You can issue ONE command per turn using the following grammar:
+
+[Move:North] | [Move:South] | [Move:East] | [Move:West] | [Scan] | [Wait]
+
+Remember:
+- Moving into blocked walls or out of bounds is invalid.
+- The beacon lies at the labyrinth’s center.
+- You must wrap your command inside \\boxed{{}}.
+
+Example valid response:
+I want to go north to advance toward the beacon.
+\\boxed{{[Move:North]}}
+
+Example invalid response:
+Let’s head northeast. ← invalid direction keyword
+
+Now it is your turn. Choose your next command carefully.
+Put your final answer within \\boxed{{}} at the end of your response.
+```
+
+**Helper:** `_extract_answer_content(self, action: str) -> str`
+Extracts the content enclosed by `\boxed{{...}}` for validation and execution.
+
+---
+
+## 10. API Mapping Plan
+
+**reset()**
+- Generate deterministic maze grid based on seed.
+- Initialize all fields of `game_state` per schema.
+- Return initial observation for each player, including map visibility and rules summary.
+
+**step(player_action)**
+- Use `_extract_answer_content` to unwrap the boxed token.
+- Validate with grammar and state constraints.
+- If invalid → call `set_invalid_move`.
+- If valid → mutate player position/visibility, append to `transcript`.
+- Perform terminal condition checks after each move; update `winner` and `terminated` appropriately.
+- Return resulting state observation and game status.
+
+**_generate_player_prompt(player_id)**
+- Construct text prompt per section 9.
+- Include available moves, last opponent move, remaining turns, and map details.
+- Append "Put your final answer within \\boxed{{}} at the end of your response."
+
+---
+
+## 11. Copy-Check Against the Example
+
+- The **Labyrinth Command** game has an *exploration and spatial logic* theme, **not** negotiation, trade, or economy-related.
+- All entities—**maze**, **beacon**, **blocked cells**, and **explorers**—are original constructs.
+- Action tokens `[Move:…]`, `[Scan]`, `[Wait]`, and state keys (`beacon_position`, `cells_blocked`, `visible_map`) are unique to this design.
+- No resource exchanges, offers, or bargaining are present.
+
+---
+
+**End of Design Document – “Labyrinth Command”**
\ No newline at end of file