Initial commit from Openverse UI
This commit is contained in:
278
env.py
Normal file
278
env.py
Normal file
@@ -0,0 +1,278 @@
|
|||||||
|
```python
|
||||||
|
import re
|
||||||
|
import random
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
|
||||||
|
import textarena as ta
|
||||||
|
|
||||||
|
|
||||||
|
class LabyrinthCommandEnv(ta.Env):
|
||||||
|
"""
|
||||||
|
Deterministic, turn-based two-player tactical maze environment: "Labyrinth Command"
|
||||||
|
Two players (Explorer A and B) move through a deterministic maze to reach the Central Beacon.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, max_turns: int = 40, maze_width: int = 7, maze_height: int = 7):
|
||||||
|
self.max_turns = max_turns
|
||||||
|
self.maze_width = maze_width
|
||||||
|
self.maze_height = maze_height
|
||||||
|
self.move_pattern = re.compile(r"^\[Move:(North|South|East|West)\]$")
|
||||||
|
self.scan_pattern = re.compile(r"^\[Scan\]$")
|
||||||
|
self.wait_pattern = re.compile(r"^\[Wait\]$")
|
||||||
|
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
# ========== Helper: Extract boxed command ==========
|
||||||
|
def _extract_answer_content(self, action: str) -> str:
|
||||||
|
"""Extract content within \\boxed{{...}}."""
|
||||||
|
match = re.search(r"\\boxed\{\{(.*?)\}\}", action, re.DOTALL)
|
||||||
|
if match:
|
||||||
|
return match.group(1).strip()
|
||||||
|
match = re.search(r"\\boxed\{(.*?)\}", action, re.DOTALL)
|
||||||
|
if match:
|
||||||
|
return match.group(1).strip()
|
||||||
|
return action.strip()
|
||||||
|
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
# ========== Maze and visibility helpers ==========
|
||||||
|
def _generate_deterministic_maze(self, seed: int) -> List[List[str]]:
|
||||||
|
"""Generate deterministic maze using random seeded layout of blocked cells."""
|
||||||
|
random.seed(seed)
|
||||||
|
maze = [["." for _ in range(self.maze_width)] for _ in range(self.maze_height)]
|
||||||
|
num_blocks = (self.maze_width * self.maze_height) // 10 # about 10% blocked
|
||||||
|
for _ in range(num_blocks):
|
||||||
|
x = random.randint(0, self.maze_width - 1)
|
||||||
|
y = random.randint(0, self.maze_height - 1)
|
||||||
|
if (x, y) != (0, 0) and (x, y) != (self.maze_width - 1, self.maze_height - 1):
|
||||||
|
maze[y][x] = "X"
|
||||||
|
return maze
|
||||||
|
|
||||||
|
def _compute_visible_map(self, maze: List[List[str]], pos: Tuple[int, int]) -> List[List[str]]:
|
||||||
|
"""Compute a 3x3 visible map centered on pos."""
|
||||||
|
visible = []
|
||||||
|
for dy in range(-1, 2):
|
||||||
|
row = []
|
||||||
|
for dx in range(-1, 2):
|
||||||
|
nx, ny = pos[0] + dx, pos[1] + dy
|
||||||
|
if 0 <= nx < self.maze_width and 0 <= ny < self.maze_height:
|
||||||
|
row.append(maze[ny][nx])
|
||||||
|
else:
|
||||||
|
row.append("?")
|
||||||
|
visible.append(row)
|
||||||
|
return visible
|
||||||
|
|
||||||
|
def _distance(self, a: Tuple[int, int], b: Tuple[int, int]) -> int:
|
||||||
|
return abs(a[0] - b[0]) + abs(a[1] - b[1])
|
||||||
|
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
# ========== Reset ==========
|
||||||
|
def reset(self, num_players: int, seed: Optional[int] = None):
    """
    Resets the environment to an initial state.

    Builds the maze for the seed, places the beacon at the grid centre,
    puts Explorer A at ``(0, 0)`` and Explorer B at the opposite corner,
    and initialises the game state.

    Positions are stored as ``[x, y]`` lists from the start, matching what
    ``step`` writes after a move, so the stored type and the printed form
    of a position do not change mid-game.

    Args:
        num_players: must be 2.
        seed: optional deterministic seed. When omitted a random seed in
            ``1..999999`` is drawn.

    Returns:
        The freshly initialised state object.

    Raises:
        ValueError: If ``num_players`` is not 2.
    """
    if num_players != 2:
        raise ValueError("Labyrinth Command requires exactly 2 players.")

    seed = seed if seed is not None else random.randint(1, 999999)
    self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)

    maze = self._generate_deterministic_maze(seed)
    beacon_pos = (self.maze_width // 2, self.maze_height // 2)
    # Writing "B" also clears any wall that was drawn onto the beacon cell.
    maze[beacon_pos[1]][beacon_pos[0]] = "B"

    start_A = (0, 0)
    start_B = (self.maze_width - 1, self.maze_height - 1)

    player_states = {
        label: {
            "position": list(start),
            "visible_map": self._compute_visible_map(maze, start),
            "visited_cells": [list(start)],
            "last_action": None,
        }
        for label, start in (("A", start_A), ("B", start_B))
    }

    # Collected after the beacon is placed, so the beacon cell is never listed.
    cells_blocked = [
        [x, y]
        for y in range(self.maze_height)
        for x in range(self.maze_width)
        if maze[y][x] == "X"
    ]

    game_state = {
        "seed": seed,
        "turn_index": 0,
        "max_turns": self.max_turns,
        "maze_width": self.maze_width,
        "maze_height": self.maze_height,
        "beacon_position": list(beacon_pos),
        "cells_blocked": cells_blocked,
        "player_states": player_states,
        "transcript": [],
        "winner": None,
        "terminated": False,
    }

    self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt)
    self.state.add_observation(message="Welcome to Labyrinth Command!", observation_type=ta.ObservationType.GAME_MESSAGE)
    self.state.add_observation(message=f"Seed: {seed} ensures deterministic maze generation.", observation_type=ta.ObservationType.GAME_MESSAGE)
    return self.state
|
||||||
|
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
# ========== Step ==========
|
||||||
|
def step(self, action: str) -> Tuple[bool, ta.Info]:
    """
    Perform a single environment step for the current player.

    The raw action is logged, the boxed command is extracted and checked
    against the grammar, and a valid command is applied to the player's
    state. After a Move or Scan the refreshed position and 3x3 view are
    sent privately to the acting player. Finally the beacon and turn-limit
    rules are evaluated.

    Args:
        action: Raw response text of the current player.

    Returns:
        The result of ``self.state.step()``.
    """
    player_id = self.state.current_player_id
    # log the player action
    self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)
    player_label = "A" if player_id == 0 else "B"

    if self.state.done:
        self.state.set_invalid_move("Game already finished.")
        return self.state.step()

    answer = self._extract_answer_content(action)
    gs = self.state.game_state
    player_state = gs["player_states"][player_label]
    current_pos = tuple(player_state["position"])
    beacon = tuple(gs["beacon_position"])

    # Validate action syntax
    move = self.move_pattern.match(answer)
    if not (move or self.scan_pattern.match(answer) or self.wait_pattern.match(answer)):
        self.state.set_invalid_move(reason="Invalid token format.")
        return self.state.step()

    if move:
        # North decreases y and South increases it (row 0 is the top row).
        deltas = {"North": (0, -1), "South": (0, 1), "West": (-1, 0), "East": (1, 0)}
        dx, dy = deltas[move.group(1)]
        nx, ny = current_pos[0] + dx, current_pos[1] + dy
        if not (0 <= nx < gs["maze_width"] and 0 <= ny < gs["maze_height"]):
            self.state.set_invalid_move("Move out of bounds")
            return self.state.step()
        if (nx, ny) in {tuple(cell) for cell in gs["cells_blocked"]}:
            self.state.set_invalid_move("Cell blocked")
            return self.state.step()
        player_state["position"] = [nx, ny]
        player_state["visited_cells"].append([nx, ny])
        self._refresh_visible_map(gs, player_state, (nx, ny), player_id, player_label)
    elif answer == "[Scan]":
        self._refresh_visible_map(gs, player_state, current_pos, player_id, player_label)
    # "[Wait]" changes nothing on the board.

    player_state["last_action"] = answer
    gs["transcript"].append({"player": player_label, "action": answer})
    gs["turn_index"] += 1

    # ===== Check terminal conditions =====
    reached_A = tuple(gs["player_states"]["A"]["position"]) == beacon
    reached_B = tuple(gs["player_states"]["B"]["position"]) == beacon

    if reached_A and reached_B:
        self.state.set_draw(reason="Both players reached the Beacon simultaneously.")
        gs["winner"] = "Draw"
        gs["terminated"] = True
        return self.state.step()
    if reached_A:
        self.state.set_winner(player_id=0, reason="Explorer A reached the Beacon first.")
        gs["winner"] = "A"
        gs["terminated"] = True
        return self.state.step()
    if reached_B:
        self.state.set_winner(player_id=1, reason="Explorer B reached the Beacon first.")
        gs["winner"] = "B"
        gs["terminated"] = True
        return self.state.step()

    # Check turn limit: the explorer with the smaller Manhattan distance wins.
    if self.state.check_turn_limit():
        distA = self._distance(tuple(gs["player_states"]["A"]["position"]), beacon)
        distB = self._distance(tuple(gs["player_states"]["B"]["position"]), beacon)
        if distA < distB:
            self.state.set_winner(player_id=0, reason="Explorer A is closer to Beacon at turn limit.")
            gs["winner"] = "A"
        elif distB < distA:
            self.state.set_winner(player_id=1, reason="Explorer B is closer to Beacon at turn limit.")
            gs["winner"] = "B"
        else:
            self.state.set_draw(reason="Both explorers equally distant at turn limit.")
            gs["winner"] = "Draw"
        # Every turn-limit outcome ends the game.
        gs["terminated"] = True

    return self.state.step()

def _refresh_visible_map(self, gs, player_state, center, player_id, player_label):
    """Recompute the 3x3 view around ``center`` and report it to the player.

    The grid is rebuilt from ``cells_blocked`` and ``beacon_position`` so
    the beacon keeps its ``"B"`` marker, as in the map built at reset.
    """
    blocked = {tuple(cell) for cell in gs["cells_blocked"]}
    grid = [
        ["X" if (x, y) in blocked else "." for x in range(gs["maze_width"])]
        for y in range(gs["maze_height"])
    ]
    beacon_x, beacon_y = gs["beacon_position"]
    grid[beacon_y][beacon_x] = "B"
    player_state["visible_map"] = self._compute_visible_map(grid, center)

    # Nothing else in step() reports the refreshed view, so send it privately
    # to the acting player.
    # NOTE(review): assumes the prompt function is only rendered at reset - confirm.
    rendered = "\n".join(" ".join(row) for row in player_state["visible_map"])
    self.state.add_observation(
        message=f"Explorer {player_label} position: {list(center)}\nVisible 3×3 map:\n{rendered}",
        observation_type=ta.ObservationType.GAME_MESSAGE,
        to_id=player_id,
    )
|
||||||
|
|
||||||
|
# -------------------------------------------------------------------------
|
||||||
|
# ========== Prompt ==========
|
||||||
|
def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
|
||||||
|
"""Generate player prompt based on Stage 1 design."""
|
||||||
|
player_label = "A" if player_id == 0 else "B"
|
||||||
|
state = game_state["player_states"][player_label]
|
||||||
|
pos = state["position"]
|
||||||
|
visible_map = "\n".join([" ".join(row) for row in state["visible_map"]])
|
||||||
|
turn_index = game_state["turn_index"]
|
||||||
|
max_turns = game_state["max_turns"]
|
||||||
|
opponent_label = "B" if player_label == "A" else "A"
|
||||||
|
last_opp_action = (
|
||||||
|
game_state["player_states"][opponent_label]["last_action"] or "None yet"
|
||||||
|
)
|
||||||
|
|
||||||
|
prompt = f"""
|
||||||
|
You are Explorer {player_label} navigating the labyrinth. Your goal is to reach the Central Beacon before your rival.
|
||||||
|
Each turn you may issue one command from this action grammar:
|
||||||
|
|
||||||
|
[Move:North] | [Move:South] | [Move:East] | [Move:West] | [Scan] | [Wait]
|
||||||
|
|
||||||
|
Remember:
|
||||||
|
- Maze bounds are 0 ≤ x < {game_state['maze_width']}, 0 ≤ y < {game_state['maze_height']}.
|
||||||
|
- Moving into blocked walls ('X') or out of bounds is invalid.
|
||||||
|
- The beacon lies at the labyrinth’s center at {game_state['beacon_position']}.
|
||||||
|
- You must wrap your command inside \\boxed{{}}.
|
||||||
|
|
||||||
|
Current turn: {turn_index}/{max_turns}
|
||||||
|
Your current position: {pos}
|
||||||
|
Your visible 3×3 map:
|
||||||
|
{visible_map}
|
||||||
|
|
||||||
|
Your opponent’s last known action: {last_opp_action}
|
||||||
|
|
||||||
|
Example valid response:
|
||||||
|
I want to go north toward the Beacon.
|
||||||
|
\\boxed{{[Move:North]}}
|
||||||
|
|
||||||
|
Example invalid response:
|
||||||
|
Let's go northeast! ← invalid direction keyword
|
||||||
|
|
||||||
|
Now choose your next command carefully.
|
||||||
|
Put your final answer within \\boxed{{}} at the end of your response.
|
||||||
|
""".strip()
|
||||||
|
return prompt
|
||||||
|
```
|
||||||
215
environment.md
Normal file
215
environment.md
Normal file
@@ -0,0 +1,215 @@
|
|||||||
|
# **Game Design Document: “Labyrinth Command”**
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 1. Concept Paragraph
|
||||||
|
|
||||||
|
**“Labyrinth Command”** is a deterministic, turn-based two-player tactical maze exploration game. Two rival explorers are trapped inside a grid-shaped labyrinth and must reach the **Central Beacon** at the maze’s heart before their opponent. Each turn, players issue one command from a fixed grammar of movement and interaction tokens (e.g., `[Move:North]`, `[Scan]`, `[Wait]`). The maze layout, beacon position, and obstacles are generated deterministically from a single seed, ensuring reproducibility. The game is **not** related to any economic, negotiation, or resource-trading example—its theme focuses purely on spatial logic and exploration within a confined environment.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 2. Roles and Win Condition
|
||||||
|
|
||||||
|
**Roles**
|
||||||
|
- **Explorer A** and **Explorer B** are rival adventurers in identical labyrinth conditions.
|
||||||
|
- Both start at distinct, opposite corners of the maze.
|
||||||
|
|
||||||
|
**Objectives**
|
||||||
|
- Reach the **Central Beacon Cell (B)** before the opponent.
|
||||||
|
- A secondary scoring system tracks proximity to the Beacon at game end if neither player reaches it within the turn limit.
|
||||||
|
|
||||||
|
**Win Rule**
|
||||||
|
1. A player *wins immediately* if they enter the Beacon cell first.
|
||||||
|
2. If both reach simultaneously on the same turn: **Draw**.
|
||||||
|
3. If turn limit expires with no beacon reached: player closer (Manhattan distance) to the Beacon **wins**.
|
||||||
|
4. If both are equally distant: **Draw**.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 3. Turn Structure and Determinism
|
||||||
|
|
||||||
|
- The game proceeds in **alternating turns**, starting with Explorer A.
|
||||||
|
- Each turn = one player action followed by environment update and opponent observation.
|
||||||
|
- **Turn limit:** 20 turns per player (40 total).
|
||||||
|
- Maze generation and beacon placement use a **seed** value set at `reset`, guaranteeing fully deterministic structure and outcomes for identical seeds.
|
||||||
|
- All elements of randomness (e.g., obstacle positions) derive from this same seed.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 4. Action Grammar (Machine-Parseable)
|
||||||
|
|
||||||
|
**Allowed Action Tokens (case-sensitive):**
|
||||||
|
|
||||||
|
| Token Pattern | Meaning |
|
||||||
|
|----------------|----------|
|
||||||
|
| `[Move:Direction]` | Move one cell in a cardinal direction (`North`, `South`, `East`, `West`) if not blocked. |
|
||||||
|
| `[Scan]` | Reveal contents of adjacent cells to update the player’s visible map. |
|
||||||
|
| `[Wait]` | Skip the move, useful for strategic timing. |
|
||||||
|
|
||||||
|
**Formal Patterns (Regex-style):**
|
||||||
|
1. `^\[Move:(North|South|East|West)\]$`
|
||||||
|
2. `^\[Scan\]$`
|
||||||
|
3. `^\[Wait\]$`
|
||||||
|
|
||||||
|
**Examples**
|
||||||
|
|
||||||
|
| Action | Validity | Explanation |
|
||||||
|
|--------|-----------|-------------|
|
||||||
|
| `[Move:North]` | ✅ Valid | Matches move pattern |
|
||||||
|
| `[Scan]` | ✅ Valid | Matches scan pattern |
|
||||||
|
| `[Wait]` | ✅ Valid | Matches wait pattern |
|
||||||
|
| `[Move:Northeast]` | ❌ Invalid | Direction not allowed |
|
||||||
|
| `[move:North]` | ❌ Invalid | Case-sensitive mismatch |
|
||||||
|
| `[Attack]` | ❌ Invalid | Unsupported token |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 5. Game State Schema
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"seed": 18457,
|
||||||
|
"turn_index": 6,
|
||||||
|
"max_turns": 40,
|
||||||
|
"maze_width": 7,
|
||||||
|
"maze_height": 7,
|
||||||
|
"beacon_position": [3, 3],
|
||||||
|
"cells_blocked": [[0,1],[2,2],[4,5]],
|
||||||
|
"player_states": {
|
||||||
|
"A": {
|
||||||
|
"position": [0,0],
|
||||||
|
"visible_map": [["?", "X", "?", "?"],["?", ".", ".", "?"],["?", "?", ".", "?"]],
|
||||||
|
"visited_cells": [[0,0],[1,0]],
|
||||||
|
"last_action": "[Move:South]"
|
||||||
|
},
|
||||||
|
"B": {
|
||||||
|
"position": [6,6],
|
||||||
|
"visible_map": [["?", ".", "?"],[".", ".", "?"],["?", "?", "?"]],
|
||||||
|
"visited_cells": [[6,6]],
|
||||||
|
"last_action": "[Scan]"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"transcript": [
|
||||||
|
{"player":"A", "action":"[Move:South]"},
|
||||||
|
{"player":"B", "action":"[Scan]"}
|
||||||
|
],
|
||||||
|
"winner": null,
|
||||||
|
"terminated": false
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 6. Initialization Rules
|
||||||
|
|
||||||
|
- Maze layout generated through seeded deterministic algorithm (`seed` provided or auto-generated).
|
||||||
|
- Both players placed:
|
||||||
|
- Explorer A → top-left corner `[0,0]`
|
||||||
|
- Explorer B → bottom-right corner `[width-1,height-1]`
|
||||||
|
- Beacon placed at center `(width//2, height//2)`.
|
||||||
|
- `visible_map` is initialized with limited visibility: only the 3×3 region around the player is revealed, and cells outside the maze bounds are marked `?` (unknown).
|
||||||
|
- At `reset`, each player receives:
|
||||||
|
- Maze dimensions
|
||||||
|
- Starting coordinates
|
||||||
|
- Number of turns and win condition summary
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 7. Validation and Error Handling
|
||||||
|
|
||||||
|
**Invalid Move Detection Rules**
|
||||||
|
- Action not matching one of the defined regex patterns → `Invalid token format`
|
||||||
|
- Action would move explorer outside maze bounds → `Move out of bounds`
|
||||||
|
- Action would move explorer into blocked cell → `Cell blocked`
|
||||||
|
- Any attempt made after terminal state → `Game already finished`
|
||||||
|
|
||||||
|
System calls `set_invalid_move(player, reason)` upon detection.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 8. Terminal Conditions and Scoring
|
||||||
|
|
||||||
|
**Terminal Triggers**
|
||||||
|
1. Player enters the Beacon cell → Win for that player.
|
||||||
|
2. Both reach Beacon simultaneously → Draw.
|
||||||
|
3. Turn limit reached → Compare distance to Beacon.
|
||||||
|
- Smaller Manhattan distance → Win.
|
||||||
|
- Equal → Draw.
|
||||||
|
|
||||||
|
**Scoring Computation**
|
||||||
|
- Winner gets `1`, loser `0`, draw `0.5`.
|
||||||
|
- Stored in `winner` key as `"A"`, `"B"`, or `"Draw"`.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 9. Player Prompt Specification
|
||||||
|
|
||||||
|
**Prompt Content Outline**
|
||||||
|
- Game title and theme summary
|
||||||
|
- Player’s identity (Explorer A or B)
|
||||||
|
- Current turn number and limits
|
||||||
|
- Player’s current position, visible map grid, and last known opponent action
|
||||||
|
- List of allowable command formats
|
||||||
|
- Reminder to place final command inside `\boxed{{}}`
|
||||||
|
- Examples of valid vs invalid formatting
|
||||||
|
|
||||||
|
**Prompt Example**
|
||||||
|
```
|
||||||
|
You are Explorer A navigating the labyrinth. Your goal is to reach the Central Beacon before your rival.
|
||||||
|
You can issue ONE command per turn using the following grammar:
|
||||||
|
|
||||||
|
[Move:North] | [Move:South] | [Move:East] | [Move:West] | [Scan] | [Wait]
|
||||||
|
|
||||||
|
Remember:
|
||||||
|
- Moving into blocked walls or out of bounds is invalid.
|
||||||
|
- The beacon lies at the labyrinth’s center.
|
||||||
|
- You must wrap your command inside \\boxed{{}}.
|
||||||
|
|
||||||
|
Example valid response:
|
||||||
|
I want to go north to advance toward the beacon.
|
||||||
|
\\boxed{{[Move:North]}}
|
||||||
|
|
||||||
|
Example invalid response:
|
||||||
|
Let’s head northeast. ← invalid direction keyword
|
||||||
|
|
||||||
|
Now it is your turn. Choose your next command carefully.
|
||||||
|
Put your final answer within \\boxed{{}} at the end of your response.
|
||||||
|
```
|
||||||
|
|
||||||
|
**Helper:** `_extract_answer_content(self, action: str) -> str`
|
||||||
|
Extracts the content enclosed by `\boxed{{...}}` for validation and execution.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 10. API Mapping Plan
|
||||||
|
|
||||||
|
**reset()**
|
||||||
|
- Generate deterministic maze grid based on seed.
|
||||||
|
- Initialize all fields of `game_state` per schema.
|
||||||
|
- Return initial observation for each player, including map visibility and rules summary.
|
||||||
|
|
||||||
|
**step(player_action)**
|
||||||
|
- Use `_extract_answer_content` to unwrap the boxed token.
|
||||||
|
- Validate with grammar and state constraints.
|
||||||
|
- If invalid → call `set_invalid_move`.
|
||||||
|
- If valid → mutate player position/visibility, append to `transcript`.
|
||||||
|
- Perform terminal condition checks after each move; update `winner` and `terminated` appropriately.
|
||||||
|
- Return resulting state observation and game status.
|
||||||
|
|
||||||
|
**_generate_player_prompt(player_id)**
|
||||||
|
- Construct text prompt per section 9.
|
||||||
|
- Include available moves, last opponent move, remaining turns, and map details.
|
||||||
|
- Append "Put your final answer within \\boxed{{}} at the end of your response."
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 11. Copy-Check Against the Example
|
||||||
|
|
||||||
|
- The **Labyrinth Command** game has an *exploration and spatial logic* theme, **not** negotiation, trade, or economy-related.
|
||||||
|
- All entities—**maze**, **beacon**, **blocked cells**, and **explorers**—are original constructs.
|
||||||
|
- Action tokens `[Move:…]`, `[Scan]`, `[Wait]`, and state keys (`beacon_position`, `cells_blocked`, `visible_map`) are unique to this design.
|
||||||
|
- No resource exchanges, offers, or bargaining are present.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**End of Design Document – “Labyrinth Command”**
|
||||||
Reference in New Issue
Block a user