242 lines
9.9 KiB
Python
242 lines
9.9 KiB
Python
|
|
```python
|
|||
|
|
import re
|
|||
|
|
import random
|
|||
|
|
from typing import Any, Dict, Optional, Tuple, List
|
|||
|
|
|
|||
|
|
import textarena as ta
|
|||
|
|
|
|||
|
|
|
|||
|
|
class StellarOrchardEnv(ta.Env):
    """
    Stellar Orchard – Turn-based deterministic horticulture strategy game
    Implements Stage 1 specification of "Stellar Orchard" environment.

    Two players ("Solar Gardener" = side A, "Lunar Gardener" = side B) each own
    five plots (A1-A5 / B1-B5). On a turn a player may Plant, Nurture or
    Harvest one of their own plots, or Pass. A planted tree starts at growth
    level 1, each Nurture adds one level, and a tree at level 3 can be
    harvested for ``int(10 * soil_fertility)`` Energy Points. The player with
    more Energy Points when the game ends wins.
    """

    # Role name for each player id; also handed to the state as role mapping.
    _ROLES = {0: "Solar Gardener", 1: "Lunar Gardener"}
    # Plot-id prefix (and energy_points key) for each player id.
    _SIDES = ("A", "B")
    # Growth level at which a tree becomes harvestable.
    _MAX_GROWTH = 3
    # Matches \boxed{...} as well as the doubled-brace form \boxed{{...}}
    # that the player prompt displays.
    _BOXED_RE = re.compile(r"\\boxed\{\{?(.*?)\}\}?", re.DOTALL)

    def __init__(self, max_turns: int = 10):
        """
        Args:
            max_turns: Number of valid moves after which the game is scored.
        """
        self.max_turns = max_turns
        # Grammar – exactly per Stage 1
        self.patterns = {
            "Plant": re.compile(r"^Plant:(A[1-5]|B[1-5])$"),
            "Nurture": re.compile(r"^Nurture:(A[1-5]|B[1-5])$"),
            "Harvest": re.compile(r"^Harvest:(A[1-5]|B[1-5])$"),
            "Pass": re.compile(r"^Pass$"),
        }
        self.weather_types = ["Radiant Skies", "Lunar Mist", "Crystal Winds"]

    # ----------------------------------------------------------------------
    # Helper: Extract boxed content
    def _extract_answer_content(self, action: str) -> str:
        """
        Extract the literal command from a player's raw response.

        Both ``\\boxed{...}`` and ``\\boxed{{...}}`` are accepted. When the
        response contains several boxed spans, the last one is used because
        the prompt asks for the final answer at the end of the response.

        Args:
            action: Raw text submitted by the player.

        Returns:
            The stripped boxed content, or the stripped raw text when no
            boxed span is present.
        """
        boxed = self._BOXED_RE.findall(action)
        if boxed:
            return boxed[-1].strip()
        # fallback to direct content
        return action.strip()

    # ----------------------------------------------------------------------
    def reset(self, num_players: int, seed: Optional[int] = None):
        """
        Resets the environment to an initial state.

        Args:
            num_players: Must be 2 for Stellar Orchard.
            seed: Optional deterministic seed.

        Returns:
            None

        Raises:
            ValueError: If ``num_players`` is not 2.
        """
        if num_players != 2:
            raise ValueError("Stellar Orchard requires exactly 2 players.")

        self.state = ta.TwoPlayerState(
            num_players=num_players,
            seed=seed,
            max_turns=self.max_turns,
            error_allowance=1,
        )

        # Without an explicit seed, draw one and really seed with it, so the
        # "random_seed" stored in the game state reproduces this exact layout.
        effective_seed = seed if seed is not None else random.randint(0, 100000)
        random.seed(effective_seed)

        # deterministic environment setup (draw order: A1..A5, then B1..B5)
        plot_ids = [f"{side}{i}" for side in self._SIDES for i in range(1, 6)]
        soil_fertility = {pid: random.uniform(0.5, 1.0) for pid in plot_ids}
        weather_pattern = self.weather_types[effective_seed % len(self.weather_types)]

        plots: Dict[str, Dict[str, Any]] = {
            pid: {"owner": pid[0], "status": "empty", "growth_level": 0}
            for pid in plot_ids
        }

        game_state = {
            "turn_number": 0,
            "max_turns": self.max_turns,
            "active_player": self._ROLES[0],
            "plots": plots,
            "energy_points": {side: 0 for side in self._SIDES},
            "soil_fertility": soil_fertility,
            "weather_pattern": weather_pattern,
            "transcript": [],
            "winner": None,
            "random_seed": effective_seed,
        }

        self.state.reset(
            game_state=game_state,
            player_prompt_function=self._generate_player_prompt,
            role_mapping=dict(self._ROLES),
        )

        self.state.add_observation("Welcome to Stellar Orchard!", ta.ObservationType.GAME_MESSAGE)
        self.state.add_observation(f"Weather pattern: {weather_pattern}", ta.ObservationType.GAME_MESSAGE)
        return None

    # ----------------------------------------------------------------------
    def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
        """
        Construct a prompt for a player according to design spec.

        Args:
            player_id: 0 for the Solar Gardener (side A), anything else for
                the Lunar Gardener (side B).
            game_state: The dictionary built in ``reset``.

        Returns:
            The instruction text, listing only the player's own plots, soil
            fertility and Energy Points.
        """
        role = "Solar Gardener" if player_id == 0 else "Lunar Gardener"
        player_key = "A" if player_id == 0 else "B"

        # summarize plots
        plot_summary = "\n".join(
            f"{pid}: {info['status']} (growth {info['growth_level']})"
            for pid, info in game_state["plots"].items()
            if info["owner"] == player_key
        )
        soil_summary = ", ".join(
            f"{pid}:{fertility:.2f}"
            for pid, fertility in game_state["soil_fertility"].items()
            if pid.startswith(player_key)
        )

        ep = game_state["energy_points"][player_key]
        weather = game_state["weather_pattern"]
        remaining_turns = game_state["max_turns"] - game_state["turn_number"]

        valid_actions = (
            "Possible actions this turn:\n"
            " - Plant:<plot>\n"
            " - Nurture:<plot>\n"
            " - Harvest:<plot>\n"
            " - Pass"
        )

        # NOTE(review): the trailing literals are plain strings, not
        # f-strings, so "{{" / "}}" reach the player as doubled braces.
        # The text is kept unchanged; _extract_answer_content accepts both
        # the doubled and the single-brace form.
        instr = (
            f"You are the {role}, a cosmic horticulturist tending glowing alien trees on Selora.\n"
            f"Your goal is to maximize Energy Points (EP) by cultivating your plots before the season ends.\n\n"
            f"Current Weather: {weather}\n"
            f"Soil Fertility (your plots): {soil_summary}\n"
            f"Your Energy Points: {ep}\n"
            f"Your Orchard Status:\n{plot_summary}\n\n"
            f"Turn: {game_state['turn_number']} | Remaining turns: {remaining_turns}\n\n"
            f"{valid_actions}\n\n"
            "Put your final answer within \\boxed{{}} at the end of your response.\n\n"
            "Example valid response:\n"
            "I will plant my first tree in A2.\n"
            "\\boxed{{Plant:A2}}\n\n"
            "Example invalid response:\n"
            "Let's go!\n"
            "\\boxed{{Grow:A2}} # Invalid keyword"
        )

        return instr

    # ----------------------------------------------------------------------
    def step(self, action: str) -> Tuple[bool, ta.Info]:
        """
        Perform a single turn step with validation and deterministic game update.

        The extracted command is appended to the transcript before it is
        validated, so rejected commands are recorded too. The turn counter
        only advances on a valid move.

        Args:
            action: Raw response of the current player.

        Returns:
            Whatever ``self.state.step()`` returns.
        """
        player_id = self.state.current_player_id
        player_symbol = "A" if player_id == 0 else "B"

        self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)
        game_state = self.state.game_state

        literal = self._extract_answer_content(action)

        # record transcript
        game_state["transcript"].append({"player": player_symbol, "action": literal})

        # validation against the grammar
        valid_type = next(
            (name for name, pattern in self.patterns.items() if pattern.match(literal)),
            None,
        )
        if valid_type is None:
            self.state.set_invalid_move(reason="Invalid format")
            return self.state.step()

        # process the deterministic mechanics
        if valid_type != "Pass":
            target_plot = literal.split(":")[1]
            if not target_plot.startswith(player_symbol):
                self.state.set_invalid_move(reason="Plot not owned by player.")
                return self.state.step()

            rejection = self._apply_plot_action(valid_type, target_plot, player_symbol, game_state)
            if rejection is not None:
                self.state.set_invalid_move(reason=rejection)
                return self.state.step()

        # increment turn number and hand over to the other gardener
        game_state["turn_number"] += 1
        game_state["active_player"] = "Solar Gardener" if player_symbol == "B" else "Lunar Gardener"

        # terminal checks (sets the outcome on the state when the game is over)
        self._check_terminal_conditions()
        return self.state.step()

    def _apply_plot_action(
        self,
        action_type: str,
        target_plot: str,
        player_symbol: str,
        game_state: Dict[str, Any],
    ) -> Optional[str]:
        """Apply Plant/Nurture/Harvest to a plot; return a rejection reason or None."""
        plot = game_state["plots"][target_plot]
        status = plot["status"]

        if action_type == "Plant":
            if status != "empty":
                return "Plot already occupied."
            plot["status"] = "seedling"
            plot["growth_level"] = 1

        elif action_type == "Nurture":
            # Checked before the generic test so a grown tree gets the
            # accurate message, not "No tree to nurture."
            if status == "grown":
                return "Tree already fully grown."
            if status not in ("seedling", "growing"):
                return "No tree to nurture."
            plot["growth_level"] += 1
            plot["status"] = "grown" if plot["growth_level"] >= self._MAX_GROWTH else "growing"

        elif action_type == "Harvest":
            if status != "grown":
                return "Tree not ready to harvest."
            # deterministic energy gain
            gain = int(10 * game_state["soil_fertility"][target_plot])
            game_state["energy_points"][player_symbol] += gain
            plot["status"] = "harvested"
            plot["growth_level"] = 0

        return None

    # ----------------------------------------------------------------------
    def _check_terminal_conditions(self) -> bool:
        """
        Check game end and set the outcome on the state.

        The game ends when the turn limit is reached, or when no tree is
        alive (every plot is empty or harvested) and at least one plot has
        been harvested. The harvest requirement keeps the untouched starting
        board, where every plot is empty, from ending the game at once.

        Returns:
            True if the game is over (or already was), False otherwise.
        """
        game_state = self.state.game_state
        if self.state.done:
            return True

        statuses = [plot["status"] for plot in game_state["plots"].values()]
        orchard_spent = "harvested" in statuses and all(
            status in ("empty", "harvested") for status in statuses
        )
        out_of_turns = game_state["turn_number"] >= game_state["max_turns"]
        if not (orchard_spent or out_of_turns):
            return False

        ep = game_state["energy_points"]
        if ep["A"] == ep["B"]:
            self.state.set_draw(reason="Equal Energy Points. Draw.")
        else:
            winner = 0 if ep["A"] > ep["B"] else 1
            self.state.set_winner(player_id=winner, reason=f"Player {winner} had more Energy Points.")
        return True

    # ----------------------------------------------------------------------
    def get_observation(self) -> Tuple[int, List]:
        """Return current player's observation tuple."""
        return self.state.current_player_id, self.state.observations

    def close(self) -> Tuple[Dict, Dict]:
        """Finalize episode outputs."""
        return self.state.rewards, self.state.game_info
|
|||
|
|
```
|