249 lines
9.8 KiB
Python
249 lines
9.8 KiB
Python
|
|
```python
|
|||
|
|
import re
|
|||
|
|
import random
|
|||
|
|
from typing import Any, Dict, Optional, Tuple, List
|
|||
|
|
import textarena as ta
|
|||
|
|
|
|||
|
|
|
|||
|
|
class HoneyHeistBattleEnv(ta.Env):
|
|||
|
|
"""
|
|||
|
|
Environment for "Honey Heist: Battle of the Bears".
|
|||
|
|
Two rival bears compete for honey in a turn-based deterministic environment.
|
|||
|
|
"""
|
|||
|
|
|
|||
|
|
def __init__(self, max_turns: Optional[int] = 20):
|
|||
|
|
self.max_turns = max_turns
|
|||
|
|
# regex patterns for validation
|
|||
|
|
self.patterns = {
|
|||
|
|
"forage": re.compile(r"^\[Forage:(1|2|3)\]$"),
|
|||
|
|
"steal": re.compile(r"^\[Steal:(1|2|3)\]$"),
|
|||
|
|
"defend": re.compile(r"^\[Defend\]$"),
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
# --------------------------------------------------------------------- #
|
|||
|
|
# Helper for extracting boxed content
|
|||
|
|
# --------------------------------------------------------------------- #
|
|||
|
|
def _extract_answer_content(self, action: str) -> str:
|
|||
|
|
"""Extract content inside \boxed{} or return full stripped string."""
|
|||
|
|
match = re.search(r'\\boxed\{([^}]*)\}', action, re.DOTALL)
|
|||
|
|
if match:
|
|||
|
|
return match.group(1).strip()
|
|||
|
|
return action.strip()
|
|||
|
|
|
|||
|
|
# --------------------------------------------------------------------- #
|
|||
|
|
# Reset / Initialization
|
|||
|
|
# --------------------------------------------------------------------- #
|
|||
|
|
def reset(self, num_players: int, seed: Optional[int] = None):
|
|||
|
|
"""
|
|||
|
|
Resets the environment to an initial state.
|
|||
|
|
"""
|
|||
|
|
if num_players != 2:
|
|||
|
|
raise ValueError("Honey Heist: Battle of the Bears requires exactly 2 players.")
|
|||
|
|
|
|||
|
|
# Initialize TwoPlayerState
|
|||
|
|
self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
|
|||
|
|
|
|||
|
|
rng = random.Random(seed)
|
|||
|
|
hive_honey = rng.randint(15, 20)
|
|||
|
|
|
|||
|
|
game_state = {
|
|||
|
|
"turn_number": 1,
|
|||
|
|
"current_player": "BearA",
|
|||
|
|
"hive_honey": hive_honey,
|
|||
|
|
"max_turns": self.max_turns,
|
|||
|
|
"players": {
|
|||
|
|
"BearA": {"stored_honey": 0, "last_action": None, "defending": False, "score": 0},
|
|||
|
|
"BearB": {"stored_honey": 0, "last_action": None, "defending": False, "score": 0},
|
|||
|
|
},
|
|||
|
|
"history": [],
|
|||
|
|
"winner": None,
|
|||
|
|
"draw": False,
|
|||
|
|
"seed": seed if seed is not None else 0,
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
role_mapping = {0: "BearA", 1: "BearB"}
|
|||
|
|
self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt, role_mapping=role_mapping)
|
|||
|
|
|
|||
|
|
self.state.add_observation("Welcome to Honey Heist: Battle of the Bears!", ta.ObservationType.GAME_MESSAGE)
|
|||
|
|
self.state.add_observation(
|
|||
|
|
f"The hive contains {hive_honey} units of honey. BearA goes first.",
|
|||
|
|
ta.ObservationType.GAME_MESSAGE
|
|||
|
|
)
|
|||
|
|
|
|||
|
|
return self.state
|
|||
|
|
|
|||
|
|
# --------------------------------------------------------------------- #
|
|||
|
|
# Player Prompt Generator
|
|||
|
|
# --------------------------------------------------------------------- #
|
|||
|
|
def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
|
|||
|
|
"""Produce role-appropriate prompt for each bear."""
|
|||
|
|
role = "BearA" if player_id == 0 else "BearB"
|
|||
|
|
rival = "BearB" if role == "BearA" else "BearA"
|
|||
|
|
hive_honey = game_state["hive_honey"]
|
|||
|
|
player_honey = game_state["players"][role]["stored_honey"]
|
|||
|
|
rival_honey = game_state["players"][rival]["stored_honey"]
|
|||
|
|
turn_number = game_state["turn_number"]
|
|||
|
|
max_turns = game_state["max_turns"]
|
|||
|
|
|
|||
|
|
prompt = f"""
|
|||
|
|
You are a hungry bear competing for the last honey in the forest.
|
|||
|
|
|
|||
|
|
- Your goal: End the game with more honey than your rival.
|
|||
|
|
- Each turn, choose ONE of the following actions:
|
|||
|
|
[Forage:X] Gather X units (1–3) from the hive.
|
|||
|
|
[Defend] Protect your honey from theft this turn.
|
|||
|
|
[Steal:X] Steal X units (1–3) from your rival if they do not defend.
|
|||
|
|
|
|||
|
|
Game facts:
|
|||
|
|
- Hive honey remaining: {hive_honey}
|
|||
|
|
- Your stored honey: {player_honey}
|
|||
|
|
- Rival stored honey: {rival_honey}
|
|||
|
|
- Turn {turn_number} / {max_turns}
|
|||
|
|
|
|||
|
|
Format rule:
|
|||
|
|
State your reasoning briefly, then put your final action in the following format:
|
|||
|
|
|
|||
|
|
"Put your final answer within \\boxed{{}} at the end of your response."
|
|||
|
|
|
|||
|
|
Example valid response:
|
|||
|
|
I think foraging is safe early on.
|
|||
|
|
\\boxed{{[Forage:3]}}
|
|||
|
|
|
|||
|
|
Example invalid response:
|
|||
|
|
\\boxed{{Forage 3}} <-- Must include brackets and colon.
|
|||
|
|
"""
|
|||
|
|
return prompt.strip()
|
|||
|
|
|
|||
|
|
# --------------------------------------------------------------------- #
|
|||
|
|
# Step - main game logic
|
|||
|
|
# --------------------------------------------------------------------- #
|
|||
|
|
def step(self, action: str) -> Tuple[bool, ta.Info]:
|
|||
|
|
"""
|
|||
|
|
Perform a single environment step for the current player.
|
|||
|
|
"""
|
|||
|
|
player_id = self.state.current_player_id
|
|||
|
|
player_role = "BearA" if player_id == 0 else "BearB"
|
|||
|
|
opponent_role = "BearB" if player_role == "BearA" else "BearA"
|
|||
|
|
|
|||
|
|
game_state = self.state.game_state
|
|||
|
|
|
|||
|
|
# record action to transcript
|
|||
|
|
self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)
|
|||
|
|
|
|||
|
|
# extract and validate
|
|||
|
|
token = self._extract_answer_content(action)
|
|||
|
|
if game_state["winner"] or game_state["draw"]:
|
|||
|
|
self.state.set_invalid_move("Game is already over.")
|
|||
|
|
return self.state.step()
|
|||
|
|
|
|||
|
|
valid_action = False
|
|||
|
|
reason = None
|
|||
|
|
if self.patterns["forage"].match(token):
|
|||
|
|
valid_action = True
|
|||
|
|
act_type = "forage"
|
|||
|
|
qty = int(re.findall(r"\d+", token)[0])
|
|||
|
|
elif self.patterns["steal"].match(token):
|
|||
|
|
valid_action = True
|
|||
|
|
act_type = "steal"
|
|||
|
|
qty = int(re.findall(r"\d+", token)[0])
|
|||
|
|
elif self.patterns["defend"].match(token):
|
|||
|
|
valid_action = True
|
|||
|
|
act_type = "defend"
|
|||
|
|
qty = 0
|
|||
|
|
else:
|
|||
|
|
reason = "Invalid format, must use [Forage:X], [Steal:X], or [Defend]."
|
|||
|
|
|
|||
|
|
if not valid_action:
|
|||
|
|
self.state.set_invalid_move(reason)
|
|||
|
|
return self.state.step()
|
|||
|
|
|
|||
|
|
# handle action
|
|||
|
|
player_data = game_state["players"][player_role]
|
|||
|
|
opp_data = game_state["players"][opponent_role]
|
|||
|
|
|
|||
|
|
if act_type == "forage":
|
|||
|
|
if game_state["hive_honey"] < qty:
|
|||
|
|
self.state.set_invalid_move("Not enough honey in hive.")
|
|||
|
|
return self.state.step()
|
|||
|
|
# update hive and player
|
|||
|
|
game_state["hive_honey"] -= qty
|
|||
|
|
player_data["stored_honey"] += qty
|
|||
|
|
player_data["score"] = player_data["stored_honey"]
|
|||
|
|
|
|||
|
|
elif act_type == "defend":
|
|||
|
|
player_data["defending"] = True
|
|||
|
|
|
|||
|
|
elif act_type == "steal":
|
|||
|
|
if opp_data["stored_honey"] < qty:
|
|||
|
|
self.state.set_invalid_move("Opponent has insufficient honey.")
|
|||
|
|
return self.state.step()
|
|||
|
|
if opp_data["defending"]:
|
|||
|
|
transfer = 0 # blocked
|
|||
|
|
else:
|
|||
|
|
transfer = qty
|
|||
|
|
opp_data["stored_honey"] -= transfer
|
|||
|
|
player_data["stored_honey"] += transfer
|
|||
|
|
player_data["score"] = player_data["stored_honey"]
|
|||
|
|
opp_data["score"] = opp_data["stored_honey"]
|
|||
|
|
|
|||
|
|
# update metadata
|
|||
|
|
player_data["last_action"] = token
|
|||
|
|
entry = {"turn": game_state["turn_number"], "actor": player_role, "action": token}
|
|||
|
|
game_state["history"].append(entry)
|
|||
|
|
game_state["current_player"] = opponent_role
|
|||
|
|
|
|||
|
|
# Next player's defending status reset check
|
|||
|
|
# Every full round (both bears act), clear defending flags
|
|||
|
|
if player_role == "BearB":
|
|||
|
|
game_state["players"]["BearA"]["defending"] = False
|
|||
|
|
game_state["players"]["BearB"]["defending"] = False
|
|||
|
|
|
|||
|
|
# increment turn number
|
|||
|
|
game_state["turn_number"] += 1
|
|||
|
|
|
|||
|
|
# Check terminal conditions
|
|||
|
|
done = False
|
|||
|
|
reason_end = ""
|
|||
|
|
if game_state["hive_honey"] <= 0:
|
|||
|
|
done = True
|
|||
|
|
reason_end = "Hive honey depleted."
|
|||
|
|
elif game_state["turn_number"] > game_state["max_turns"]:
|
|||
|
|
done = True
|
|||
|
|
reason_end = "Maximum turns reached."
|
|||
|
|
else:
|
|||
|
|
total_honey = (
|
|||
|
|
game_state["hive_honey"]
|
|||
|
|
+ game_state["players"]["BearA"]["stored_honey"]
|
|||
|
|
+ game_state["players"]["BearB"]["stored_honey"]
|
|||
|
|
)
|
|||
|
|
if total_honey <= 0:
|
|||
|
|
done = True
|
|||
|
|
reason_end = "All honey depleted."
|
|||
|
|
|
|||
|
|
if done:
|
|||
|
|
a_honey = game_state["players"]["BearA"]["stored_honey"]
|
|||
|
|
b_honey = game_state["players"]["BearB"]["stored_honey"]
|
|||
|
|
|
|||
|
|
if a_honey > b_honey:
|
|||
|
|
game_state["winner"] = "BearA"
|
|||
|
|
self.state.set_winner(player_id=0, reason=reason_end + " BearA has more honey.")
|
|||
|
|
elif b_honey > a_honey:
|
|||
|
|
game_state["winner"] = "BearB"
|
|||
|
|
self.state.set_winner(player_id=1, reason=reason_end + " BearB has more honey.")
|
|||
|
|
else:
|
|||
|
|
game_state["draw"] = True
|
|||
|
|
self.state.set_draw(reason=reason_end + " Equal honey scores.")
|
|||
|
|
|
|||
|
|
return self.state.step()
|
|||
|
|
|
|||
|
|
# --------------------------------------------------------------------- #
|
|||
|
|
# Standard required Env accessors
|
|||
|
|
# --------------------------------------------------------------------- #
|
|||
|
|
def get_observation(self) -> Tuple[int, List]:
|
|||
|
|
"""Return current player's observations."""
|
|||
|
|
return self.state.current_player_id, self.state.observations
|
|||
|
|
|
|||
|
|
def close(self) -> Tuple[Dict, Dict]:
|
|||
|
|
"""Return rewards and game_info at close."""
|
|||
|
|
return self.state.rewards, self.state.game_info
|
|||
|
|
```
|