Files
honey-heist-battle/env.py
2001-01-01 00:00:00 +00:00

249 lines
9.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
```python
import re
import random
from typing import Any, Dict, Optional, Tuple, List
import textarena as ta
class HoneyHeistBattleEnv(ta.Env):
"""
Environment for "Honey Heist: Battle of the Bears".
Two rival bears compete for honey in a turn-based deterministic environment.
"""
def __init__(self, max_turns: Optional[int] = 20):
self.max_turns = max_turns
# regex patterns for validation
self.patterns = {
"forage": re.compile(r"^\[Forage:(1|2|3)\]$"),
"steal": re.compile(r"^\[Steal:(1|2|3)\]$"),
"defend": re.compile(r"^\[Defend\]$"),
}
# --------------------------------------------------------------------- #
# Helper for extracting boxed content
# --------------------------------------------------------------------- #
def _extract_answer_content(self, action: str) -> str:
"""Extract content inside \boxed{} or return full stripped string."""
match = re.search(r'\\boxed\{([^}]*)\}', action, re.DOTALL)
if match:
return match.group(1).strip()
return action.strip()
# --------------------------------------------------------------------- #
# Reset / Initialization
# --------------------------------------------------------------------- #
def reset(self, num_players: int, seed: Optional[int] = None):
"""
Resets the environment to an initial state.
"""
if num_players != 2:
raise ValueError("Honey Heist: Battle of the Bears requires exactly 2 players.")
# Initialize TwoPlayerState
self.state = ta.TwoPlayerState(num_players=num_players, seed=seed, max_turns=self.max_turns)
rng = random.Random(seed)
hive_honey = rng.randint(15, 20)
game_state = {
"turn_number": 1,
"current_player": "BearA",
"hive_honey": hive_honey,
"max_turns": self.max_turns,
"players": {
"BearA": {"stored_honey": 0, "last_action": None, "defending": False, "score": 0},
"BearB": {"stored_honey": 0, "last_action": None, "defending": False, "score": 0},
},
"history": [],
"winner": None,
"draw": False,
"seed": seed if seed is not None else 0,
}
role_mapping = {0: "BearA", 1: "BearB"}
self.state.reset(game_state=game_state, player_prompt_function=self._generate_player_prompt, role_mapping=role_mapping)
self.state.add_observation("Welcome to Honey Heist: Battle of the Bears!", ta.ObservationType.GAME_MESSAGE)
self.state.add_observation(
f"The hive contains {hive_honey} units of honey. BearA goes first.",
ta.ObservationType.GAME_MESSAGE
)
return self.state
# --------------------------------------------------------------------- #
# Player Prompt Generator
# --------------------------------------------------------------------- #
def _generate_player_prompt(self, player_id: int, game_state: Dict[str, Any]) -> str:
"""Produce role-appropriate prompt for each bear."""
role = "BearA" if player_id == 0 else "BearB"
rival = "BearB" if role == "BearA" else "BearA"
hive_honey = game_state["hive_honey"]
player_honey = game_state["players"][role]["stored_honey"]
rival_honey = game_state["players"][rival]["stored_honey"]
turn_number = game_state["turn_number"]
max_turns = game_state["max_turns"]
prompt = f"""
You are a hungry bear competing for the last honey in the forest.
- Your goal: End the game with more honey than your rival.
- Each turn, choose ONE of the following actions:
[Forage:X] Gather X units (13) from the hive.
[Defend] Protect your honey from theft this turn.
[Steal:X] Steal X units (13) from your rival if they do not defend.
Game facts:
- Hive honey remaining: {hive_honey}
- Your stored honey: {player_honey}
- Rival stored honey: {rival_honey}
- Turn {turn_number} / {max_turns}
Format rule:
State your reasoning briefly, then put your final action in the following format:
"Put your final answer within \\boxed{{}} at the end of your response."
Example valid response:
I think foraging is safe early on.
\\boxed{{[Forage:3]}}
Example invalid response:
\\boxed{{Forage 3}} <-- Must include brackets and colon.
"""
return prompt.strip()
# --------------------------------------------------------------------- #
# Step - main game logic
# --------------------------------------------------------------------- #
def step(self, action: str) -> Tuple[bool, ta.Info]:
"""
Perform a single environment step for the current player.
"""
player_id = self.state.current_player_id
player_role = "BearA" if player_id == 0 else "BearB"
opponent_role = "BearB" if player_role == "BearA" else "BearA"
game_state = self.state.game_state
# record action to transcript
self.state.add_observation(action, ta.ObservationType.PLAYER_ACTION, from_id=player_id, to_id=-1)
# extract and validate
token = self._extract_answer_content(action)
if game_state["winner"] or game_state["draw"]:
self.state.set_invalid_move("Game is already over.")
return self.state.step()
valid_action = False
reason = None
if self.patterns["forage"].match(token):
valid_action = True
act_type = "forage"
qty = int(re.findall(r"\d+", token)[0])
elif self.patterns["steal"].match(token):
valid_action = True
act_type = "steal"
qty = int(re.findall(r"\d+", token)[0])
elif self.patterns["defend"].match(token):
valid_action = True
act_type = "defend"
qty = 0
else:
reason = "Invalid format, must use [Forage:X], [Steal:X], or [Defend]."
if not valid_action:
self.state.set_invalid_move(reason)
return self.state.step()
# handle action
player_data = game_state["players"][player_role]
opp_data = game_state["players"][opponent_role]
if act_type == "forage":
if game_state["hive_honey"] < qty:
self.state.set_invalid_move("Not enough honey in hive.")
return self.state.step()
# update hive and player
game_state["hive_honey"] -= qty
player_data["stored_honey"] += qty
player_data["score"] = player_data["stored_honey"]
elif act_type == "defend":
player_data["defending"] = True
elif act_type == "steal":
if opp_data["stored_honey"] < qty:
self.state.set_invalid_move("Opponent has insufficient honey.")
return self.state.step()
if opp_data["defending"]:
transfer = 0 # blocked
else:
transfer = qty
opp_data["stored_honey"] -= transfer
player_data["stored_honey"] += transfer
player_data["score"] = player_data["stored_honey"]
opp_data["score"] = opp_data["stored_honey"]
# update metadata
player_data["last_action"] = token
entry = {"turn": game_state["turn_number"], "actor": player_role, "action": token}
game_state["history"].append(entry)
game_state["current_player"] = opponent_role
# Next player's defending status reset check
# Every full round (both bears act), clear defending flags
if player_role == "BearB":
game_state["players"]["BearA"]["defending"] = False
game_state["players"]["BearB"]["defending"] = False
# increment turn number
game_state["turn_number"] += 1
# Check terminal conditions
done = False
reason_end = ""
if game_state["hive_honey"] <= 0:
done = True
reason_end = "Hive honey depleted."
elif game_state["turn_number"] > game_state["max_turns"]:
done = True
reason_end = "Maximum turns reached."
else:
total_honey = (
game_state["hive_honey"]
+ game_state["players"]["BearA"]["stored_honey"]
+ game_state["players"]["BearB"]["stored_honey"]
)
if total_honey <= 0:
done = True
reason_end = "All honey depleted."
if done:
a_honey = game_state["players"]["BearA"]["stored_honey"]
b_honey = game_state["players"]["BearB"]["stored_honey"]
if a_honey > b_honey:
game_state["winner"] = "BearA"
self.state.set_winner(player_id=0, reason=reason_end + " BearA has more honey.")
elif b_honey > a_honey:
game_state["winner"] = "BearB"
self.state.set_winner(player_id=1, reason=reason_end + " BearB has more honey.")
else:
game_state["draw"] = True
self.state.set_draw(reason=reason_end + " Equal honey scores.")
return self.state.step()
# --------------------------------------------------------------------- #
# Standard required Env accessors
# --------------------------------------------------------------------- #
def get_observation(self) -> Tuple[int, List]:
"""Return current player's observations."""
return self.state.current_player_id, self.state.observations
def close(self) -> Tuple[Dict, Dict]:
"""Return rewards and game_info at close."""
return self.state.rewards, self.state.game_info
```