27 changes: 12 additions & 15 deletions docs/src/tutorials/cmab_zooming.ipynb
@@ -28,7 +28,6 @@
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"from pybandits.cmab import CmabBernoulli\n",
"from pybandits.quantitative_model import CmabZoomingModel"
@@ -202,11 +201,7 @@
"n_batches = 10\n",
"batch_size = 100\n",
"n_rounds = n_batches * batch_size\n",
"raw_context_data = np.random.normal(0, 1, (n_rounds, n_features))\n",
"\n",
"# Standardize the context data\n",
"scaler = StandardScaler()\n",
"context_data = scaler.fit_transform(raw_context_data)\n",
"context_data = np.random.uniform(0, 1, (n_rounds, n_features))\n",
"\n",
"# Preview the context data\n",
"pd.DataFrame(context_data[:5], columns=[f\"Feature {i + 1}\" for i in range(n_features)])"
@@ -313,24 +308,24 @@
"outputs": [],
"source": [
"# Define test contexts\n",
"test_contexts = [\n",
" [2.0, -1.0, 0.0], # High feature 1, low feature 2\n",
" [-1.0, 2.0, 0.0], # Low feature 1, high feature 2\n",
" [1.0, 1.0, 0.0], # High feature 1 and 2\n",
" [-1.0, -1.0, 0.0], # Low feature 1 and 2\n",
"]\n",
"test_contexts = scaler.transform(test_contexts)\n",
"test_contexts = np.array(\n",
" [\n",
" [1.0, 0.0, 0.0], # High feature 1, low feature 2\n",
" [0.0, 1.0, 0.0], # Low feature 1, high feature 2\n",
" [1.0, 1.0, 0.0], # High feature 1 and 2\n",
" [0.0, 0.0, 0.0], # Low feature 1 and 2\n",
" ]\n",
")\n",
"\n",
"# Test predictions\n",
"results = []\n",
"for i, context in enumerate(test_contexts):\n",
" context_reshaped = context.reshape(1, -1)\n",
" pred_actions, probs, weighted_sums = cmab.predict(context=context_reshaped)\n",
" chosen_action_quantity = pred_actions[0]\n",
" chosen_action_probs = {action: probs[0][chosen_action_quantity] for action in actions}\n",
" chosen_action = chosen_action_quantity[0]\n",
" chosen_quantities = chosen_action_quantity[1][0]\n",
" chosen_action_probs = probs[0][chosen_action_quantity]\n",
" chosen_action_probs = probs[0][chosen_action](chosen_quantities)\n",
"\n",
" # Sample optimal quantity for the chosen action\n",
" # In a real application, you would have a method to test different quantities\n",
@@ -347,6 +342,7 @@
" {\n",
" \"Context\": context,\n",
" \"Chosen Action\": chosen_action,\n",
" \"Chosen Quantity\": chosen_quantities,\n",
" \"Action Probabilities\": chosen_action_probs,\n",
" \"Optimal Quantity\": optimal_quantity,\n",
" \"Expected Reward\": expected_reward,\n",
Expand All @@ -368,6 +364,7 @@
" print(f\"\\nTest {i + 1}: {context_type}\")\n",
" print(f\"Context: {result['Context']}\")\n",
" print(f\"Chosen Action: {result['Chosen Action']}\")\n",
" print(f\"Chosen Quantity: {result['Chosen Qunatity']}\")\n",
" print(f\"Action Probabilities: {result['Action Probabilities']}\")\n",
" print(f\"Optimal Quantity: {result['Optimal Quantity']:.2f}\")\n",
" print(f\"Expected Reward: {result['Expected Reward']}\")"
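Reading the loop above, the zooming model's predictions now come back as `(action_id, quantities)` pairs, with the probability exposed as a function of quantity. A minimal sketch of unpacking one prediction outside the loop; the exact shapes are inferred from this diff and should be treated as an assumption:

```python
import numpy as np

# Assumes `cmab` is the CmabBernoulli with CmabZoomingModel actions built earlier in the tutorial.
context = np.random.uniform(0, 1, (1, 3))            # one context row with 3 features
pred_actions, probs, weighted_sums = cmab.predict(context=context)

chosen_action, quantities = pred_actions[0]           # e.g. ("action A", (0.35,))
chosen_quantity = quantities[0]                       # first (and only) quantity here
prob_of_reward = probs[0][chosen_action](chosen_quantity)

print(chosen_action, chosen_quantity, prob_of_reward)
```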
56 changes: 47 additions & 9 deletions pybandits/base.py
@@ -20,8 +20,22 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from typing import Any, Dict, List, Mapping, NewType, Optional, Tuple, Union, _GenericAlias, get_args, get_origin
from typing import (
Any,
Callable,
Dict,
List,
Mapping,
NewType,
Optional,
Tuple,
Union,
_GenericAlias,
get_args,
get_origin,
)

import numpy as np
from typing_extensions import Self

from pybandits.pydantic_version_compatibility import (
@@ -45,10 +59,12 @@
MOProbability = List[Probability]
MOProbabilityWeight = List[ProbabilityWeight]
# QuantitativeProbability generalizes probability to include both action quantities and their associated probability
QuantitativeProbability = Tuple[Tuple[Tuple[Float01, ...], Probability], ...]
QuantitativeProbabilityWeight = Tuple[Tuple[Tuple[Float01, ...], ProbabilityWeight], ...]
QuantitativeMOProbability = Tuple[Tuple[Tuple[Float01, ...], List[Probability]], ...]
QuantitativeMOProbabilityWeight = Tuple[Tuple[Tuple[Float01, ...], List[ProbabilityWeight]], ...]
QuantitativeProbability = Callable[[np.ndarray], Probability]
Collaborator: The name of the type is misleading... maybe it should be: QuantitativeProbabilityFunction?

Collaborator (Author): @ronshiff1 Maybe ContinuousProbability? I just didn't want it to be too long.

QuantitativeWeight = Callable[[np.ndarray], float]
QuantitativeProbabilityWeight = Tuple[QuantitativeProbability, QuantitativeWeight]
QuantitativeMOProbability = Callable[[np.ndarray], MOProbability]
QuantitativeMOProbabilityWeight = Tuple[Callable[[np.ndarray], MOProbability], Callable[[np.ndarray], float]]

UnifiedProbability = Union[Probability, QuantitativeProbability]
UnifiedProbabilityWeight = Union[ProbabilityWeight, QuantitativeProbabilityWeight]
UnifiedMOProbability = Union[MOProbability, QuantitativeMOProbability]
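To make the naming discussion above concrete, here is a minimal sketch of what a callable-style QuantitativeProbability looks like in use; the action curve and numbers are illustrative assumptions, not code from this PR:

```python
import numpy as np

# Under the new aliases, a QuantitativeProbability is a function mapping a quantity
# (np.ndarray) to a Probability, rather than a tuple of (quantity, probability) pairs.
def purchase_probability(quantity: np.ndarray) -> float:
    # Illustrative curve only: probability saturates as the quantity grows.
    return float(1.0 - np.exp(-2.0 * np.asarray(quantity).mean()))


# A QuantitativeProbabilityWeight pairs the probability function with a weight function.
quantitative_probability_weight = (
    purchase_probability,                                  # quantity -> probability
    lambda quantity: float(np.asarray(quantity).sum()),    # quantity -> weight
)

print(purchase_probability(np.array([0.3])))  # ~0.45 for this illustrative curve
```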
@@ -79,10 +95,10 @@
ActionRewardLikelihood = NewType(
"ActionRewardLikelihood",
Union[
Dict[UnifiedActionId, float],
Dict[UnifiedActionId, List[float]],
Dict[UnifiedActionId, Probability],
Dict[UnifiedActionId, List[Probability]],
Dict[ActionId, Union[float, Callable[[np.ndarray], float]]],
Dict[ActionId, Union[List[float], Callable[[np.ndarray], List[float]]]],
Dict[ActionId, Union[Probability, Callable[[np.ndarray], Probability]]],
Dict[ActionId, Union[List[Probability], Callable[[np.ndarray], List[Probability]]]],
],
)
ACTION_IDS_PREFIX = "action_ids_"
@@ -190,6 +206,28 @@ def _get_field_type(cls, key: str) -> Any:
annotation = get_args(annotation)
return annotation

@classmethod
def _normalize_field(cls, v: Any, field_name: str) -> Any:
Collaborator: Why do we need this?

Collaborator (Author): I use it in validators with "pre" behavior to get the default value of an attribute that was not given.
"""
Normalize a field value to its default if None.

This utility method ensures that optional fields receive their default
values when not explicitly provided.

Parameters
----------
v : Any
The field value to normalize.
field_name : str
Name of the field in the model.

Returns
-------
Any
The original value if not None, otherwise the field's default value.
"""
return v if v is not None else cls.model_fields[field_name].default

if pydantic_version == PYDANTIC_VERSION_1:

@classproperty
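As the author notes above, `_normalize_field` exists so that "pre"-style validators can fall back to a field's default when the caller passes None. A minimal sketch of that pattern under Pydantic v2; the model and field are hypothetical, not taken from PyBandits:

```python
from typing import Optional

from pydantic import BaseModel, field_validator


class ExampleModel(BaseModel):
    # Hypothetical optional field with a default value.
    epsilon: Optional[float] = 0.1

    @classmethod
    def _normalize_field(cls, v, field_name: str):
        # Same idea as the helper added in this PR: fall back to the field default on None.
        return v if v is not None else cls.model_fields[field_name].default

    @field_validator("epsilon", mode="before")
    @classmethod
    def _normalize_epsilon(cls, v):
        return cls._normalize_field(v, "epsilon")


print(ExampleModel(epsilon=None).epsilon)  # 0.1 instead of None
```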
25 changes: 18 additions & 7 deletions pybandits/cmab_simulator.py
@@ -35,7 +35,11 @@
ParametricActionProbability,
Simulator,
)
from pybandits.utils import extract_argument_names_from_function
from pybandits.utils import (
OptimizationFailedError,
extract_argument_names_from_function,
maximize_by_quantity,
)

CmabProbabilityValue = Union[ParametricActionProbability, DoubleParametricActionProbability]
CmabActionProbabilityGroundTruth = Dict[ActionId, CmabProbabilityValue]
@@ -232,13 +236,20 @@ def _finalize_step(self, batch_results: pd.DataFrame, update_kwargs: Dict[str, n
for a, q, g, c in zip(action_id, quantity, group_id, update_kwargs["context"])
]
batch_results.loc[:, "selected_prob_reward"] = selected_prob_reward

def get_max_prob_for_action(g: str, a: ActionId, c: np.ndarray, m) -> float:
"""Get maximum probability for an action, handling optimization failures."""
if isinstance(m, QuantitativeModel):
try:
opt_q = maximize_by_quantity((lambda q: self.probs_reward[g][a](c, q)), m.dimension)
return self.probs_reward[g][a](c, opt_q)
except OptimizationFailedError as e:
raise ValueError(f"Optimization failed for action {a}: {e}")
else:
return self.probs_reward[g][a](c)

max_prob_reward = [
max(
self._maximize_prob_reward((lambda q: self.probs_reward[g][a](c, q)), m.dimension)
if isinstance(m, QuantitativeModel)
else self.probs_reward[g][a](c)
for a, m in self.mab.actions.items()
)
max(get_max_prob_for_action(g, a, c, m) for a, m in self.mab.actions.items())
for g, c in zip(group_id, update_kwargs["context"])
]
batch_results.loc[:, "max_prob_reward"] = max_prob_reward
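For reference, the failure-handling pattern used by `get_max_prob_for_action` can be sketched in isolation. This assumes the signature implied by the diff, `maximize_by_quantity(objective, dimension)`, returning the maximizing quantity; the reward curve itself is made up for illustration:

```python
import numpy as np

from pybandits.utils import OptimizationFailedError, maximize_by_quantity


def reward_at_quantity(q: np.ndarray) -> float:
    # Illustrative ground-truth curve over a 1-dimensional quantity, peaking near 0.7.
    return float(1.0 - (np.asarray(q).mean() - 0.7) ** 2)


try:
    opt_q = maximize_by_quantity(reward_at_quantity, 1)  # dimension = 1, as in the diff
    print(opt_q, reward_at_quantity(opt_q))
except OptimizationFailedError as e:
    # Mirror the simulator: surface optimizer failures as explicit errors.
    raise ValueError(f"Optimization failed: {e}") from e
```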
78 changes: 19 additions & 59 deletions pybandits/mab.py
@@ -43,6 +43,10 @@
Probability,
ProbabilityWeight,
PyBanditsBaseModel,
QuantitativeMOProbability,
QuantitativeMOProbabilityWeight,
QuantitativeProbability,
QuantitativeProbabilityWeight,
Serializable,
UnifiedActionId,
)
@@ -52,7 +56,7 @@
validate_call,
)
from pybandits.quantitative_model import QuantitativeModel
from pybandits.strategy import Strategy
from pybandits.strategy import BaseStrategy
from pybandits.utils import extract_argument_names_from_function


@@ -79,12 +83,12 @@ class BaseMab(PyBanditsBaseModel, ABC):
"""

actions_manager: ActionsManager
strategy: Strategy
strategy: BaseStrategy
epsilon: Optional[Float01] = None
default_action: Optional[UnifiedActionId] = None
version: Optional[str] = None
deprecated_adwin_keys: ClassVar[List[str]] = ["adaptive_window_size", "actions_memory", "rewards_memory"]
current_supported_version_th: ClassVar[str] = "3.0.0"
_deprecated_adwin_keys: ClassVar[List[str]] = ["adaptive_window_size", "actions_memory", "rewards_memory"]
_current_supported_version_th: ClassVar[str] = "3.0.0"

def __init__(
self,
@@ -232,32 +236,13 @@ def update(
def _transform_nested_list(lst: List[List[Dict]]):
return [{k: v for d in single_action_dicts for k, v in d.items()} for single_action_dicts in zip(*lst)]

@staticmethod
def _is_so_standard_action(value: Any) -> bool:
# Probability ProbabilityWeight
return isinstance(value, float) or (isinstance(value, tuple) and isinstance(value[0], float))

@staticmethod
def _is_so_quantitative_action(value: Any) -> bool:
return isinstance(value, tuple) and isinstance(value[0], tuple)

@classmethod
def _is_standard_action(cls, value: Any) -> bool:
return cls._is_so_standard_action(value) or (isinstance(value, list) and cls._is_so_standard_action(value[0]))

@classmethod
def _is_quantitative_action(cls, value: Any) -> bool:
return cls._is_so_quantitative_action(value) or (
isinstance(value, list) and cls._is_so_quantitative_action(value[0])
)

def _get_action_probabilities(
self, forbidden_actions: Optional[Set[ActionId]] = None, **kwargs
) -> Union[
List[Dict[UnifiedActionId, Probability]],
List[Dict[UnifiedActionId, ProbabilityWeight]],
List[Dict[UnifiedActionId, MOProbability]],
List[Dict[UnifiedActionId, MOProbabilityWeight]],
List[Dict[ActionId, Union[Probability, QuantitativeProbability]]],
List[Dict[ActionId, Union[ProbabilityWeight, QuantitativeProbabilityWeight]]],
List[Dict[ActionId, Union[MOProbability, QuantitativeMOProbability]]],
List[Dict[ActionId, Union[MOProbabilityWeight, QuantitativeMOProbabilityWeight]]],
]:
"""
Get the probability of getting a positive reward for each action.
@@ -280,34 +265,9 @@ def _get_action_probabilities(
action: model.sample_proba(**kwargs) for action, model in self.actions.items() if action in valid_actions
}
# Handle standard actions for which the value is a (probability, weight) tuple
actions_transformations = [
[{key: proba} for proba in value]
for key, value in action_probabilities.items()
if self._is_standard_action(value[0])
]
actions_transformations = self._transform_nested_list(actions_transformations)
# Handle quantitative actions, for which the value is a tuple of
# tuples of (quantity, (probability, weight) or probability)
quantitative_actions_transformations = [
[{(key, quantity): proba for quantity, proba in sample} for sample in value]
for key, value in action_probabilities.items()
if self._is_quantitative_action(value[0])
]
quantitative_actions_transformations = self._transform_nested_list(quantitative_actions_transformations)
if not actions_transformations and not quantitative_actions_transformations:
return []
if not actions_transformations: # No standard actions
actions_transformations = [dict() for _ in range(len(quantitative_actions_transformations))]
if not quantitative_actions_transformations: # No quantitative actions
quantitative_actions_transformations = [dict() for _ in range(len(actions_transformations))]
if len(actions_transformations) != len(quantitative_actions_transformations):
raise ValueError("The number of standard and quantitative actions should be the same.")
action_probabilities = [
{**actions_dict, **quantitative_actions_dict}
for actions_dict, quantitative_actions_dict in zip(
actions_transformations, quantitative_actions_transformations
)
]
actions_transformations = [[{key: proba} for proba in value] for key, value in action_probabilities.items()]
action_probabilities = self._transform_nested_list(actions_transformations)

return action_probabilities

@abstractmethod
@@ -399,7 +359,7 @@ def _select_epsilon_greedy_action(
if self.default_action:
selected_action = self.default_action
else:
actions = list(set(a[0] if isinstance(a, tuple) else a for a in p.keys()))
actions = list(p.keys())
selected_action = random.choice(actions)
if isinstance(self.actions[selected_action], QuantitativeModel):
selected_action = (
@@ -463,7 +423,7 @@ def update_old_state(
state["actions_manager"]["actions"] = state.pop("actions")
state["actions_manager"]["delta"] = delta

for key in cls.deprecated_adwin_keys:
for key in cls._deprecated_adwin_keys:
if key in state["actions_manager"]:
state["actions_manager"].pop(key)

@@ -496,10 +456,10 @@ def from_old_state(

state_dict = json.loads(state)
if ("version" in state_dict) and (
version.parse(state_dict["version"]) >= version.parse(cls.current_supported_version_th)
version.parse(state_dict["version"]) >= version.parse(cls._current_supported_version_th)
):
raise ValueError(
f"The state is expected to be in the old format of PyBandits < {cls.current_supported_version_th}."
f"The state is expected to be in the old format of PyBandits < {cls._current_supported_version_th}."
)
state_dict = cls.update_old_state(state_dict, delta)
state = json.dumps(state_dict)
9 changes: 7 additions & 2 deletions pybandits/offline_policy_evaluator.py
@@ -1023,9 +1023,13 @@ def estimate_policy(
# finalize the dataframe shape to #samples X #mc experiments
mc_actions = pd.DataFrame(mc_actions).T

# Get unique actions that actually appear in the test set (to match validation requirements)
# The action array contains encoded indices, so we need to map them back to action IDs
unique_actions_in_test = sorted(set(self._test_data["action_ids"]))

# for each sample / each action, count the occurrence frequency during MC iteration
mc_action_counts = pd.DataFrame(0, index=mc_actions.index, columns=self._test_data["unique_actions"])
for action in self._test_data["unique_actions"]:
mc_action_counts = pd.DataFrame(0, index=mc_actions.index, columns=unique_actions_in_test)
for action in unique_actions_in_test:
mc_action_counts[action] = (mc_actions == action).sum(axis=1)
estimated_policy = mc_action_counts / n_mc_experiments

@@ -1110,6 +1114,7 @@ def evaluate(
axis=0,
)
if save_path:
os.makedirs(save_path, exist_ok=True)
multi_objective_estimated_policy_value_df.to_csv(os.path.join(save_path, "estimated_policy_value.csv"))

if visualize: