diff --git a/docs/src/tutorials/cmab_zooming.ipynb b/docs/src/tutorials/cmab_zooming.ipynb
index 805078a..606dd41 100644
--- a/docs/src/tutorials/cmab_zooming.ipynb
+++ b/docs/src/tutorials/cmab_zooming.ipynb
@@ -28,7 +28,6 @@
     "import matplotlib.pyplot as plt\n",
     "import numpy as np\n",
     "import pandas as pd\n",
-    "from sklearn.preprocessing import StandardScaler\n",
     "\n",
     "from pybandits.cmab import CmabBernoulli\n",
     "from pybandits.quantitative_model import CmabZoomingModel"
@@ -202,11 +201,7 @@
     "n_batches = 10\n",
     "batch_size = 100\n",
     "n_rounds = n_batches * batch_size\n",
-    "raw_context_data = np.random.normal(0, 1, (n_rounds, n_features))\n",
-    "\n",
-    "# Standardize the context data\n",
-    "scaler = StandardScaler()\n",
-    "context_data = scaler.fit_transform(raw_context_data)\n",
+    "context_data = np.random.uniform(0, 1, (n_rounds, n_features))\n",
     "\n",
     "# Preview the context data\n",
     "pd.DataFrame(context_data[:5], columns=[f\"Feature {i + 1}\" for i in range(n_features)])"
@@ -313,13 +308,14 @@
    "outputs": [],
    "source": [
     "# Define test contexts\n",
-    "test_contexts = [\n",
-    "    [2.0, -1.0, 0.0],  # High feature 1, low feature 2\n",
-    "    [-1.0, 2.0, 0.0],  # Low feature 1, high feature 2\n",
-    "    [1.0, 1.0, 0.0],  # High feature 1 and 2\n",
-    "    [-1.0, -1.0, 0.0],  # Low feature 1 and 2\n",
-    "]\n",
-    "test_contexts = scaler.transform(test_contexts)\n",
+    "test_contexts = np.array(\n",
+    "    [\n",
+    "        [1.0, 0.0, 0.0],  # High feature 1, low feature 2\n",
+    "        [0.0, 1.0, 0.0],  # Low feature 1, high feature 2\n",
+    "        [1.0, 1.0, 0.0],  # High feature 1 and 2\n",
+    "        [0.0, 0.0, 0.0],  # Low feature 1 and 2\n",
+    "    ]\n",
+    ")\n",
     "\n",
     "# Test predictions\n",
     "results = []\n",
@@ -327,10 +323,9 @@
     "    context_reshaped = context.reshape(1, -1)\n",
     "    pred_actions, probs, weighted_sums = cmab.predict(context=context_reshaped)\n",
     "    chosen_action_quantity = pred_actions[0]\n",
-    "    chosen_action_probs = {action: probs[0][chosen_action_quantity] for action in actions}\n",
     "    chosen_action = chosen_action_quantity[0]\n",
     "    chosen_quantities = chosen_action_quantity[1][0]\n",
-    "    chosen_action_probs = probs[0][chosen_action_quantity]\n",
+    "    chosen_action_probs = probs[0][chosen_action](chosen_quantities)\n",
     "\n",
     "    # Sample optimal quantity for the chosen action\n",
     "    # In a real application, you would have a method to test different quantities\n",
@@ -347,6 +342,7 @@
     "        {\n",
     "            \"Context\": context,\n",
     "            \"Chosen Action\": chosen_action,\n",
+    "            \"Chosen Qunatity\": chosen_quantities,\n",
     "            \"Action Probabilities\": chosen_action_probs,\n",
     "            \"Optimal Quantity\": optimal_quantity,\n",
     "            \"Expected Reward\": expected_reward,\n",
@@ -368,6 +364,7 @@
     "    print(f\"\\nTest {i + 1}: {context_type}\")\n",
     "    print(f\"Context: {result['Context']}\")\n",
     "    print(f\"Chosen Action: {result['Chosen Action']}\")\n",
+    "    print(f\"Chosen Quantity: {result['Chosen Qunatity']}\")\n",
     "    print(f\"Action Probabilities: {result['Action Probabilities']}\")\n",
     "    print(f\"Optimal Quantity: {result['Optimal Quantity']:.2f}\")\n",
     "    print(f\"Expected Reward: {result['Expected Reward']}\")"
diff --git a/pybandits/base.py b/pybandits/base.py
index 836e040..9fcbf0a 100644
--- a/pybandits/base.py
+++ b/pybandits/base.py
@@ -20,8 +20,22 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-from typing import Any, Dict, List, Mapping, NewType, Optional, Tuple, Union, _GenericAlias, get_args, get_origin
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    Mapping,
+    NewType,
+    Optional,
+    Tuple,
+    Union,
+    _GenericAlias,
+    get_args,
+    get_origin,
+)
 
+import numpy as np
 from typing_extensions import Self
 
 from pybandits.pydantic_version_compatibility import (
@@ -45,10 +59,12 @@
 MOProbability = List[Probability]
 MOProbabilityWeight = List[ProbabilityWeight]
 # QuantitativeProbability generalizes probability to include both action quantities and their associated probability
-QuantitativeProbability = Tuple[Tuple[Tuple[Float01, ...], Probability], ...]
-QuantitativeProbabilityWeight = Tuple[Tuple[Tuple[Float01, ...], ProbabilityWeight], ...]
-QuantitativeMOProbability = Tuple[Tuple[Tuple[Float01, ...], List[Probability]], ...]
-QuantitativeMOProbabilityWeight = Tuple[Tuple[Tuple[Float01, ...], List[ProbabilityWeight]], ...]
+QuantitativeProbability = Callable[[np.ndarray], Probability]
+QuantitativeWeight = Callable[[np.ndarray], float]
+QuantitativeProbabilityWeight = Tuple[QuantitativeProbability, QuantitativeWeight]
+QuantitativeMOProbability = Callable[[np.ndarray], MOProbability]
+QuantitativeMOProbabilityWeight = Tuple[Callable[[np.ndarray], MOProbability], Callable[[np.ndarray], float]]
+
 UnifiedProbability = Union[Probability, QuantitativeProbability]
 UnifiedProbabilityWeight = Union[ProbabilityWeight, QuantitativeProbabilityWeight]
 UnifiedMOProbability = Union[MOProbability, QuantitativeMOProbability]
@@ -79,10 +95,10 @@
 ActionRewardLikelihood = NewType(
     "ActionRewardLikelihood",
     Union[
-        Dict[UnifiedActionId, float],
-        Dict[UnifiedActionId, List[float]],
-        Dict[UnifiedActionId, Probability],
-        Dict[UnifiedActionId, List[Probability]],
+        Dict[ActionId, Union[float, Callable[[np.ndarray], float]]],
+        Dict[ActionId, Union[List[float], Callable[[np.ndarray], List[float]]]],
+        Dict[ActionId, Union[Probability, Callable[[np.ndarray], Probability]]],
+        Dict[ActionId, Union[List[Probability], Callable[[np.ndarray], List[Probability]]]],
     ],
 )
 ACTION_IDS_PREFIX = "action_ids_"
@@ -190,6 +206,28 @@ def _get_field_type(cls, key: str) -> Any:
             annotation = get_args(annotation)
         return annotation
 
+    @classmethod
+    def _normalize_field(cls, v: Any, field_name: str) -> Any:
+        """
+        Normalize a field value to its default if None.
+
+        This utility method ensures that optional fields receive their default
+        values when not explicitly provided.
+
+        Parameters
+        ----------
+        v : Any
+            The field value to normalize.
+        field_name : str
+            Name of the field in the model.
+
+        Returns
+        -------
+        Any
+            The original value if not None, otherwise the field's default value.
+        """
+        return v if v is not None else cls.model_fields[field_name].default
+
     if pydantic_version == PYDANTIC_VERSION_1:
 
         @classproperty
diff --git a/pybandits/cmab_simulator.py b/pybandits/cmab_simulator.py
index 9778453..2fb786b 100644
--- a/pybandits/cmab_simulator.py
+++ b/pybandits/cmab_simulator.py
@@ -35,7 +35,11 @@
     ParametricActionProbability,
     Simulator,
 )
-from pybandits.utils import extract_argument_names_from_function
+from pybandits.utils import (
+    OptimizationFailedError,
+    extract_argument_names_from_function,
+    maximize_by_quantity,
+)
 
 CmabProbabilityValue = Union[ParametricActionProbability, DoubleParametricActionProbability]
 CmabActionProbabilityGroundTruth = Dict[ActionId, CmabProbabilityValue]
@@ -232,13 +236,20 @@ def _finalize_step(self, batch_results: pd.DataFrame, update_kwargs: Dict[str, n
             for a, q, g, c in zip(action_id, quantity, group_id, update_kwargs["context"])
         ]
         batch_results.loc[:, "selected_prob_reward"] = selected_prob_reward
+
+        def get_max_prob_for_action(g: str, a: ActionId, c: np.ndarray, m) -> float:
+            """Get maximum probability for an action, handling optimization failures."""
+            if isinstance(m, QuantitativeModel):
+                try:
+                    opt_q = maximize_by_quantity((lambda q: self.probs_reward[g][a](c, q)), m.dimension)
+                    return self.probs_reward[g][a](c, opt_q)
+                except OptimizationFailedError as e:
+                    raise ValueError(f"Optimization failed for action {a}: {e}")
+            else:
+                return self.probs_reward[g][a](c)
+
         max_prob_reward = [
-            max(
-                self._maximize_prob_reward((lambda q: self.probs_reward[g][a](c, q)), m.dimension)
-                if isinstance(m, QuantitativeModel)
-                else self.probs_reward[g][a](c)
-                for a, m in self.mab.actions.items()
-            )
+            max(get_max_prob_for_action(g, a, c, m) for a, m in self.mab.actions.items())
             for g, c in zip(group_id, update_kwargs["context"])
         ]
         batch_results.loc[:, "max_prob_reward"] = max_prob_reward
diff --git a/pybandits/mab.py b/pybandits/mab.py
index 52d6602..49329e7 100644
--- a/pybandits/mab.py
+++ b/pybandits/mab.py
@@ -43,6 +43,10 @@
     Probability,
     ProbabilityWeight,
     PyBanditsBaseModel,
+    QuantitativeMOProbability,
+    QuantitativeMOProbabilityWeight,
+    QuantitativeProbability,
+    QuantitativeProbabilityWeight,
     Serializable,
     UnifiedActionId,
 )
@@ -52,7 +56,7 @@
     validate_call,
 )
 from pybandits.quantitative_model import QuantitativeModel
-from pybandits.strategy import Strategy
+from pybandits.strategy import BaseStrategy
 from pybandits.utils import extract_argument_names_from_function
 
 
@@ -79,12 +83,12 @@ class BaseMab(PyBanditsBaseModel, ABC):
     """
 
     actions_manager: ActionsManager
-    strategy: Strategy
+    strategy: BaseStrategy
     epsilon: Optional[Float01] = None
     default_action: Optional[UnifiedActionId] = None
     version: Optional[str] = None
-    deprecated_adwin_keys: ClassVar[List[str]] = ["adaptive_window_size", "actions_memory", "rewards_memory"]
-    current_supported_version_th: ClassVar[str] = "3.0.0"
+    _deprecated_adwin_keys: ClassVar[List[str]] = ["adaptive_window_size", "actions_memory", "rewards_memory"]
+    _current_supported_version_th: ClassVar[str] = "3.0.0"
 
     def __init__(
         self,
@@ -232,32 +236,13 @@ def update(
     def _transform_nested_list(lst: List[List[Dict]]):
         return [{k: v for d in single_action_dicts for k, v in d.items()} for single_action_dicts in zip(*lst)]
 
-    @staticmethod
-    def _is_so_standard_action(value: Any) -> bool:
-        #       Probability                                      ProbabilityWeight
-        return isinstance(value, float) or (isinstance(value, tuple) and isinstance(value[0], float))
-
-    @staticmethod
-    def _is_so_quantitative_action(value: Any) -> bool:
-        return isinstance(value, tuple) and isinstance(value[0], tuple)
-
-    @classmethod
-    def _is_standard_action(cls, value: Any) -> bool:
-        return cls._is_so_standard_action(value) or (isinstance(value, list) and cls._is_so_standard_action(value[0]))
-
-    @classmethod
-    def _is_quantitative_action(cls, value: Any) -> bool:
-        return cls._is_so_quantitative_action(value) or (
-            isinstance(value, list) and cls._is_so_quantitative_action(value[0])
-        )
-
     def _get_action_probabilities(
         self, forbidden_actions: Optional[Set[ActionId]] = None, **kwargs
     ) -> Union[
-        List[Dict[UnifiedActionId, Probability]],
-        List[Dict[UnifiedActionId, ProbabilityWeight]],
-        List[Dict[UnifiedActionId, MOProbability]],
-        List[Dict[UnifiedActionId, MOProbabilityWeight]],
+        List[Dict[ActionId, Union[Probability, QuantitativeProbability]]],
+        List[Dict[ActionId, Union[ProbabilityWeight, QuantitativeProbabilityWeight]]],
+        List[Dict[ActionId, Union[MOProbability, QuantitativeMOProbability]]],
+        List[Dict[ActionId, Union[MOProbabilityWeight, QuantitativeMOProbabilityWeight]]],
     ]:
         """
         Get the probability of getting a positive reward for each action.
@@ -280,34 +265,9 @@ def _get_action_probabilities(
             action: model.sample_proba(**kwargs) for action, model in self.actions.items() if action in valid_actions
         }
         # Handle standard actions for which the value is a (probability, weight) tuple
-        actions_transformations = [
-            [{key: proba} for proba in value]
-            for key, value in action_probabilities.items()
-            if self._is_standard_action(value[0])
-        ]
-        actions_transformations = self._transform_nested_list(actions_transformations)
-        # Handle quantitative actions, for which the value is a tuple of
-        # tuples of (quantity, (probability, weight) or probability)
-        quantitative_actions_transformations = [
-            [{(key, quantity): proba for quantity, proba in sample} for sample in value]
-            for key, value in action_probabilities.items()
-            if self._is_quantitative_action(value[0])
-        ]
-        quantitative_actions_transformations = self._transform_nested_list(quantitative_actions_transformations)
-        if not actions_transformations and not quantitative_actions_transformations:
-            return []
-        if not actions_transformations:  # No standard actions
-            actions_transformations = [dict() for _ in range(len(quantitative_actions_transformations))]
-        if not quantitative_actions_transformations:  # No quantitative actions
-            quantitative_actions_transformations = [dict() for _ in range(len(actions_transformations))]
-        if len(actions_transformations) != len(quantitative_actions_transformations):
-            raise ValueError("The number of standard and quantitative actions should be the same.")
-        action_probabilities = [
-            {**actions_dict, **quantitative_actions_dict}
-            for actions_dict, quantitative_actions_dict in zip(
-                actions_transformations, quantitative_actions_transformations
-            )
-        ]
+        actions_transformations = [[{key: proba} for proba in value] for key, value in action_probabilities.items()]
+        action_probabilities = self._transform_nested_list(actions_transformations)
+
         return action_probabilities
 
     @abstractmethod
@@ -399,7 +359,7 @@ def _select_epsilon_greedy_action(
                 if self.default_action:
                     selected_action = self.default_action
                 else:
-                    actions = list(set(a[0] if isinstance(a, tuple) else a for a in p.keys()))
+                    actions = list(p.keys())
                     selected_action = random.choice(actions)
                     if isinstance(self.actions[selected_action], QuantitativeModel):
                         selected_action = (
@@ -463,7 +423,7 @@ def update_old_state(
             state["actions_manager"]["actions"] = state.pop("actions")
             state["actions_manager"]["delta"] = delta
 
-        for key in cls.deprecated_adwin_keys:
+        for key in cls._deprecated_adwin_keys:
             if key in state["actions_manager"]:
                 state["actions_manager"].pop(key)
 
@@ -496,10 +456,10 @@ def from_old_state(
 
         state_dict = json.loads(state)
         if ("version" in state_dict) and (
-            version.parse(state_dict["version"]) >= version.parse(cls.current_supported_version_th)
+            version.parse(state_dict["version"]) >= version.parse(cls._current_supported_version_th)
         ):
             raise ValueError(
-                f"The state is expected to be in the old format of PyBandits < {cls.current_supported_version_th}."
+                f"The state is expected to be in the old format of PyBandits < {cls._current_supported_version_th}."
             )
         state_dict = cls.update_old_state(state_dict, delta)
         state = json.dumps(state_dict)
diff --git a/pybandits/offline_policy_evaluator.py b/pybandits/offline_policy_evaluator.py
index 886e3e7..a4be697 100644
--- a/pybandits/offline_policy_evaluator.py
+++ b/pybandits/offline_policy_evaluator.py
@@ -1023,9 +1023,13 @@ def estimate_policy(
         # finalize the dataframe shape to #samples X #mc experiments
         mc_actions = pd.DataFrame(mc_actions).T
 
+        # Get unique actions that actually appear in the test set (to match validation requirements)
+        # The action array contains encoded indices, so we need to map them back to action IDs
+        unique_actions_in_test = sorted(set(self._test_data["action_ids"]))
+
         # for each sample / each action, count the occurrence frequency during MC iteration
-        mc_action_counts = pd.DataFrame(0, index=mc_actions.index, columns=self._test_data["unique_actions"])
-        for action in self._test_data["unique_actions"]:
+        mc_action_counts = pd.DataFrame(0, index=mc_actions.index, columns=unique_actions_in_test)
+        for action in unique_actions_in_test:
             mc_action_counts[action] = (mc_actions == action).sum(axis=1)
         estimated_policy = mc_action_counts / n_mc_experiments
 
@@ -1110,6 +1114,7 @@ def evaluate(
                 axis=0,
             )
         if save_path:
+            os.makedirs(save_path, exist_ok=True)
             multi_objective_estimated_policy_value_df.to_csv(os.path.join(save_path, "estimated_policy_value.csv"))
 
         if visualize:
diff --git a/pybandits/quantitative_model.py b/pybandits/quantitative_model.py
index f445cfc..9b71ca5 100644
--- a/pybandits/quantitative_model.py
+++ b/pybandits/quantitative_model.py
@@ -34,13 +34,23 @@
 from scipy.stats import beta
 from typing_extensions import Self
 
-from pybandits.base import BinaryReward, Float01, PyBanditsBaseModel, QuantitativeProbability
+from pybandits.base import (
+    BinaryReward,
+    Float01,
+    Probability,
+    ProbabilityWeight,
+    PyBanditsBaseModel,
+    QuantitativeProbability,
+    QuantitativeProbabilityWeight,
+    QuantitativeWeight,
+)
 from pybandits.base_model import BaseModelCC, BaseModelSO
 from pybandits.model import BayesianNeuralNetwork, Beta, Model
 from pybandits.pydantic_version_compatibility import (
     PYDANTIC_VERSION_1,
     PYDANTIC_VERSION_2,
     NonNegativeFloat,
+    NonNegativeInt,
     PositiveInt,
     PrivateAttr,
     field_validator,
@@ -68,12 +78,19 @@ class QuantitativeModel(BaseModelSO, ABC):
     def sample_proba(self, **kwargs) -> List[QuantitativeProbability]:
         """
         Sample the model.
+
+        Returns
+        -------
+        List[QuantitativeProbability]
+            A list of callable functions, each taking a location (Tuple[Float01, ...])
+            and returning the probability at that location.
+            List length is equal to the number of samples.
         """
 
     @validate_call(config=dict(arbitrary_types_allowed=True))
     def _update(
         self,
-        quantities: List[Union[float, List[float]]],
+        quantities: Optional[List[Union[float, List[float]]]],
         rewards: Union[List[BinaryReward], List[List[BinaryReward]]],
         **kwargs,
     ):
@@ -94,7 +111,7 @@ def _update(
     @abstractmethod
     def _quantitative_update(
         self,
-        quantities: Optional[List[Union[float, List[float], None]]],
+        quantities: List[Union[float, List[float], None]],
         rewards: Union[List[BinaryReward], List[List[BinaryReward]]],
         **kwargs,
     ):
@@ -304,7 +321,7 @@ def __contains__(self, value: Union[float, np.ndarray]) -> bool:
         bool
             Whether the value is contained in the segment.
         """
-        if (isinstance(value, np.ndarray) and value.shape != self.intervals_array.shape[1]) or (
+        if (isinstance(value, np.ndarray) and value.shape[0] != self.intervals_array.shape[0]) or (
             isinstance(value, float) and len(self.intervals_array) != 1
         ):
             raise ValueError("Tested value must have the same shape as the intervals.")
@@ -503,25 +520,46 @@ def _generate_initial_segments(cls, dimension: PositiveInt) -> List[Tuple[Tuple[
 
     def sample_proba(self, **kwargs) -> List[QuantitativeProbability]:
         """
-        Sample an action value from each of the intervals.
+        Sample probability functions from the model.
+
+        Returns
+        -------
+        List[QuantitativeProbability]
+            A list of functions that evaluate probability at any given location.
         """
-        result = []
+        # Get sampled probabilities from each segment model
+        segment_probabilities = {}
         for segment, model in self.segmented_actions.items():
-            sampled_proba = model.sample_proba(**kwargs)
-            random_point = np.random.random((len(sampled_proba), len(segment.intervals)))
-            scaled_quantity = segment.mins.T + random_point * (segment.maxs.T - segment.mins.T)
+            segment_probabilities[segment] = model.sample_proba(**kwargs)
+        return self._to_quantitative_probabilities(segment_probabilities)
 
-            result.append(tuple((tuple(quantity), prob) for quantity, prob in zip(scaled_quantity, sampled_proba)))
-        result = list(zip(*result))
-        return result
+    @abstractmethod
+    def _to_quantitative_probabilities(
+        self, segment_probabilities: Dict[Segment, Union[List[Probability], List[ProbabilityWeight]]]
+    ) -> Union[List[QuantitativeProbability], List[QuantitativeProbabilityWeight]]:
+        """
+        Convert the segment probabilities to quantitative probabilities.
+
+        Parameters
+        ----------
+        segment_probabilities : Dict[Segment, Union[List[Probability], List[ProbabilityWeight]]]
+            The probabilities of each segment.
 
-    def _quantitative_update(self, quantities: List[Union[float, np.ndarray]], rewards: List[BinaryReward], **kwargs):
+        Returns
+        -------
+        Union[List[QuantitativeProbability], List[QuantitativeProbabilityWeight]]
+            The quantitative probabilities.
+        """
+
+    def _quantitative_update(
+        self, quantities: List[Union[float, List[float], None]], rewards: List[BinaryReward], **kwargs
+    ):
         """
         Update the model parameters.
 
         Parameters
         ----------
-        quantities : List[Union[float, np.ndarray]]
+        quantities : List[Union[float, List[float], None]],
             The value associated with each action.
         rewards: List[BinaryReward]
             The reward for each sample.
@@ -533,14 +571,14 @@ def _quantitative_update(self, quantities: List[Union[float, np.ndarray]], rewar
         self._update_segmentation(quantities, segments, rewards, **kwargs)
 
     def _map_and_update_segment_models(
-        self, quantities: List[Union[float, np.ndarray]], rewards: List[BinaryReward], **kwargs
+        self, quantities: List[Union[float, List[float], None]], rewards: List[BinaryReward], **kwargs
     ) -> List[Segment]:
         """
         Map and update the segment models.
 
         Parameters
         ----------
-        quantities : List[Union[float, np.ndarray]]
+        quantities : List[Union[float, List[float], None]]
             The value associated with each action.
         rewards: List[BinaryReward]
             The reward for each sample.
@@ -569,13 +607,16 @@ def _inner_update(self, segments: List[Segment], rewards: List[BinaryReward], **
             Context for update.
         """
 
-    def _map_values_to_segments(self, quantities: List[Union[float, np.ndarray]]) -> List[Segment]:
+    def _map_values_to_segments(
+        self,
+        quantities: List[Union[float, List[float], None]],
+    ) -> List[Segment]:
         segments = [segment for value in quantities for segment in self.segmented_actions.keys() if value in segment]
         return segments
 
     def _update_segmentation(
         self,
-        quantities: List[Union[float, np.ndarray]],
+        quantities: List[Union[float, List[float], None]],
         segments: List[Segment],
         rewards: List[BinaryReward],
         **kwargs,
@@ -587,14 +628,14 @@ def _update_segmentation(
 
         Parameters
         ----------
-        quantities
-        segments
-        rewards
-        kwargs
-
-        Returns
-        -------
-
+        quantities : List[Union[float, List[float], None]]
+            The value associated with each action.
+        segments : List[Segment]
+            All segments in the model.
+        rewards : List[BinaryReward]
+            Rewards for update.
+        kwargs : Dict[str, Any]
+            Keyword arguments for update.
         """
         segments_counts = Counter(segments)
         num_segments = len(self.sub_actions)
@@ -613,7 +654,7 @@ def _update_segmentation(
     def _merge_adjacent_nuisance_segments(
         self,
         nuisance_segments: List[Segment],
-        quantities: List[Union[float, np.ndarray]],
+        quantities: List[Union[float, List[float], None]],
         segments: List[Segment],
         rewards: List[BinaryReward],
         **kwargs,
@@ -625,7 +666,7 @@ def _merge_adjacent_nuisance_segments(
         ----------
         nuisance_segments : List[Segment]
             List of segments to consider for merging.
-        quantities : List[Union[float, np.ndarray]]
+        quantities : List[Union[float, List[float], None]]
             The value associated with each action.
         segments : List[Segment]
             All segments in the model.
@@ -656,7 +697,7 @@ def _merge_adjacent_nuisance_segments(
     def _split_segments_of_interest(
         self,
         interest_segments: List[Segment],
-        quantities: List[Union[float, np.ndarray]],
+        quantities: List[Union[float, List[float], None]],
         segments: List[Segment],
         rewards: List[BinaryReward],
         **kwargs,
@@ -668,7 +709,7 @@ def _split_segments_of_interest(
         ----------
         interest_segments : List[Segment]
             List of segments to consider for splitting.
-        quantities : List[Union[float, np.ndarray]]
+        quantities : List[Union[float, List[float], None]]
             The value associated with each action.
         segments : List[Segment]
             All segments in the model.
@@ -716,11 +757,11 @@ def is_similar_performance(self, segment1: Segment, segment2: Segment) -> bool:
     def _filter_by_segment(
         self,
         reference_segment: Segment,
-        quantities: List[Union[float, np.ndarray]],
+        quantities: List[Union[float, List[float], None]],
         segments: List[Segment],
         rewards: List[BinaryReward],
         **kwargs,
-    ) -> Tuple[List[Union[float, np.ndarray]], List[BinaryReward], Dict[str, Any]]:
+    ) -> Tuple[List[Union[float, List[float], None]], List[BinaryReward], Dict[str, Any]]:
         """
         Filter and update the segments models.
 
@@ -730,14 +771,14 @@ def _filter_by_segment(
             Reference segment to filter upon.
         segments : List[Segment]
             Segments to filter.
-        quantities : List[Union[float, np.ndarray]]
+        quantities : List[Union[float, List[float], None]]
             Values to filter.
         rewards : List[BinaryReward]
             Rewards to filter.
 
         Returns
         -------
-        filtered_values : List[Union[float, np.ndarray]]
+        filtered_values : List[Union[float, List[float], None]]
             Filtered quantities.
         filtered_rewards : List[BinaryReward]
             Filtered rewards.
@@ -799,10 +840,46 @@ def _init_base_model(self):
         """
         self._base_model = Beta()
 
+    def _to_quantitative_probabilities(
+        self, segment_probabilities: Dict[Segment, List[Probability]]
+    ) -> List[QuantitativeProbability]:
+        """
+        Convert the segment probabilities to quantitative probabilities.
+
+        Parameters
+        ----------
+        segment_probabilities : Dict[Segment, List[Probability]]
+            The probabilities of each segment.
+
+        Returns
+        -------
+        List[QuantitativeProbability]
+            The quantitative probabilities.
+        """
+        result = []
+        max_samples = max(len(probas) for probas in segment_probabilities.values())
+        for sample_idx in range(max_samples):
+
+            def create_probability_function(sample_idx: int) -> QuantitativeProbability:
+                def probability_function(quantity: np.ndarray) -> Probability:
+                    """
+                    Evaluate probability at the given quantity.
+                    """
+                    for segment in segment_probabilities.keys():
+                        if quantity in segment:
+                            segment_probas_for_segment = segment_probabilities[segment]
+                            return segment_probas_for_segment[sample_idx]
+                    return 0.0
+
+                return probability_function
+
+            result.append(create_probability_function(sample_idx))
+        return result
+
     @validate_call
     def _quantitative_update(
         self,
-        quantities: Optional[List[Union[float, List[float], None]]],
+        quantities: List[Union[float, List[float], None]],
         rewards: Union[List[BinaryReward], List[List[BinaryReward]]],
     ):
         """
@@ -926,10 +1003,53 @@ def _init_base_model(self):
         """
         self._base_model = BayesianNeuralNetwork.cold_start(**self.base_model_cold_start_kwargs)
 
+    def _to_quantitative_probabilities(
+        self, segment_probabilities: Dict[Segment, List[ProbabilityWeight]]
+    ) -> List[QuantitativeProbabilityWeight]:
+        """
+        Convert the segment probabilities and weights to quantitative probabilities and weights.
+
+        Parameters
+        ----------
+        segment_probabilities : Dict[Segment, List[ProbabilityWeight]]
+            The probabilities and weights of each segment.
+
+        Returns
+        -------
+        List[QuantitativeProbabilityWeight]
+            The quantitative probabilities and weights.
+        """
+        result = []
+        max_samples = max(len(probas) for probas in segment_probabilities.values())
+        n_outputs = len(next(iter(segment_probabilities.values()))[0])
+        for sample_idx in range(max_samples):
+
+            def create_probability_or_weight_function(
+                sample_idx: NonNegativeInt, output_index: NonNegativeInt
+            ) -> Union[QuantitativeProbability, QuantitativeWeight]:
+                def output_function(quantity: np.ndarray) -> Union[Probability, float]:  # Probability or weight
+                    """
+                    Evaluate output at the given quantity.
+                    """
+                    for segment in segment_probabilities.keys():
+                        if quantity in segment:
+                            segment_probas_for_segment = segment_probabilities[segment]
+                            return segment_probas_for_segment[sample_idx][output_index]  # Probability or weight
+                    return 0.0
+
+                return output_function
+
+            result.append(
+                tuple(
+                    create_probability_or_weight_function(sample_idx, output_index) for output_index in range(n_outputs)
+                )
+            )
+        return result
+
     @validate_call(config=dict(arbitrary_types_allowed=True))
     def _quantitative_update(
         self,
-        quantities: Optional[List[Union[float, List[float], None]]],
+        quantities: List[Union[float, List[float], None]],
         rewards: List[BinaryReward],
         context: ArrayLike,
     ):
diff --git a/pybandits/simulator.py b/pybandits/simulator.py
index 3b30220..9358ffa 100644
--- a/pybandits/simulator.py
+++ b/pybandits/simulator.py
@@ -23,11 +23,10 @@
 import os.path
 import random
 from abc import ABC, abstractmethod
-from functools import cached_property, lru_cache
+from functools import cached_property
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 import numpy as np
-import optuna
 import pandas as pd
 from bokeh.core.enums import Palette
 from bokeh.layouts import layout
@@ -336,51 +335,6 @@ def _step(
         self._results = pd.concat((self._results, batch_results), ignore_index=True)
         self.mab.update(actions=actions, rewards=rewards, quantities=quantities, **update_kwargs)
 
-    @staticmethod
-    @lru_cache
-    def _maximize_prob_reward(
-        prob_reward_func: Callable[[np.ndarray], Probability], input_dimension: PositiveInt, n_trials: PositiveInt = 100
-    ) -> Probability:
-        """
-        Maximize the probability of reward for the given function.
-
-        Parameters
-        ----------
-        prob_reward_func : Callable[[np.ndarray], Probability]
-            The probability of reward function.
-        input_dimension : PositiveInt
-            The input dimension.
-        n_trials : PositiveInt, defaults to 100
-            The number of otimization trials.
-
-        Returns
-        -------
-        Probability
-            The global maxima of prob_reward_func.
-        """
-
-        def objective(trial):
-            # Sample points from [0,1] for each dimension
-            points = [trial.suggest_float(f"x{i}", 0, 1) for i in range(input_dimension)]
-            return prob_reward_func(np.array(points))
-
-        # Configure TPE sampler with multivariate optimization
-        sampler = optuna.samplers.TPESampler(
-            multivariate=True,  # Enable multivariate optimization
-            group=True,  # Sample joint distribution of parameters
-            constant_liar=True,  # Better parallel optimization handling
-        )
-
-        # Create and configure the study
-        study = optuna.create_study(sampler=sampler, direction="maximize")
-
-        # Run optimization
-        study.optimize(objective, n_jobs=-1, n_trials=n_trials)  # Use all available cores
-        best_value = study.best_value
-        if (not isinstance(best_value, float)) or (best_value < 0) or (best_value > 1):
-            raise ValueError("The best value must be a float in the interval [0, 1].")
-        return best_value
-
     @abstractmethod
     def _draw_rewards(
         self, actions: List[UnifiedActionId], metadata: Dict[str, List], update_kwargs: Dict[str, np.ndarray]
diff --git a/pybandits/smab_simulator.py b/pybandits/smab_simulator.py
index 6c8737a..a56f4e0 100644
--- a/pybandits/smab_simulator.py
+++ b/pybandits/smab_simulator.py
@@ -31,7 +31,11 @@
 from pybandits.quantitative_model import QuantitativeModel
 from pybandits.simulator import Simulator
 from pybandits.smab import BaseSmabBernoulli
-from pybandits.utils import extract_argument_names_from_function
+from pybandits.utils import (
+    OptimizationFailedError,
+    extract_argument_names_from_function,
+    maximize_by_quantity,
+)
 
 #                                        quantity
 ParametricActionProbability = Callable[[np.ndarray], Probability]
@@ -188,13 +192,18 @@ def _finalize_step(self, batch_results: pd.DataFrame, update_kwargs: Dict[str, n
         quantity = batch_results.loc[:, "quantities"]
         selected_prob_reward = [self._extract_ground_truth((a, q)) for a, q in zip(action_id, quantity)]
         batch_results.loc[:, "selected_prob_reward"] = selected_prob_reward
-        max_prob_reward = [
-            max(
-                self._maximize_prob_reward((lambda q: self.probs_reward[a](q)), m.dimension)
-                if isinstance(m, QuantitativeModel)
-                else self.probs_reward[a]
-                for a, m in self.mab.actions.items()
-            )
-        ] * len(batch_results)
+
+        def get_max_prob_for_action(a: ActionId, m) -> float:
+            """Get maximum probability for an action, handling optimization failures."""
+            if isinstance(m, QuantitativeModel):
+                try:
+                    opt_q = maximize_by_quantity(lambda q: self.probs_reward[a](q), m.dimension)
+                    return self.probs_reward[a](opt_q)
+                except OptimizationFailedError as e:
+                    raise ValueError(f"Optimization failed for action {a}: {e}")
+            else:
+                return self.probs_reward[a]
+
+        max_prob_reward = [max(get_max_prob_for_action(a, m) for a, m in self.mab.actions.items())] * len(batch_results)
         batch_results.loc[:, "max_prob_reward"] = max_prob_reward
         return batch_results
diff --git a/pybandits/strategy.py b/pybandits/strategy.py
index e1ac5fe..b517baf 100644
--- a/pybandits/strategy.py
+++ b/pybandits/strategy.py
@@ -22,42 +22,289 @@
 
 from abc import ABC, abstractmethod
 from random import random
-from typing import Dict, List, Optional, TypeVar, Union
+from typing import Any, Callable, ClassVar, Dict, Generator, List, Optional, Type, TypeVar, Union
 
 import numpy as np
-from scipy.stats import ttest_ind_from_stats
+from loguru import logger
 from typing_extensions import Self
 
-from pybandits.base import ActionId, Float01, Probability, PyBanditsBaseModel, UnifiedActionId
+from pybandits.base import ActionId, Float01, PyBanditsBaseModel, UnifiedActionId
 from pybandits.base_model import BaseModel
-from pybandits.model import BayesianNeuralNetworkMOCC, Beta, BetaMOCC
-from pybandits.pydantic_version_compatibility import field_validator, validate_call
+from pybandits.pydantic_version_compatibility import PrivateAttr, field_validator, validate_call
+from pybandits.quantitative_model import QuantitativeModel
+from pybandits.utils import OptimizationFailedError, maximize_by_quantity
 
-StrategyType = TypeVar("StrategyType", bound="Strategy")
+StrategyType = TypeVar("StrategyType", bound="BaseStrategy")
 
 
-class Strategy(PyBanditsBaseModel, ABC):
+class BaseStrategy(PyBanditsBaseModel, ABC):
     """
-    Strategy to select actions in multi-armed bandits.
+    Abstract base strategy for selecting actions in multi-armed bandits.
+
+    This class defines the interface that all bandit strategies must implement.
+    Strategies determine how to select actions based on their estimated rewards
+    and other criteria.
     """
 
+    @validate_call
     @abstractmethod
     def select_action(
-        self, p: Dict[UnifiedActionId, float], actions: Optional[Dict[ActionId, BaseModel]]
+        self,
+        p: Dict[ActionId, Union[float, Callable[[np.ndarray], float]]],
+        actions: Dict[ActionId, BaseModel],
+        **kwargs,
     ) -> UnifiedActionId:
         """
-        Select the action.
+        Select an action based on the strategy's selection criteria.
+
+        Parameters
+        ----------
+        p : Dict[ActionId, Union[float, Callable[[np.ndarray], float]]]
+            Dictionary mapping action IDs to either:
+            - float: Fixed probability of positive reward
+            - Callable: Function that computes probability given quantity vector
+        actions : Dict[ActionId, BaseModel]
+            Dictionary mapping action IDs to their associated models.
+        **kwargs
+            Additional strategy-specific parameters.
+
+        Returns
+        -------
+        UnifiedActionId
+            The selected action ID, either a simple ActionId or a tuple of
+            (ActionId, quantity_vector) for quantitative actions.
         """
 
-    @classmethod
+
+class SingleObjectiveStrategy(BaseStrategy, ABC):
+    """
+    Abstract strategy for single-objective multi-armed bandits.
+
+    This class handles bandits where each action has a single scalar reward.
+    It provides a framework for refining actions based on constraints and
+    selecting the best action according to strategy-specific criteria.
+
+    """
+
+    _dummy_quantitative_action: ClassVar[str] = "dummy_quantitative_action"
+
     @validate_call
-    def numerize_field(cls, v, field_name: str):
-        return v if v is not None else cls.model_fields[field_name].default
+    def select_action(
+        self,
+        p: Dict[ActionId, Union[float, Callable[[np.ndarray], float]]],
+        actions: Dict[ActionId, BaseModel],
+        constraint: Optional[Callable[[np.ndarray], bool]] = None,
+    ) -> UnifiedActionId:
+        """
+        Select an action for single-objective optimization.
+
+        Parameters
+        ----------
+        p : Dict[ActionId, Union[float, Callable[[np.ndarray], float]]]
+            Dictionary mapping action IDs to either:
+            - float: Fixed probability of positive reward
+            - Callable: Function that computes probability given quantity vector
+        actions : Dict[ActionId, BaseModel]
+            Dictionary mapping action IDs to their associated models.
+        constraint : Optional[Callable[[np.ndarray], bool]], default=None
+            Optional constraint function that returns True if a quantity vector
+            satisfies the constraints.
+
+        Returns
+        -------
+        UnifiedActionId
+            The selected action ID, either a simple ActionId or a tuple of
+            (ActionId, quantity_vector) for quantitative actions.
+        """
+        constraint_list = [constraint] if constraint is not None else None
+        refined_p = self.refine_p(p, actions, constraint_list)
+        best_unified_action = self._select_from_refined_actions(refined_p, actions, constraint_list)
+        return best_unified_action
+
+    def refine_p(
+        self,
+        p: Dict[ActionId, Union[float, Callable[[np.ndarray], float]]],
+        actions: Dict[ActionId, BaseModel],
+        constraint_list: Optional[List[Callable[[np.ndarray], bool]]],
+    ) -> Dict[UnifiedActionId, float]:
+        """
+        Refine action probabilities by evaluating quantitative actions and filtering.
+
+        This method processes both standard and quantitative actions, evaluating
+        quantitative functions at optimal points and filtering actions based on
+        strategy-specific criteria.
+
+        Parameters
+        ----------
+        p : Dict[ActionId, Union[float, Callable[[np.ndarray], float]]]
+            Dictionary of actions and their probability functions or values.
+        actions : Dict[ActionId, BaseModel]
+            Dictionary of actions and their associated models.
+        constraint_list : Optional[List[Callable[[np.ndarray], bool]]]
+            List of constraint functions for quantitative actions.
+
+        Returns
+        -------
+        refined_p: Dict[UnifiedActionId, float]
+            Dictionary mapping unified action IDs to their refined probability values.
+        """
+        if not p or not actions:
+            return {}
+        prerequisites = self.get_prerequisites(p, actions, constraint_list)
+        refined_p = {}
+        for action, proba in p.items():
+            model = actions[action]
+            if callable(proba):  # Quantitative action
+                quantity = self._verify_and_select_from_quantitative_action(
+                    proba, model, constraint_list, **prerequisites
+                )
+                if quantity is not None:
+                    proba_value = proba(quantity)
+                    refined_p[(action, tuple(quantity))] = proba_value
+            elif self._verify_action(proba, **prerequisites):  # Standard action
+                refined_p[action] = proba
+        return refined_p
+
+    @abstractmethod
+    def get_prerequisites(
+        self,
+        p: Dict[ActionId, Union[float, Callable[[np.ndarray], float]]],
+        actions: Dict[ActionId, BaseModel],
+        constraint_list: Optional[List[Callable[[np.ndarray], bool]]],
+    ) -> Dict[str, Any]:
+        """
+        Compute prerequisites needed for strategy-specific action selection.
+
+        This method allows strategies to pre-compute values needed for their
+        selection logic, such as the best available reward for cost control.
+
+        Parameters
+        ----------
+        p : Dict[ActionId, Union[float, Callable[[np.ndarray], float]]]
+            Dictionary mapping action IDs to probability functions or values.
+        actions : Dict[ActionId, BaseModel]
+            Dictionary mapping action IDs to their associated models.
+        constraint_list : Optional[List[Callable[[np.ndarray], bool]]]
+            List of constraint functions for quantitative actions.
+
+        Returns
+        -------
+        Dict[str, Any]
+            Dictionary of prerequisite values needed by the strategy.
+        """
+
+    @abstractmethod
+    def _select_from_refined_actions(
+        self,
+        refined_p: Dict[UnifiedActionId, float],
+        actions: Dict[ActionId, BaseModel],
+        constraint: Optional[Callable[[np.ndarray], bool]] = None,
+    ) -> UnifiedActionId:
+        """
+        Apply strategy-specific logic to select from refined actions.
+
+        Parameters
+        ----------
+        refined_p : Dict[UnifiedActionId, float]
+            Dictionary of unified action IDs to their refined probability values.
+        actions : Dict[ActionId, BaseModel]
+            Dictionary mapping action IDs to their associated models.
+        constraint : Optional[Callable[[np.ndarray], bool]], default=None
+            Optional constraint function for additional filtering.
+
+        Returns
+        -------
+        UnifiedActionId
+            The selected unified action ID based on strategy criteria.
+        """
+
+    @abstractmethod
+    def _verify_action(self, score: float, **kwargs) -> bool:
+        """
+        Determine if a standard action should be considered for selection.
+
+        Parameters
+        ----------
+        score : float
+            The probability or score associated with the action.
+        **kwargs
+            Additional strategy-specific parameters from prerequisites.
+
+        Returns
+        -------
+        bool
+            True if the action meets the strategy's criteria for consideration,
+            False otherwise.
+        """
+
+    @abstractmethod
+    def _verify_and_select_from_quantitative_action(
+        self,
+        score_func: Callable[[np.ndarray], float],
+        model: BaseModel,
+        constraint_list: Optional[List[Callable[[np.ndarray], bool]]],
+        **kwargs,
+    ) -> Optional[np.ndarray]:
+        """
+        Find optimal quantity for a quantitative action if it meets criteria.
+
+        Parameters
+        ----------
+        score_func : Callable[[np.ndarray], float]
+            Function that computes probability/score given a quantity vector.
+        model : BaseModel
+            The model associated with this quantitative action.
+        constraint_list : Optional[List[Callable[[np.ndarray], bool]]]
+            List of constraint functions that quantity must satisfy.
+        **kwargs
+            Additional strategy-specific parameters from prerequisites.
+
+        Returns
+        -------
+        Optional[np.ndarray]
+            Optimal quantity vector if the action meets criteria,
+            None if it should not be considered.
+        """
+
+    def verify_and_select_from_quantitative_action(
+        self,
+        score_func: Callable[[np.ndarray], float],
+        model: BaseModel,
+        constraint_list: Optional[List[Callable[[np.ndarray], bool]]],
+    ) -> Optional[np.ndarray]:
+        """
+        Public interface for verifying and selecting from quantitative actions.
+
+        This method wraps the private implementation to provide a clean public API
+        for finding optimal quantities for quantitative actions.
+
+        Parameters
+        ----------
+        score_func : Callable[[np.ndarray], float]
+            Function that computes probability/score given a quantity vector.
+        model : BaseModel
+            The model associated with this quantitative action.
+        constraint_list : Optional[List[Callable[[np.ndarray], bool]]]
+            List of constraint functions that quantity must satisfy.
 
+        Returns
+        -------
+        Optional[np.ndarray]
+            Optimal quantity vector if found, None otherwise.
+        """
+        p = {self._dummy_quantitative_action: score_func}
+        actions = {self._dummy_quantitative_action: model}
+        prerequisites = self.get_prerequisites(p, actions, constraint_list)
+        return self._verify_and_select_from_quantitative_action(score_func, model, constraint_list, **prerequisites)
 
-class ClassicBandit(Strategy):
+
+class ClassicBandit(SingleObjectiveStrategy):
     """
-    Classic multi-armed bandits strategy.
+    Classic Thompson Sampling strategy for multi-armed bandits.
+
+    This strategy implements pure exploitation by always selecting the action
+    with the highest sampled probability of reward. It considers all actions
+    without any filtering or cost considerations.
 
     References
     ----------
@@ -68,231 +315,308 @@ class ClassicBandit(Strategy):
     https://arxiv.org/pdf/1209.3352.pdf
     """
 
-    @validate_call
-    def select_action(
+    def get_prerequisites(
         self,
-        p: Dict[UnifiedActionId, float],
-        actions: Optional[Dict[UnifiedActionId, BaseModel]] = None,
+        p: Dict[ActionId, Union[float, Callable[[np.ndarray], float]]],
+        actions: Dict[ActionId, BaseModel],
+        constraint_list: Optional[List[Callable[[np.ndarray], bool]]],
+    ) -> Dict[str, Any]:
+        """
+        Compute prerequisites for classic bandit strategy.
+
+        Classic bandits don't require any prerequisites as they consider
+        all actions equally without additional filtering criteria.
+
+        Parameters
+        ----------
+        p : Dict[ActionId, Union[float, Callable[[np.ndarray], float]]]
+            Dictionary mapping action IDs to probability functions or values.
+        actions : Dict[ActionId, BaseModel]
+            Dictionary mapping action IDs to their associated models.
+        constraint_list : Optional[List[Callable[[np.ndarray], bool]]]
+            List of constraint functions (unused in classic bandit).
+
+        Returns
+        -------
+        Dict[str, Any]
+            Empty dictionary as no prerequisites are needed.
+        """
+        return {}
+
+    def _verify_action(self, score: float) -> bool:
+        """
+        Verify if an action should be considered for selection.
+
+        Classic bandits consider all actions regardless of their scores.
+
+        Parameters
+        ----------
+        score : float
+            The probability or score of the action (unused).
+
+        Returns
+        -------
+        bool
+            Always True - all actions are considered in classic bandits.
+        """
+        return True
+
+    def _verify_and_select_from_quantitative_action(
+        self,
+        score_func: Callable[[np.ndarray], float],
+        model: BaseModel,
+        constraint_list: Optional[List[Callable[[np.ndarray], bool]]],
+    ) -> Optional[np.ndarray]:
+        """
+        Find optimal quantity for a quantitative action.
+
+        Classic bandits maximize the score function to find the best quantity
+        vector for quantitative actions.
+
+        Parameters
+        ----------
+        score_func : Callable[[np.ndarray], float]
+            Function that computes probability given a quantity vector.
+        model : BaseModel
+            The model associated with this quantitative action.
+        constraint_list : Optional[List[Callable[[np.ndarray], bool]]]
+            List of constraint functions that quantity must satisfy.
+
+        Returns
+        -------
+        Optional[np.ndarray]
+            Optimal quantity vector that maximizes the score function, or None if optimization fails.
+        """
+        try:
+            return maximize_by_quantity(score_func, model.dimension, constraint_list)
+        except OptimizationFailedError:
+            return None
+
+    def _select_from_refined_actions(
+        self,
+        refined_p: Dict[UnifiedActionId, float],
+        actions: Dict[ActionId, BaseModel],
+        constraint: Optional[Callable[[np.ndarray], bool]] = None,
     ) -> UnifiedActionId:
         """
-        Select the action with the highest probability of getting a positive reward.
+        Select the action with the highest probability.
+
+        This implements pure exploitation by choosing the action with the
+        maximum sampled reward probability.
 
         Parameters
         ----------
-        p : Dict[UnifiedActionId, Probability]
-            The dictionary of actions and their sampled probability of getting a positive reward.
-        actions : Optional[Dict[UnifiedActionId, BaseModel]]
-            The dictionary of actions and their associated model.
+        refined_p : Dict[UnifiedActionId, float]
+            Dictionary of unified action IDs to their probability values.
+        actions : Dict[ActionId, BaseModel]
+            Dictionary mapping action IDs to their models (unused).
+        constraint : Optional[Callable[[np.ndarray], bool]], default=None
+            Optional constraint function (unused).
 
         Returns
         -------
-        selected_action: UnifiedActionId
-            The selected action.
+        UnifiedActionId
+            The action with the highest probability value.
         """
-        return max(p, key=p.get)
+        if not refined_p:
+            raise ValueError("Cannot select action from empty refined_p dictionary")
+        best_unified_action = max(refined_p, key=refined_p.get)
+        return best_unified_action
 
 
-class BestActionIdentificationBandit(Strategy):
+class BestActionIdentificationBandit(ClassicBandit):
     """
     Best-Action Identification (BAI) strategy for multi-armed bandits.
 
+    This strategy balances between exploitation and exploration by probabilistically
+    choosing between the best action and the second-best action. It's designed for
+    scenarios where identifying the truly best action is important.
+
+    Parameters
+    ----------
+    exploit_p : Optional[Float01], default=0.5
+        Probability of selecting the best action versus the second-best action.
+        - If exploit_p = 1: Always selects the best action (pure exploitation/greedy).
+        - If exploit_p = 0: Always selects the second-best action.
+        - If exploit_p = 0.5: Equal probability of selecting best or second-best.
+
     References
     ----------
     Simple Bayesian Algorithms for Best-Arm Identification (Russo, 2018)
     https://arxiv.org/pdf/1602.08448.pdf
-
-    Parameters
-    ----------
-    exploit_p: Optional[Float01], 0.5 if not specified
-        Tuning parameter taking value in [0, 1] which specifies the probability of selecting the best or an alternative
-        action.
-        If exploit_p is 1, the bandit always selects the action with the highest probability of
-        getting a positive reward. That is, it behaves as a Greedy strategy.
-        If exploit_p is 0, the bandit always select the action with 2nd highest probability of getting a positive
-        reward.
     """
 
     exploit_p: Optional[Float01] = 0.5
 
     @field_validator("exploit_p", mode="before")
     @classmethod
-    def numerize_exploit_p(cls, v):
-        return cls.numerize_field(v, "exploit_p")
+    def normalize_exploit_p(cls, v):
+        """
+        Normalize the exploit_p field value to its default if None.
+
+        Parameters
+        ----------
+        v : Any
+            The exploit_p value to normalize.
+
+        Returns
+        -------
+        Float01
+            The original value if not None, otherwise 0.5.
+        """
+        return cls._normalize_field(v, "exploit_p")
 
     @validate_call
     def with_exploit_p(self, exploit_p: Optional[Float01]) -> Self:
         """
-        Instantiate a mutated cost control bandit strategy with an altered subsidy factor.
+        Create a new instance with a different exploitation probability.
 
         Parameters
         ----------
-        exploit_p: Optional[Float01], 0.5 if not specified
-            Tuning parameter taking value in [0, 1] which specifies the probability of selecting the best or an alternative
-            action.
-            If exploit_p is 1, the bandit always selects the action with the highest probability of
-            getting a positive reward. That is, it behaves as a Greedy strategy.
-            If exploit_p is 0, the bandit always select the action with 2nd highest probability of getting a positive
-            reward.
+        exploit_p : Optional[Float01], default=0.5
+            Probability of selecting the best action versus the second-best action.
+            - If exploit_p = 1: Always selects the best action (pure exploitation).
+            - If exploit_p = 0: Always selects the second-best action.
+            - If exploit_p = 0.5: Equal probability of selecting best or second-best.
 
         Returns
         -------
         mutated_best_action_identification : BestActionIdentificationBandit
-            The mutated best action identification strategy.
+            A new instance with the specified exploitation probability.
         """
         mutated_best_action_identification = self._with_argument("exploit_p", exploit_p)
         return mutated_best_action_identification
 
-    @validate_call
-    def select_action(
+    def _select_from_refined_actions(
         self,
-        p: Dict[UnifiedActionId, float],
-        actions: Optional[Dict[UnifiedActionId, BaseModel]] = None,
+        refined_p: Dict[UnifiedActionId, float],
+        actions: Dict[ActionId, BaseModel],
+        constraint: Optional[Callable[[np.ndarray], bool]] = None,
     ) -> UnifiedActionId:
         """
-        Select with probability self.exploit_p the best action (i.e. the action with the highest probability of getting
-        a positive reward), and with probability 1-self.exploit_p it returns the second best action (i.e. the action
-        with the second highest probability of getting a positive reward).
+        Select action based on BAI strategy.
+
+        Probabilistically chooses between the best action (with probability exploit_p)
+        and the second-best action (with probability 1 - exploit_p).
 
         Parameters
         ----------
-        p : Dict[UnifiedActionId, Probability]
-            The dictionary of actions and their sampled probability of getting a positive reward.
-        actions : Optional[Dict[UnifiedActionId, BaseModel]]
-            The dictionary of actions and their associated model.
+        refined_p : Dict[UnifiedActionId, float]
+            Dictionary of unified action IDs to their probability values.
+        actions : Dict[ActionId, BaseModel]
+            Dictionary mapping action IDs to their models (unused).
+        constraint : Optional[Callable[[np.ndarray], bool]], default=None
+            Optional constraint function (unused).
 
         Returns
         -------
-        selected_action: UnifiedActionId
-            The selected action.
+        UnifiedActionId
+            Either the best or second-best action based on exploit_p probability.
         """
-        p = p.copy()
-
-        # select the action with the highest probability
-        selected_action = max(p, key=p.get)
+        # First get the best action
+        best_unified_action = super()._select_from_refined_actions(refined_p, actions, constraint)
 
         # exploit with probability exploit_p and not exploit with probability 1-exploit_p
         take_second_max = self.exploit_p <= random() if self.exploit_p != 1 else False
 
         # select the action with the second-highest probability
         if take_second_max:
-            _ = p.pop(selected_action)
-            selected_action = max(p, key=p.get)
+            refined_p.pop(best_unified_action)
 
-        return selected_action
+            # Get the second best action
+            if refined_p:
+                return super()._select_from_refined_actions(refined_p, actions, constraint)
 
-    # TODO: WIP this is valid only for SmabBernoulli
-    def compare_best_actions(self, actions: Dict[UnifiedActionId, Beta]) -> float:
-        """
-        Compare the 2 best actions, hence the 2 actions with the highest expected means of getting a positive reward.
+        return best_unified_action
 
-        Parameters
-        ----------
-        actions: Dict[UnifiedActionId, Beta]
 
-        Returns
-        ----------
-        pvalue: float
-            p-value result of the statistical test.
-        """
-        sorted_actions_mean = sorted([(counter.mean, a) for a, counter in actions.items()], reverse=True)
-
-        _, first_best_action = sorted_actions_mean[0]
-        _, second_best_action = sorted_actions_mean[1]
-
-        _, pvalue = ttest_ind_from_stats(
-            actions[first_best_action].mean,
-            actions[first_best_action].std,
-            actions[first_best_action].count,
-            actions[second_best_action].mean,
-            actions[second_best_action].std,
-            actions[second_best_action].count,
-            alternative="greater",
-        )
-        return pvalue
+class CostControlStrategy(PyBanditsBaseModel):
+    """
+    Mixin class for cost-aware action selection strategies.
 
+    This class provides functionality for strategies that consider action costs
+    in addition to rewards. It defines a feasible action set based on a tolerance
+    threshold and selects the lowest-cost action from this set.
 
-class CostControlStrategy(Strategy, ABC):
-    """
-    Cost Control (CC) strategy for multi-armed bandits.
+    Parameters
+    ----------
+    subsidy_factor : Optional[Float01], default=0.5
+        Tolerance factor defining the feasible action set as those with rewards
+        in the range [(1-subsidy_factor)*max_reward, max_reward].
+        - If subsidy_factor = 1: Selects minimum cost action (ignores rewards).
+        - If subsidy_factor = 0: Selects highest reward action (ignores costs).
+        - If subsidy_factor = 0.5: Balances between reward and cost.
 
-    Bandits are extended to include a control of the action cost. Each action is associated with a predefined "cost".
+    References
+    ----------
+    Thompson Sampling for Contextual Bandit Problems with Auxiliary Safety Constraints (Daulton et al., 2019)
+    https://arxiv.org/abs/1911.00638
+
+    Multi-Armed Bandits with Cost Subsidy (Sinha et al., 2021)
+    https://arxiv.org/abs/2011.01488
     """
 
-    @classmethod
-    @validate_call
-    def _average(cls, p_of_action: Union[Probability, List[Probability]]):
-        return np.mean(p_of_action)
+    subsidy_factor: Optional[Float01] = 0.5
 
+    @field_validator("subsidy_factor", mode="before")
     @classmethod
-    @validate_call
-    def _evaluate_and_select(
-        cls,
-        p: Union[Dict[UnifiedActionId, Probability], Dict[UnifiedActionId, List[Probability]]],
-        actions: Dict[UnifiedActionId, BaseModel],
-        feasible_actions: List[UnifiedActionId],
-    ) -> UnifiedActionId:
+    def normalize_subsidy_factor(cls, v):
         """
-        Evaluate the feasible actions and select the one with the minimum cost.
+        Normalize the subsidy_factor field value to its default if None.
 
         Parameters
         ----------
-        p: Union[Dict[UnifiedActionId, Probability], Dict[UnifiedActionId, List[Probability]]]
-            The dictionary of actions and their sampled probability of getting a positive reward.
-        actions: Dict[UnifiedActionId, BaseModel]
-            The dictionary of actions and their associated model.
-        feasible_actions: List[UnifiedActionId]
-            The list of feasible actions.
+        v : Any
+            The subsidy_factor value to normalize.
 
         Returns
         -------
-        selected_action: UnifiedActionId
-            The selected action.
-        """
-        # feasible actions enriched with their characteristics (cost, np.mean(probabilities), action_id)
-        # the negative probability ensures that if we order the actions based on their minimum quantities the one with
-        # higher probability will be selected
-        sortable_actions = [
-            (
-                actions[a[0]].cost(*a[1]) if cls._is_quantitative_action(a) else actions[a].cost,
-                -cls._average(p[a]),
-                str(a),
-            )
-            for a in feasible_actions
-        ]
-
-        # select the action with the min cost (and the highest mean of probabilities in case of cost equality)
-        _, _, selected_action = sorted(sortable_actions)[0]
-
-        # return cheapest action from the set of feasible actions
-        return selected_action
+        Float01
+            The original value if not None, otherwise 0.5.
+        """
+        return cls._normalize_field(v, "subsidy_factor")
 
-    @staticmethod
-    def _is_quantitative_action(action: UnifiedActionId) -> bool:
+    @validate_call
+    def with_subsidy_factor(self, subsidy_factor: Optional[Float01]) -> Self:
         """
-        Check whether action represents a standard action or a quantitive one.
+        Create a new instance with a different subsidy factor.
 
         Parameters
         ----------
-        action : UnifiedActionId
-            The action identifier to validate.
+        subsidy_factor : Optional[Float01], default=0.5
+            Tolerance factor defining the feasible action set.
+            - If subsidy_factor = 1: Selects minimum cost action (ignores rewards).
+            - If subsidy_factor = 0: Selects highest reward action (ignores costs).
+            - Values in between balance reward and cost considerations.
 
         Returns
         -------
-        bool
-            True for quantitive action, False for standard action
+        mutated_cost_control_bandit
+            A new instance with the specified subsidy factor.
         """
-        return isinstance(action, tuple)
+        mutated_cost_control_bandit = self._with_argument("subsidy_factor", subsidy_factor)
+        return mutated_cost_control_bandit
 
 
-class CostControlBandit(CostControlStrategy):
+class CostControlBandit(SingleObjectiveStrategy, CostControlStrategy):
     """
-    Cost Control (CC) strategy for multi-armed bandits.
+    Cost-controlled Thompson Sampling strategy for multi-armed bandits.
 
-    Bandits are extended to include a control of the action cost. Each action is associated with a predefined "cost".
-    At prediction time, the model considers the actions whose expected rewards are above a pre-defined lower bound.
-    Among these actions, the one with the lowest associated cost is recommended. The expected reward interval for
-    feasible actions is defined as [(1-subsidy_factor)*max_p, max_p], where max_p is the highest expected reward sampled
-    value.
+    This strategy extends classic bandits by considering action costs. It first
+    identifies a feasible set of actions whose rewards are within a tolerance of
+    the best reward, then selects the lowest-cost action from this set.
+
+    The feasible action set is defined as those with expected rewards in the range
+    [(1-subsidy_factor)*max_reward, max_reward], where max_reward is the highest
+    sampled reward value.
+
+    Parameters
+    ----------
+    subsidy_factor : Optional[Float01], default=0.5
+        Tolerance factor defining the feasible action set.
+        - If subsidy_factor = 1: Always selects minimum cost action.
+        - If subsidy_factor = 0: Always selects highest reward action (classic bandit).
+        - Values in between balance reward and cost considerations.
 
     References
     ----------
@@ -301,112 +625,293 @@ class CostControlBandit(CostControlStrategy):
 
     Multi-Armed Bandits with Cost Subsidy (Sinha et al., 2021)
     https://arxiv.org/abs/2011.01488
-
-    Parameters
-    ----------
-    subsidy_factor: Optional[Float01], 0.5 if not specified
-        Number in [0, 1] to define smallest tolerated probability reward, hence the set of feasible actions.
-        If subsidy_factor is 1, the bandits always selects the action with the minimum cost.
-        If subsidy_factor is 0, the bandits always selects the action with highest probability of getting a positive
-        reward (it behaves as a classic Bernoulli bandit).
     """
 
-    subsidy_factor: Optional[Float01] = 0.5
+    def get_prerequisites(
+        self,
+        p: Dict[ActionId, Union[float, Callable[[np.ndarray], float]]],
+        actions: Dict[ActionId, BaseModel],
+        constraint_list: Optional[List[Callable[[np.ndarray], bool]]],
+    ) -> Dict[str, Any]:
+        """
+        Compute the best available reward for defining the feasible action set.
 
-    @field_validator("subsidy_factor", mode="before")
-    @classmethod
-    def numerize_subsidy_factor(cls, v):
-        return cls.numerize_field(v, "subsidy_factor")
+        This method finds the maximum reward value across all actions, which is
+        used to determine the reward threshold for feasible actions.
 
-    @validate_call
-    def with_subsidy_factor(self, subsidy_factor: Optional[Float01]) -> Self:
+        Parameters
+        ----------
+        p : Dict[ActionId, Union[float, Callable[[np.ndarray], float]]]
+            Dictionary mapping action IDs to probability functions or values.
+        actions : Dict[ActionId, BaseModel]
+            Dictionary mapping action IDs to their associated models.
+        constraint_list : Optional[List[Callable[[np.ndarray], bool]]]
+            List of constraint functions for quantitative actions.
+
+        Returns
+        -------
+        Dict[str, Any]
+            Dictionary containing 'best_value': the maximum reward value.
         """
-        Instantiate a mutated cost control bandit strategy with an altered subsidy factor.
+        classic_bandit = ClassicBandit()
+        best_classic_unified_action = classic_bandit.select_action(p, actions, constraint_list)
+        best_value = (
+            p[best_classic_unified_action]
+            if isinstance(best_classic_unified_action, str)
+            else p[best_classic_unified_action[0]](best_classic_unified_action[1])
+        )
+        return {"best_value": best_value}
+
+    def _select_from_refined_actions(
+        self,
+        refined_p: Dict[UnifiedActionId, float],
+        actions: Dict[ActionId, BaseModel],
+        constraint: Optional[Callable[[np.ndarray], bool]] = None,
+    ) -> UnifiedActionId:
+        """
+        Select the lowest-cost action from the feasible set.
+
+        Actions are sorted primarily by cost (ascending) and secondarily by
+        probability (descending) to break ties.
 
         Parameters
         ----------
-        subsidy_factor : Optional[Float01], 0.5 if not specified
-            Number in [0, 1] to define smallest tolerated probability reward, hence the set of feasible actions.
-            If subsidy_factor is 1, the bandits always selects the action with the minimum cost.
-            If subsidy_factor is 0, the bandits always selects the action with highest probability of getting a positive
-            reward (it behaves as a classic Bernoulli bandit).
+        refined_p : Dict[UnifiedActionId, float]
+            Dictionary of feasible actions and their probability values.
+        actions : Dict[ActionId, BaseModel]
+            Dictionary mapping action IDs to their models (for cost information).
+        constraint : Optional[Callable[[np.ndarray], bool]], default=None
+            Optional constraint function (unused).
 
         Returns
         -------
-        mutated_cost_control_bandit : CostControlBandit
-            The mutated cost control bandit strategy.
+        UnifiedActionId
+            The action with minimum cost among feasible actions.
         """
-        mutated_cost_control_bandit = self._with_argument("subsidy_factor", subsidy_factor)
-        return mutated_cost_control_bandit
 
-    @validate_call
-    def select_action(
-        self, p: Dict[UnifiedActionId, Probability], actions: Dict[UnifiedActionId, BaseModel]
-    ) -> UnifiedActionId:
+        # Apply cost control logic
+        sortable_actions = []
+        for action, proba in refined_p.items():
+            cost = actions[action[0]].cost(action[1]) if isinstance(action, tuple) else actions[action].cost
+            sortable_actions.append((cost, -proba, action))
+
+        if not sortable_actions:
+            return max(refined_p, key=refined_p.get)
+
+        # select the action with the min cost (and the highest mean of probabilities in case of cost equality)
+        _, _, best_unified_action = sorted(sortable_actions)[0]
+
+        # return cheapest action from the set of feasible actions
+        return best_unified_action
+
+    def _verify_action(self, score: float, best_value: float) -> bool:
         """
-        Select the action with the minimum cost among the set of feasible actions (the actions whose expected rewards
-        are above a certain lower bound defined as [(1-subsidy_factor)*max_p, max_p], where max_p is the highest
-        expected reward sampled value.
+        Check if an action's reward is within the feasible threshold.
+
+        An action is feasible if its reward is at least (1-subsidy_factor) times
+        the best available reward.
 
         Parameters
         ----------
-        p: Dict[UnifiedActionId, Probability]
-            The dictionary or actions and their sampled probability of getting a positive reward.
-        actions: Dict[UnifiedActionId, BetaCC]
-            The dictionary or actions and their cost.
+        score : float
+            The reward/probability of the action.
+        best_value : float
+            The maximum reward across all actions.
 
         Returns
         -------
-        selected_action: UnifiedActionId
-            The selected action.
+        bool
+            True if the action's reward is above the threshold, False otherwise.
         """
-        # get the highest expected reward sampled value
-        max_p = max(p.values())
+        return score >= best_value * (1 - self.subsidy_factor)
 
-        # define the set of feasible actions
-        feasible_actions = [a for a in p.keys() if p[a] >= (1 - self.subsidy_factor) * max_p]
+    def _verify_and_select_from_quantitative_action(
+        self,
+        score_func: Callable[[np.ndarray], float],
+        model: BaseModel,
+        constraint_list: Optional[List[Callable[[np.ndarray], bool]]],
+        best_value: float,
+    ) -> Optional[np.ndarray]:
+        """
+        Find the minimum-cost quantity that meets the reward threshold.
 
-        selected_action = self._evaluate_and_select(p, actions, feasible_actions)
-        return selected_action
+        This method adds a reward threshold constraint and then minimizes cost
+        subject to all constraints.
 
+        Parameters
+        ----------
+        score_func : Callable[[np.ndarray], float]
+            Function that computes reward given a quantity vector.
+        model : BaseModel
+            The model associated with this quantitative action.
+        constraint_list : Optional[List[Callable[[np.ndarray], bool]]]
+            List of existing constraint functions.
+        best_value : float
+            The maximum reward across all actions.
 
-class MultiObjectiveStrategy(Strategy, ABC):
+        Returns
+        -------
+        Optional[np.ndarray]
+            Optimal quantity vector that minimizes cost while meeting the
+            reward threshold, or None if no feasible solution exists.
+        """
+
+        def cost_control_constraint(x: np.ndarray) -> bool:
+            return score_func(x) >= best_value * (1 - self.subsidy_factor)
+
+        if constraint_list is not None:
+            constraint_list.append(cost_control_constraint)
+        else:
+            constraint_list = [cost_control_constraint]
+        try:
+            return maximize_by_quantity(lambda x: -model.cost(x), model.dimension, constraint_list)
+        except OptimizationFailedError:
+            return None
+
+
+class MultiObjectiveStrategy(BaseStrategy, ABC):
     """
-    Multi Objective Strategy to select actions in multi-armed bandits.
+    Abstract strategy for multi-objective multi-armed bandits.
+
+    This class handles bandits where each action has multiple reward objectives.
+    It selects actions from the Pareto front - the set of non-dominated actions
+    where no other action is better in all objectives.
     """
 
-    @classmethod
+    # Class variable to define how to select the best action for each objective
+    objective_selector_class: ClassVar[Type[SingleObjectiveStrategy]]
+    _objective_selector: SingleObjectiveStrategy = PrivateAttr()
+
+    def __init__(self, **data):
+        super().__init__(**data)
+        self._objective_selector = self.objective_selector_class(**data)
+
     @validate_call
-    def get_pareto_front(cls, p: Dict[UnifiedActionId, List[float]]) -> List[UnifiedActionId]:
+    def select_action(
+        self,
+        p: Dict[ActionId, Union[List[float], Callable[[np.ndarray], List[float]]]],
+        actions: Dict[ActionId, BaseModel],
+    ) -> UnifiedActionId:
+        """
+        Select an action from the Pareto front.
+
+        This method finds all Pareto-optimal actions and randomly selects one,
+        giving equal probability to each non-dominated action.
+
+        Parameters
+        ----------
+        p : Dict[ActionId, Union[List[float], Callable[[np.ndarray], List[float]]]]
+            Dictionary mapping action IDs to either:
+            - List[float]: Fixed reward vector for multiple objectives
+            - Callable: Function that computes reward vector given quantity
+        actions : Dict[ActionId, BaseModel]
+            Dictionary mapping action IDs to their associated models.
+
+        Returns
+        -------
+        UnifiedActionId
+            A randomly selected action from the Pareto front.
+        """
+        pareto_front = self._get_pareto_front(p=p, actions=actions)
+        return np.random.choice(pareto_front)
+
+    def _get_feasible_solutions(
+        self,
+        p: Dict[ActionId, Union[List[float], Callable[[np.ndarray], List[float]]]],
+        actions: Dict[ActionId, BaseModel],
+    ) -> Dict[UnifiedActionId, List[float]]:
         """
-        Create Pareto optimal set of actions (Pareto front) A* identified as actions that are not dominated by
-        any action out of the set A*.
+        Get feasible solutions for each objective.
 
-        Parameters:
-        -----------
-        p: Dict[UnifiedActionId, Probability]
-            The dictionary or actions and their sampled probability of getting a positive reward for each objective.
+        Applies logic independently to each objective, finding actions that meet the selection that objective.
 
-        Return
-        ------
-        pareto_front: set
-            The list of Pareto optimal actions
+        Parameters
+        ----------
+        p : Dict[ActionId, List[float]]
+            Dictionary mapping action IDs to reward vectors.
+        actions : Dict[ActionId, BaseModel]
+            Dictionary mapping action IDs to their models.
+
+        Returns
+        -------
+        Dict[UnifiedActionId, List[float]]
+            Feasible actions considering logic for each objective.
+        """
+        action_id = list(p.keys())[0]
+        if isinstance(action_id, tuple):
+            action_id = action_id[0]
+        n_objectives = len(actions[action_id].models)
+        feasible_solutions = {}
+        # Separate discrete and quantitative actions
+        discrete_actions = {aid: prob_or_func for aid, prob_or_func in p.items() if not callable(prob_or_func)}
+        quantitative_actions = {aid: prob_or_func for aid, prob_or_func in p.items() if callable(prob_or_func)}
+
+        # For discrete actions, add directly (they already have full reward vectors)
+        feasible_solutions.update(discrete_actions)
+
+        # For quantitative actions, refine per objective
+        if quantitative_actions:
+            for i in range(n_objectives):
+                # Fix closure bug: create a new function that captures i by value
+                def make_objective_extractor(obj_idx):
+                    return lambda x: x[obj_idx]
+
+                objective_p = {action_id: make_objective_extractor(i) for action_id in quantitative_actions.keys()}
+                objective_actions = {
+                    action_id: actions[action_id].models[i] for action_id in quantitative_actions.keys()
+                }
+
+                refined = self._objective_selector.refine_p(objective_p, objective_actions, None)
+
+                # Build multi-objective vectors from per-objective results
+                for unified_action_id in refined.keys():
+                    if unified_action_id not in feasible_solutions:
+                        feasible_solutions[unified_action_id] = quantitative_actions[unified_action_id[0]](
+                            unified_action_id[1]
+                        )
+
+        return feasible_solutions
+
+    def _get_exact_pareto_front(
+        self, p: Dict[UnifiedActionId, List[float]], actions: Dict[ActionId, BaseModel]
+    ) -> List[UnifiedActionId]:
         """
+        Compute the exact Pareto front for discrete action sets.
+
+        An action is Pareto-optimal if no other action dominates it (i.e., is
+        better or equal in all objectives and strictly better in at least one).
+
+        Parameters
+        ----------
+        p : Dict[UnifiedActionId, List[float]]
+            Dictionary mapping unified action IDs to their reward vectors.
+        actions : Dict[ActionId, BaseModel]
+            Dictionary mapping action IDs to their models.
+
+        Returns
+        -------
+        List[UnifiedActionId]
+            List of Pareto-optimal action IDs.
+        """
+        feasible_solutions = self._get_feasible_solutions(p, actions)
         # store non dominated actions
         pareto_front = []
 
-        for this_action in p.keys():
+        for this_action in feasible_solutions.keys():
             is_pareto = True  # we assume that action is Pareto Optimal until proven otherwise
-            other_actions = [a for a in p.keys() if a != this_action]
+            other_actions = [a for a in feasible_solutions.keys() if a != this_action]
 
             for other_action in other_actions:
                 # check if this_action is not dominated by other_action based on
                 # multiple objectives reward prob vectors
                 is_dominated = not (
                     # an action cannot be dominated by an identical one
-                    (p[this_action] == p[other_action])
+                    (feasible_solutions[this_action] == feasible_solutions[other_action])
                     # otherwise, apply the classical definition
-                    or any(p[this_action][i] > p[other_action][i] for i in range(len(p[this_action])))
+                    or any(
+                        feasible_solutions[this_action][i] > feasible_solutions[other_action][i]
+                        for i in range(len(feasible_solutions[this_action]))
+                    )
                 )
 
                 if is_dominated:
@@ -421,72 +926,417 @@ def get_pareto_front(cls, p: Dict[UnifiedActionId, List[float]]) -> List[Unified
 
         return pareto_front
 
+    def _get_approximate_pareto_front(
+        self,
+        p: Dict[ActionId, Union[List[float], Callable[[np.ndarray], List[float]]]],
+        actions: Dict[ActionId, BaseModel],
+        n_divisions: int = 10,
+    ) -> List[UnifiedActionId]:
+        """
+        Approximate the Pareto front for continuous/quantitative actions.
 
-class MultiObjectiveBandit(MultiObjectiveStrategy):
-    """
-    Multi-Objective (MO) strategy for multi-armed bandits.
+        Uses the Normal Constraint method with Das-Dennis weight generation to
+        systematically sample the Pareto front for quantitative actions.
 
-    The reward pertaining to an action is a multidimensional vector instead of a scalar value. In this setting,
-    different actions are compared according to Pareto order between their expected reward vectors, and those actions
-    whose expected rewards are not inferior to that of any other actions are called Pareto optimal actions, all of which
-    constitute the Pareto front.
+        Parameters
+        ----------
+        p : Dict[ActionId, Union[List[float], Callable[[np.ndarray], List[float]]]]
+            Dictionary mapping action IDs to reward vectors or functions.
+        actions : Dict[ActionId, BaseModel]
+            Dictionary mapping action IDs to their models.
+        n_divisions : int, default=10
+            Number of divisions for weight vector generation. Higher values
+            provide better approximation but increase computation.
 
-    References
-    ----------
-    Thompson Sampling for Multi-Objective Multi-Armed Bandits Problem (Yahyaa and Manderick, 2015)
-    https://www.researchgate.net/publication/272823659_Thompson_Sampling_for_Multi-Objective_Multi-Armed_Bandits_Problem
-    """
+        Returns
+        -------
+        List[UnifiedActionId]
+            List of approximately Pareto-optimal actions.
+        """
+        if not p:
+            return []
+
+        approximate_p = {}
+        n_objectives = len(actions[list(p.keys())[0]].models)
+
+        for action_id, prob_or_func in p.items():
+            if callable(prob_or_func):
+                # Quantitative action - find Pareto optimal input points
+                pareto_input_points = self._find_pareto_front_normal_constraint(
+                    prob_or_func, actions[action_id].dimension, n_objectives, n_divisions, actions[action_id]
+                )
+                approximate_p.update(
+                    {(action_id, tuple(input_point)): prob_or_func(input_point) for input_point in pareto_input_points}
+                )
+            else:
+                # Standard action with fixed reward vector
+                approximate_p[action_id] = prob_or_func
+
+        return self._get_exact_pareto_front(approximate_p, actions)
 
     @validate_call
-    def select_action(self, p: Dict[UnifiedActionId, List[float]], **kwargs) -> UnifiedActionId:
+    def _find_pareto_front_normal_constraint(
+        self,
+        func: Callable[[np.ndarray], List[float]],
+        input_dim: int,
+        n_objectives: int,
+        n_divisions: int,
+        model: BaseModel,
+    ) -> List[np.ndarray]:
         """
-        Select an action at random from the Pareto optimal set of action. The Pareto optimal action set (Pareto front)
-        A* is the set of actions not dominated by any other actions not in A*. Dominance relation is established based
-        on the objective reward probabilities vectors.
+        Find Pareto front using Normal Constraint method with Das-Dennis weight generation for a single function.
+
+        This method systematically explores the Pareto front by solving constrained
+        optimization problems with different weight vectors.
 
         Parameters
         ----------
-        p: Dict[ActionId, List[Probability]]
-             The dictionary of actions and their sampled probability of getting a positive reward for each objective.
+        func : Callable[[np.ndarray], List[float]]
+            Function mapping quantity vectors to reward vectors.
+        input_dim : int
+            Dimension of the input quantity vector.
+        n_objectives : int
+            Number of reward objectives.
+        n_divisions : int
+            Number of divisions for weight generation (controls approximation quality).
+        model : BaseModel
+            The model for this quantitative action.
 
         Returns
         -------
-        selected_action: ActionId
-            The selected action.
+        List[np.ndarray]
+            List of Pareto-optimal quantity vectors.
+
+        References
+        ----------
+        The normalized normal constraint method for generating the Pareto frontier (Messac et al., 2003)
+        https://ieeexplore.ieee.org/document/938649
         """
-        return np.random.choice(self.get_pareto_front(p=p))
+        # Step 1: Find anchor points using optimization for each objective
+        anchor_points = [
+            self._objective_selector.verify_and_select_from_quantitative_action(
+                lambda x: func(x)[i], model.models[i], None
+            )
+            for i in range(n_objectives)
+        ]
+        anchor_rewards = [func(anchor_point) for anchor_point in anchor_points]
 
+        anchor_matrix = np.array(anchor_rewards)  # n_objectives x n_objectives
+        anchor_points = np.array(anchor_points)  # n_objectives x input_dim
 
-class MultiObjectiveCostControlBandit(MultiObjectiveStrategy, CostControlStrategy):
-    """
-    Multi-Objective (MO) with Cost Control (CC) strategy for multi-armed bandits.
+        # Step 2: Generate Das-Dennis weight vectors
+        weight_vectors = self._das_dennis_weights(n_objectives, n_divisions)
 
-    This strategy allows the reward to be a multidimensional vector and include a control of the action cost. It merges
-    the Multi-Objective and Cost Control strategies.
-    """
+        # Step 3: For each weight vector, solve NC subproblem
+        nc_solutions = set(tuple(anchor_point) for anchor_point in anchor_points)
+        utopia = np.max(anchor_matrix, axis=0)  # Ideal point
 
-    @validate_call
-    def select_action(
+        for weight in weight_vectors:
+            solution = self._solve_nc_subproblem(func, anchor_matrix, anchor_points, utopia, weight, model)
+            if solution is not None:
+                nc_solutions.add(tuple(solution))
+
+        return list(nc_solutions)
+
+    @staticmethod
+    def _das_dennis_weights(n_objectives: int, n_divisions: int) -> np.ndarray:
+        """
+        Generate Das-Dennis weight vectors for systematic Pareto front sampling.
+
+        Creates uniformly distributed weight vectors on the unit simplex using
+        the Das-Dennis method, which provides systematic coverage of the
+        objective space.
+
+        Parameters
+        ----------
+        n_objectives : int
+            Number of objectives/dimensions.
+        n_divisions : int
+            Number of divisions per dimension. Total weights generated is
+            approximately (n_divisions + n_objectives - 1)! / (n_divisions! * (n_objectives - 1)!).
+
+        Returns
+        -------
+        np.ndarray
+            Array of shape (n_weights, n_objectives) containing weight vectors.
+        """
+
+        def generate_recursive(
+            n_obj: int, n_div: int, current_weight: List[int], depth: int = 0
+        ) -> Generator[np.ndarray, None, None]:
+            """
+            Recursively generate weight combinations for Das-Dennis method.
+
+            Parameters
+            ----------
+            n_obj : int
+                Number of objectives.
+            n_div : int
+                Remaining divisions to allocate.
+            current_weight : List[int]
+                Current partial weight vector being built.
+            depth : int
+                Current recursion depth (objective index).
+
+            Yields
+            ------
+            np.ndarray
+                Normalized weight vectors summing to 1.
+            """
+            if depth == n_obj - 1:
+                current_weight.append(n_div)
+                yield np.array(current_weight) / n_divisions
+                current_weight.pop()
+                return
+
+            for i in range(n_div + 1):
+                current_weight.append(i)
+                yield from generate_recursive(n_obj, n_div - i, current_weight, depth + 1)
+                current_weight.pop()
+
+        weights = list(generate_recursive(n_objectives, n_divisions, []))
+        return np.array(weights)
+
+    def _solve_nc_subproblem(
         self,
-        p: Dict[UnifiedActionId, List[Probability]],
-        actions: Dict[UnifiedActionId, Union[BetaMOCC, BayesianNeuralNetworkMOCC]],
-    ) -> UnifiedActionId:
+        func: Callable,
+        anchor_matrix: np.ndarray,
+        utopia: np.ndarray,
+        weight: np.ndarray,
+        model: BaseModel,
+        epsilon: float = 1e-10,
+    ) -> Optional[np.ndarray]:
+        """
+        Solve a single Normal Constraint optimization subproblem.
+
+        Maximizes a weighted objective while constraining other objectives to lie
+        on the "reference point side" of hyperplanes through anchor points.
+
+        Parameters
+        ----------
+        func : Callable
+            The multi-objective function to optimize.
+        anchor_matrix : np.ndarray
+            Matrix of anchor points (extreme points for each objective).
+        utopia : np.ndarray
+            The utopia point (ideal but typically unachievable point).
+        weight : np.ndarray
+            Weight vector determining the reference point and primary objective.
+        model : BaseModel
+            The model for constraint evaluation.
+        epsilon : float, default=1e-10
+            Numerical tolerance for constraint satisfaction.
+
+        Returns
+        -------
+        Optional[np.ndarray]
+            Optimal solution if found and feasible, None otherwise.
+        """
+        n_objectives = len(weight)
+        primary_obj = np.argmax(weight)
+
+        # Step #1: Create the utopia-based coordinate system
+        # Transform the problem so utopia is at origin
+        transformed_anchors = anchor_matrix - utopia  # Anchors relative to utopia
+
+        # Step #2: Find reference point using weight vector from utopia
+        # This is where the weight ray from utopia intersects the anchor hyperplane
+        reference_point_transformed = self._find_utopia_reference_point(transformed_anchors, weight, epsilon)
+        reference_point = reference_point_transformed + utopia  # Back to original coordinates
+
+        # Step #3: Create Normal Constraint boundaries using utopia geometry
+        constraint_normals = []
+        constraint_intercepts = []
+
+        for i in range(n_objectives):
+            if i != primary_obj:
+                # Normal vector points from anchor_i towards utopia
+                # This creates the "feasible cone" emanating from utopia
+                normal_direction = reference_point - anchor_matrix[i]
+
+                # The constraint hyperplane passes through anchor_i with this normal
+                if np.linalg.norm(normal_direction) > epsilon:
+                    normal = normal_direction / np.linalg.norm(normal_direction)
+                    intercept = np.dot(normal, anchor_matrix[i])
+
+                    constraint_normals.append(normal)
+                    constraint_intercepts.append(intercept)
+
+        def reference_based_constraints(x: np.ndarray) -> bool:
+            """
+            Check if a point satisfies Normal Constraint boundaries.
+
+            Verifies that the function value at x lies on the correct side of all
+            constraint hyperplanes defined by the anchor points and reference point.
+
+            Parameters
+            ----------
+            x : np.ndarray
+                Input point to evaluate.
+
+            Returns
+            -------
+            bool
+                True if all constraints are satisfied, False otherwise.
+            """
+            rewards = np.array(func(x))
+
+            for normal, intercept in zip(constraint_normals, constraint_intercepts):
+                # Constraint: normal · f(x) >= intercept
+                # Geometric meaning: f(x) is on the reference point side of the boundary
+                constraint_value = np.dot(normal, rewards) - intercept
+
+                if constraint_value < -epsilon:  # Tolerance for numerical errors
+                    return False
+            return True
+
+        def objective_function(x: np.ndarray) -> float:
+            """
+            Extract the primary objective value for maximization.
+
+            Parameters
+            ----------
+            x : np.ndarray
+                Input point to evaluate.
+
+            Returns
+            -------
+            float
+                Value of the primary objective at x.
+            """
+            return func(x)[primary_obj]
+
+        # Solve the constrained optimization
+        try:
+            solution = self._objective_selector.verify_and_select_from_quantitative_action(
+                objective_function, model.models[primary_obj], reference_based_constraints
+            )
+
+            if reference_based_constraints(solution):
+                return solution
+            else:
+                return None
+
+        except Exception as e:
+            logger.error(f"NC subproblem failed: {e}")
+            return None
+
+    @classmethod
+    def _find_utopia_reference_point(
+        cls, transformed_anchors: np.ndarray, weight: np.ndarray, epsilon: float
+    ) -> np.ndarray:
         """
-        Select the action with the minimum cost among the Pareto optimal set of action. The Pareto optimal
-        action set (Pareto front) A* is the set of actions not dominated by any other actions not in A*. Dominance
-        relation is established based on the objective reward probabilities vectors.
+        Find the reference point for Normal Constraint method.
+
+        Computes where a ray from the utopia point in the direction of the weight
+        vector intersects the hyperplane defined by the anchor points.
 
         Parameters
         ----------
-        p: Dict[UnifiedActionId, List[Probability]]
-             The dictionary of actions and their sampled probability of getting a positive reward for each objective.
+        transformed_anchors : np.ndarray
+            Anchor points transformed relative to utopia point.
+        weight : np.ndarray
+            Direction vector from utopia point.
+        epsilon : float
+            Numerical tolerance for degeneracy detection.
 
         Returns
         -------
-        selected_action: UnifiedActionId
-            The selected action.
+        np.ndarray
+            The reference point in the transformed coordinate system.
+        """
+
+        # ray-hyperplane intersection
+        anchor_center = np.mean(transformed_anchors, axis=0)
+        anchor_vectors = transformed_anchors - anchor_center
+
+        try:
+            # Find hyperplane normal
+            U, _, _ = np.linalg.svd(anchor_vectors.T, full_matrices=True)
+            hyperplane_normal = U[:, -1]
+
+            # Ray intersection
+            numerator = np.dot(hyperplane_normal, anchor_center)
+            denominator = np.dot(hyperplane_normal, weight)
+
+            if abs(denominator) > epsilon:
+                t = numerator / denominator
+                intersection = t * weight
+                return intersection
+            else:
+                return np.dot(weight, transformed_anchors)
+
+        except np.linalg.LinAlgError:
+            return np.dot(weight, transformed_anchors)
+
+    def _get_pareto_front(
+        self,
+        p: Dict[ActionId, Union[List[float], List[Callable[[np.ndarray], float]]]],
+        actions: Dict[ActionId, BaseModel],
+    ) -> List[UnifiedActionId]:
         """
-        pareto_set = self.get_pareto_front(p=p)
+        Compute the Pareto front, using exact or approximate methods as appropriate.
+
+        Automatically selects between exact computation (for discrete actions) and
+        approximation (when quantitative actions are present).
+
+        Parameters
+        ----------
+        p : Dict[ActionId, Union[List[float], List[Callable[[np.ndarray], float]]]]
+            Dictionary mapping action IDs to reward vectors or functions.
+        actions : Dict[ActionId, BaseModel]
+            Dictionary mapping action IDs to their models.
+
+        Returns
+        -------
+        List[UnifiedActionId]
+            List of Pareto-optimal actions.
+        """
+        includes_quantitative_actions = any(isinstance(actions[a], QuantitativeModel) for a in p.keys())
+        return (
+            self._get_approximate_pareto_front(p, actions)
+            if includes_quantitative_actions
+            else self._get_exact_pareto_front(p, actions)
+        )
+
+
+class MultiObjectiveBandit(MultiObjectiveStrategy):
+    """
+    Multi-objective Thompson Sampling strategy for multi-armed bandits.
+
+    This strategy handles vector-valued rewards where each action produces multiple
+    reward outcomes. Actions are selected from the Pareto front - the set of
+    non-dominated actions where no other action is superior in all objectives.
+
+    The strategy uses Thompson Sampling for exploration by sampling from posterior
+    distributions and then selecting uniformly from the resulting Pareto front.
+
+
+
+    References
+    ----------
+    Thompson Sampling for Multi-Objective Multi-Armed Bandits Problem (Yahyaa and Manderick, 2015)
+    https://www.researchgate.net/publication/272823659_Thompson_Sampling_for_Multi-Objective_Multi-Armed_Bandits_Problem
+    """
+
+    # Use ClassicBandit's selection strategy for finding extreme points
+    objective_selector_class: ClassVar[Type[SingleObjectiveStrategy]] = ClassicBandit
+
+
+class MultiObjectiveCostControlBandit(MultiObjectiveStrategy, CostControlStrategy):
+    """
+    Multi-objective strategy with cost control for multi-armed bandits.
+
+    Combines multi-objective optimization with cost awareness. For each objective,
+    identifies actions within a tolerance of the best reward, then considers only
+    the lowest-cost actions from these feasible sets when computing the Pareto front.
+
+    This strategy is useful when actions have both multiple reward objectives and
+    associated costs, requiring a balance between Pareto-optimality and cost efficiency.
+
+
+
+    """
 
-        selected_action = self._evaluate_and_select(p, actions, pareto_set)
-        return selected_action
+    # Use CostControlBandit's selection strategy for finding extreme points
+    objective_selector_class: ClassVar[Type[SingleObjectiveStrategy]] = CostControlBandit
diff --git a/pybandits/utils.py b/pybandits/utils.py
index c9b1230..10bc8db 100644
--- a/pybandits/utils.py
+++ b/pybandits/utils.py
@@ -25,6 +25,7 @@
 from types import ModuleType
 from typing import Callable, List, Optional, Tuple
 
+import numpy as np
 from bokeh.io import curdoc, output_file, output_notebook, save, show
 from bokeh.models import InlineStyleSheet, TabPanel, Tabs
 
@@ -36,7 +37,27 @@
     _IPYTHON_AVAILABLE = False
     get_ipython = None  # type: ignore
 
-from pybandits.pydantic_version_compatibility import validate_call
+
+from loguru import logger
+from scipy.optimize import NonlinearConstraint, differential_evolution
+
+from pybandits.pydantic_version_compatibility import PositiveInt, validate_call
+
+
+class OptimizationFailedError(Exception):
+    """Exception raised when optimization fails to converge."""
+
+    def __init__(self, message: str) -> None:
+        """
+        Initialize the exception.
+
+        Parameters
+        ----------
+        message : str
+            Error message describing why optimization failed.
+        """
+        super().__init__(message)
+        self.message = message
 
 
 @validate_call
@@ -142,3 +163,76 @@ def visualize_via_bokeh(output_path: Optional[str], tabs: List[TabPanel]):
 class classproperty(property):
     def __get__(self, instance, owner):
         return self.fget(owner)
+
+
+def maximize_by_quantity(
+    quantity_score_func: Callable[[np.ndarray], float],
+    dimension: PositiveInt,
+    constraint: Optional[List[Callable[[np.ndarray], bool]]] = None,
+    n_trials: PositiveInt = 10000,
+) -> np.ndarray:
+    """
+    Maximize the quantity score for the given function.
+
+    Parameters
+    ----------
+    quantity_score_func : Callable[[np.ndarray], float]
+        The quantity score function.
+    dimension : PositiveInt
+        The quantity vector dimension.
+    constraint : Optional[List[Callable[[np.ndarray], bool]]]
+        The constraint functions.
+    n_trials : PositiveInt, defaults to 10000
+        The number of optimization trials.
+
+    Returns
+    -------
+    np.ndarray
+        The global maxima coordinates of quantity_score_func.
+
+    Raises
+    ------
+    OptimizationFailedError
+        If the optimization fails to converge.
+    """
+    bounds = [(0, 1) for _ in range(dimension)]
+
+    # Convert constraint to scipy format if provided
+    if constraint is not None:
+        constraints = []
+        for constraint_func in constraint:
+
+            def scipy_constraint_func(x, func=constraint_func):
+                # Return positive if constraint satisfied, negative if violated
+                return 1.0 if func(x) else -1.0
+
+            constraints.append(NonlinearConstraint(scipy_constraint_func, 0, np.inf))
+    else:
+        constraints = None
+
+    # Differential Evolution parameters
+    de_params = {
+        "func": lambda x: -quantity_score_func(x),  # Minimize negative = maximize
+        "bounds": bounds,
+        "maxiter": max(100, n_trials // 10),  # Ensure minimum iterations for convergence
+        "popsize": 15,  # Population size multiplier (total pop = popsize * len(bounds))
+        "atol": 1e-6,  # Relaxed tolerance for boundary convergence
+        "tol": 1e-6,  # Relaxed tolerance for boundary convergence
+        "strategy": "best1bin",  # Good balance of exploration/exploitation
+        "mutation": (0.5, 1),  # Mutation factor range
+        "recombination": 0.7,  # Crossover probability
+        "polish": True,  # Local polish with L-BFGS-B
+        "disp": False,
+    }
+
+    # Only add constraints if they exist
+    if constraints is not None:
+        de_params["constraints"] = constraints
+    result = differential_evolution(**de_params)
+
+    if result.success:
+        return result.x
+    else:
+        error_message = f"Optimization failed: {result.message}"
+        logger.warning(error_message)
+        raise OptimizationFailedError(error_message)
diff --git a/pyproject.toml b/pyproject.toml
index 3a669f9..43f6cec 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pybandits"
-version = "4.1.2"
+version = "4.2.0"
 description = "Python Multi-Armed Bandit Library"
 authors = [
     "Dario d'Andrea <dariod@playtika.com>",
diff --git a/tests/test_cmab.py b/tests/test_cmab.py
index 7718628..f5eb589 100644
--- a/tests/test_cmab.py
+++ b/tests/test_cmab.py
@@ -184,7 +184,8 @@ def mock_update(models: Union[List[BaseBayesianNeuralNetwork], BaseBayesianNeura
 
 
 def _quantitative_cost(x, cost):
-    return x**cost
+    s = sum(x)
+    return s**cost if s >= 0 else 1e10
 
 
 @dataclass
@@ -712,6 +713,24 @@ def test_predict(
     diff,
     monkeymodule,
 ):
+    def mock_maximize_by_quantity(quantity_score_func, dimension, constraint=None, n_trials=10000):
+        """Mock maximize_by_quantity to return a quick result."""
+        return np.random.random(dimension)
+
+    monkeymodule.setattr(pybandits.strategy, "maximize_by_quantity", mock_maximize_by_quantity)
+
+    if config.cmab_class in (CmabBernoulliMO, CmabBernoulliMOCC):
+
+        def mock_find_pareto_front_normal_constraint(self, func, input_dim, n_objectives, n_divisions, model):
+            """Mock _find_pareto_front_normal_constraint to return a quick result."""
+            return [np.random.random(input_dim) for _ in range(min(3, n_divisions))]
+
+        monkeymodule.setattr(
+            pybandits.strategy.MultiObjectiveStrategy,
+            "_find_pareto_front_normal_constraint",
+            mock_find_pareto_front_normal_constraint,
+        )
+
     # Create CMAB instance
     cmab = config.create_cmab_and_actions(
         action_ids,
diff --git a/tests/test_cmab_simulator.py b/tests/test_cmab_simulator.py
index 3c68bcf..e9a2efc 100644
--- a/tests/test_cmab_simulator.py
+++ b/tests/test_cmab_simulator.py
@@ -36,7 +36,7 @@
 from pybandits.cmab_simulator import CmabSimulator
 from pybandits.model import BayesianLogisticRegression
 from pybandits.quantitative_model import CmabZoomingModel
-from tests.utils import FakeApproximation
+from tests.utils import mock_update, sample_with_replacement, to_unified_action_id
 
 
 def test_mismatched_probs_reward_columns(mocker: MockerFixture, groups=(0, 1)):
@@ -237,6 +237,16 @@ def _get_context_and_group(n_features, n_updates, batch_size, num_groups) -> Tup
     return context, group
 
 
+def mock_predict(self, context, *args, **kwargs):
+    n_samples = len(context)
+    action_ids = [to_unified_action_id(action_id, model) for action_id, model in self.actions.items()]
+    return (
+        sample_with_replacement(action_ids, n_samples),
+        [{action_id: np.random.random() for action_id in action_ids} for _ in range(n_samples)],
+        [{action_id: np.random.randn() for action_id, model in self.actions.items()} for _ in range(n_samples)],
+    )
+
+
 @settings(deadline=None)
 @given(
     st.just(["a1", "a2"]),
@@ -254,21 +264,11 @@ def _get_context_and_group(n_features, n_updates, batch_size, num_groups) -> Tup
     st.sampled_from([None, 2]),
 )
 def test_cmab_e2e_simulation_with_default_arguments(monkeymodule, action_ids, models, n_features, num_groups):
-    monkeymodule.setattr(
-        pybandits.model,
-        "fit",
-        lambda *args, **kwargs: FakeApproximation(n_features=n_features),
-    )
-    monkeymodule.setattr(
-        pybandits.model,
-        "sample",
-        FakeApproximation(n_features=n_features).sample,
-    )
-    monkeymodule.setattr(
-        CmabSimulator,
-        "_maximize_prob_reward",
-        lambda *args, **kwargs: np.random.random(),
-    )
+    monkeymodule.setattr(pybandits.utils, "maximize_by_quantity", lambda *args, **kwargs: np.random.random())
+    monkeymodule.setattr(pybandits.cmab_simulator, "maximize_by_quantity", lambda *args, **kwargs: np.random.random())
+    monkeymodule.setattr(pybandits.cmab.CmabBernoulli, "predict", mock_predict)
+    monkeymodule.setattr(pybandits.cmab.CmabBernoulli, "update", mock_update)
+
     mab = CmabBernoulli(actions=dict(zip(action_ids, models)))
     n_updates = CmabSimulator.model_fields["n_updates"].default
     batch_size = CmabSimulator.model_fields["batch_size"].default
@@ -332,22 +332,11 @@ def test_cmab_e2e_simulation_with_non_default_args(
     num_groups,
     monkeymodule,
 ):
-    monkeymodule.setattr(
-        pybandits.model,
-        "fit",
-        lambda *args, **kwargs: FakeApproximation(n_features=n_features),
-    )
-    monkeymodule.setattr(
-        pybandits.model,
-        "sample",
-        FakeApproximation(n_features=n_features).sample,
-    )
+    monkeymodule.setattr(pybandits.utils, "maximize_by_quantity", lambda *args, **kwargs: np.random.random())
+    monkeymodule.setattr(pybandits.cmab_simulator, "maximize_by_quantity", lambda *args, **kwargs: np.random.random())
+    monkeymodule.setattr(pybandits.cmab.CmabBernoulli, "predict", mock_predict)
+    monkeymodule.setattr(pybandits.cmab.CmabBernoulli, "update", mock_update)
 
-    monkeymodule.setattr(
-        CmabSimulator,
-        "_maximize_prob_reward",
-        lambda *args, **kwargs: np.random.random(),
-    )
     context, group = _get_context_and_group(n_features, n_updates, batch_size, num_groups)
     mab = CmabBernoulli(actions=dict(zip(action_ids, models)))
     if visualize and not save:
diff --git a/tests/test_quantitative_model.py b/tests/test_quantitative_model.py
index 49d2b91..6b0679a 100644
--- a/tests/test_quantitative_model.py
+++ b/tests/test_quantitative_model.py
@@ -23,7 +23,7 @@
 import functools
 import json
 from copy import deepcopy
-from typing import List, Union
+from typing import Callable, Dict, List, Optional, Union
 
 import numpy as np
 import pytest
@@ -32,7 +32,7 @@
 from hypothesis.extra.numpy import arrays
 
 import pybandits
-from pybandits.base import BinaryReward
+from pybandits.base import BinaryReward, QuantitativeProbability, UnifiedProbability
 from pybandits.model import Beta
 from pybandits.pydantic_version_compatibility import NonNegativeFloat
 from pybandits.quantitative_model import (
@@ -91,12 +91,20 @@ def test_add_nonadjacent_segments():
 
 
 class DummyZoomingModel(ZoomingModel):
+    cost: Optional[Callable[[Union[float, NonNegativeFloat]], NonNegativeFloat]] = None
+
     def _init_base_model(self):
         self._base_model = Beta()
 
     def _inner_update(self, segments: List[Segment], rewards: List[BinaryReward], **kwargs):
         pass
 
+    def _to_quantitative_probabilities(
+        self, segment_probabilities: Dict[Segment, List[UnifiedProbability]]
+    ) -> List[QuantitativeProbability]:
+        max_samples = max(len(probas) for probas in segment_probabilities.values())
+        return [lambda x: np.random.uniform(0, 1) for _ in range(max_samples)]
+
 
 # Model initialization with valid parameters creates correct number of segments
 @given(dimension=st.integers(min_value=1, max_value=3))
@@ -144,14 +152,19 @@ def test_similar_segments_merge():
     assert len(model.sub_actions) == 4
 
 
-# Sample_proba returns valid probability for each segment
-def test_sample_proba_returns_valid_probabilities(n_samples=100):
+# Sample_proba returns valid probability functions
+def test_sample_proba_returns_valid_probabilities(n_samples=100, test_locations=((0.1,), (0.5,), (0.9,))):
     model = DummyZoomingModel.cold_start(dimension=1)
-    probs = model.sample_proba(n_samples=n_samples)
-    assert all(len(prob) == len(model.sub_actions) for prob in probs)
-    assert len(probs) == n_samples
-    assert all(0 <= prob[1] <= 1 for sample in probs for prob in sample)
-    assert all(0 <= v <= 1 for sample in probs for prob in sample for v in prob[0])
+    prob_functions = model.sample_proba(n_samples=n_samples)
+    assert len(prob_functions) == n_samples
+
+    # Test that each function is callable and returns valid probabilities
+
+    for prob_func in prob_functions:
+        assert callable(prob_func)
+        for location in test_locations:
+            prob = prob_func(location)
+            assert 0 <= prob <= 1
 
 
 # Update with empty rewards/quantities list
@@ -277,14 +290,20 @@ def test_updates_smab_zooming_model_correctly(rewards, quantities, dimension):
     assert model.segmented_actions != initial_segments
 
 
-# Test SmabZoomingModel sample_proba returns valid probabilities
-def test_sample_proba_returns_valid_probabilities_smab(dimension=1, n_samples=100):
+# Test SmabZoomingModel sample_proba returns valid probability functions
+def test_sample_proba_returns_valid_probabilities_smab(
+    dimension=1, n_samples=100, test_locations=((0.1,), (0.5,), (0.9,))
+):
     model = SmabZoomingModel.cold_start(dimension=dimension)
-    probas = model.sample_proba(n_samples=n_samples)
-    for proba in probas:
-        for (q,), p in proba:
-            assert 0 <= q <= 1
-            assert 0 <= p <= 1
+    prob_functions = model.sample_proba(n_samples=n_samples)
+    assert len(prob_functions) == n_samples
+
+    # Test that each function is callable and returns valid probabilities
+    for prob_func in prob_functions:
+        assert callable(prob_func)
+        for location in test_locations:
+            prob = prob_func(location)
+            assert 0 <= prob <= 1
 
 
 # Test CmabZoomingModel initialization with valid parameters
@@ -322,19 +341,23 @@ def test_updates_cmab_zooming_model_correctly(rewards, quantities, context, dime
     assert model.segmented_actions != initial_segments
 
 
-# Test CmabZoomingModel sample_proba returns valid probabilities
+# Test CmabZoomingModel sample_proba returns valid probability functions
 @given(
     context=arrays(np.float64, shape=(5, 1), elements=st.floats(min_value=0, max_value=1)),
     dimension=st.just(1),
     n_features=st.just(1),
+    location=st.floats(min_value=0, max_value=1),
 )
-def test_sample_proba_returns_valid_probabilities_cmab(context, dimension, n_features):
+def test_sample_proba_returns_valid_probabilities_cmab(context, dimension, n_features, location):
     model = CmabZoomingModel.cold_start(dimension=dimension, base_model_cold_start_kwargs={"n_features": n_features})
-    probas = model.sample_proba(context=context)
-    for proba in probas:
-        for (q,), (p, _) in proba:
-            assert 0 <= q <= 1
-            assert 0 <= p <= 1
+    prob_functions = model.sample_proba(context=context)
+    assert len(prob_functions) == len(context)
+
+    # Test that each function is callable and returns valid probabilities
+    for prob_weight_func in prob_functions:
+        assert all(callable(func) for func in prob_weight_func)
+        prob, weight = (func(np.atleast_1d(location)) for func in prob_weight_func)
+        assert 0 <= prob <= 1
 
 
 ########################################################################################################################
diff --git a/tests/test_simulator.py b/tests/test_simulator.py
index ebb639a..3a762de 100644
--- a/tests/test_simulator.py
+++ b/tests/test_simulator.py
@@ -24,10 +24,9 @@
 from typing import Dict, List, Tuple
 
 import numpy as np
-import optuna
 import pandas as pd
 import pytest
-from hypothesis import given, settings
+from hypothesis import given
 from hypothesis import strategies as st
 from pytest_mock import MockerFixture
 
@@ -74,96 +73,6 @@ def check_value_error(probs_reward):
     check_value_error(probs_reward)
 
 
-# Test _maximize_prob_reward
-
-
-# Returns maximum probability value from optimization study
-def test_returns_maximum_probability(mocker):
-    mock_study = mocker.Mock()
-    mock_study.best_value = 0.8
-    mocker.patch("optuna.create_study", return_value=mock_study)
-
-    def prob_func(x):
-        return 0.8
-
-    result = Simulator._maximize_prob_reward(prob_func, 1)
-
-    assert result == 0.8
-
-
-# Correctly samples points from [0,1] range
-@given(st.integers(min_value=1, max_value=2))
-@settings(deadline=None, max_examples=10)
-def test_samples_points_in_valid_range(dimension):
-    def prob_func(x):
-        assert all(0 <= xi <= 1 for xi in x)
-        return 0.5
-
-    Simulator._maximize_prob_reward(prob_func, dimension)
-
-
-def test_maximization_result(atol=1e-2):
-    maximum = Simulator._maximize_prob_reward(lambda x: 1 - x**2, 1)
-    assert np.isclose(maximum, 1.0, atol=atol)
-    maximum = Simulator._maximize_prob_reward(lambda x: x**2, 1)
-    assert np.isclose(maximum, 1.0, atol=atol)
-
-
-# Uses TPE sampler with multivariate optimization
-def test_uses_tpe_sampler_config(mocker):
-    sampler_spy = mocker.spy(optuna.samplers, "TPESampler")
-
-    def prob_func(x):
-        return 0.5
-
-    Simulator._maximize_prob_reward(prob_func, 1)
-
-    assert sampler_spy.call_args.kwargs["multivariate"]
-    assert sampler_spy.call_args.kwargs["group"]
-
-
-# Function is decorated with lru_cache
-def test_lru_cache_memoization():
-    def prob_func(x):
-        return 0.5
-
-    result1 = Simulator._maximize_prob_reward(prob_func, 1)
-    result2 = Simulator._maximize_prob_reward(prob_func, 1)
-
-    assert result1 == result2
-    assert hasattr(Simulator._maximize_prob_reward, "cache_info")
-
-
-# Probability reward function raises exceptions
-def test_probability_function_exceptions():
-    def failing_prob_func(x):
-        raise RuntimeError("Function failed")
-
-    with pytest.raises(RuntimeError):
-        Simulator._maximize_prob_reward(failing_prob_func, 1)
-
-
-# Input dimension is large
-def test_large_input_dimension(dimension=30):
-    def prob_func(x):
-        return 0.5
-
-    Simulator._maximize_prob_reward(prob_func, dimension)
-
-
-# Optimization fails to converge
-def test_optimization_convergence_failure(mocker):
-    mock_study = mocker.Mock()
-    mock_study.best_value = None
-    mocker.patch("optuna.create_study", return_value=mock_study)
-
-    def prob_func(x):
-        return 0.5
-
-    with pytest.raises(ValueError):
-        Simulator._maximize_prob_reward(prob_func, 1)
-
-
 # Test _generate_prob_reward
 
 
diff --git a/tests/test_smab.py b/tests/test_smab.py
index db39063..2b70ac9 100644
--- a/tests/test_smab.py
+++ b/tests/test_smab.py
@@ -30,6 +30,7 @@
 from hypothesis import strategies as st
 from pydantic.dataclasses import dataclass
 
+import pybandits
 from pybandits.actions_manager import SmabModelType
 from pybandits.base import ActionId, Float01, PositiveProbability
 from pybandits.base_model import BaseModel
@@ -75,7 +76,8 @@ def mock_update(models: List[SmabModelType], diff, monkeymodule, label=0):
 
 
 def _quantitative_cost(x, cost):
-    return x**cost
+    s = sum(x)
+    return s**cost if s >= 0 else 1e10
 
 
 @dataclass
@@ -454,6 +456,24 @@ def test_predict(
     diff,
     monkeymodule,
 ):
+    def mock_maximize_by_quantity(quantity_score_func, dimension, constraint=None, n_trials=10000):
+        """Mock maximize_by_quantity to return a quick result."""
+        return np.random.random(dimension)
+
+    monkeymodule.setattr(pybandits.strategy, "maximize_by_quantity", mock_maximize_by_quantity)
+
+    if config.smab_class in (SmabBernoulliMO, SmabBernoulliMOCC):
+
+        def mock_find_pareto_front_normal_constraint(self, func, input_dim, n_objectives, n_divisions, model):
+            """Mock _find_pareto_front_normal_constraint to return a quick result."""
+            return [np.random.random(input_dim) for _ in range(min(3, n_divisions))]
+
+        monkeymodule.setattr(
+            pybandits.strategy.MultiObjectiveStrategy,
+            "_find_pareto_front_normal_constraint",
+            mock_find_pareto_front_normal_constraint,
+        )
+
     # Create SMAB instance
     smab = config.create_smab_and_actions(action_ids, epsilon, delta, costs, n_objectives, exploit_p, subsidy_factor)[0]
 
@@ -484,8 +504,6 @@ def test_predict(
             len({action[0] if isinstance(action, tuple) else action for action in prob}) == len(action_ids)
             for prob in probs
         )
-        if isinstance(smab, SmabBernoulli) and not smab.epsilon:
-            assert all(prob[best_action] == max(prob.values()) for best_action, prob in zip(best_actions, probs))
 
 
 @settings(deadline=None)
diff --git a/tests/test_smab_simulator.py b/tests/test_smab_simulator.py
index 1f986f7..4f0ea5a 100644
--- a/tests/test_smab_simulator.py
+++ b/tests/test_smab_simulator.py
@@ -31,12 +31,14 @@
 from hypothesis import strategies as st
 from pytest_mock.plugin import MockerFixture
 
+import pybandits
 from pybandits.actions_manager import SmabModelType
 from pybandits.base_model import BaseModel
 from pybandits.model import Beta
 from pybandits.quantitative_model import QuantitativeModel, SmabZoomingModel
 from pybandits.smab import SmabBernoulli
 from pybandits.smab_simulator import SmabSimulator
+from tests.utils import mock_update, sample_with_replacement, to_unified_action_id
 
 
 def test_mismatched_probs_reward_columns(mocker: MockerFixture):
@@ -185,6 +187,14 @@ def test_validate_probs_reward_values(
             SmabSimulator._validate_probs_reward_values(probability, is_quantitative_action)
 
 
+def mock_predict(self, n_samples, *args, **kwargs):
+    action_ids = [to_unified_action_id(action_id, model) for action_id, model in self.actions.items()]
+    return (
+        sample_with_replacement(action_ids, n_samples),
+        [{action_id: np.random.random() for action_id in action_ids} for _ in range(n_samples)],
+    )
+
+
 @settings(deadline=None)
 @given(
     action_ids=st.just(["a1", "a2"]),
@@ -205,7 +215,11 @@ def test_smab_e2e_simulation_with_default_args(
     monkeymodule : MonkeyPatch
         Pytest monkeypatch fixture for modifying module attributes.
     """
-    monkeymodule.setattr(SmabSimulator, "_maximize_prob_reward", lambda *args, **kwargs: np.random.random())
+    monkeymodule.setattr(pybandits.utils, "maximize_by_quantity", lambda *args, **kwargs: np.random.random())
+    monkeymodule.setattr(pybandits.smab_simulator, "maximize_by_quantity", lambda *args, **kwargs: np.random.random())
+    monkeymodule.setattr(pybandits.smab.SmabBernoulli, "predict", mock_predict)
+    monkeymodule.setattr(pybandits.smab.SmabBernoulli, "update", mock_update)
+
     mab = SmabBernoulli(actions=dict(zip(action_ids, models)))
     with TemporaryDirectory() as path:
         simulator = SmabSimulator(mab=mab, visualize=True, save=True, path=path)
@@ -268,11 +282,11 @@ def test_smab_e2e_simulation_with_non_default_args(
     monkeymodule : MonkeyPatch
         Pytest monkeypatch fixture for modifying module attributes.
     """
-    monkeymodule.setattr(
-        SmabSimulator,
-        "_maximize_prob_reward",
-        lambda *args, **kwargs: np.random.random(),
-    )
+    monkeymodule.setattr(pybandits.utils, "maximize_by_quantity", lambda *args, **kwargs: np.random.random())
+    monkeymodule.setattr(pybandits.smab_simulator, "maximize_by_quantity", lambda *args, **kwargs: np.random.random())
+    monkeymodule.setattr(pybandits.smab.SmabBernoulli, "predict", mock_predict)
+    monkeymodule.setattr(pybandits.smab.SmabBernoulli, "update", mock_update)
+
     mab = SmabBernoulli(actions=dict(zip(action_ids, models)))
     if visualize and not save:
         with pytest.raises(ValueError):
diff --git a/tests/test_strategy.py b/tests/test_strategy.py
index 7ecc7c9..ea6d17b 100644
--- a/tests/test_strategy.py
+++ b/tests/test_strategy.py
@@ -20,56 +20,503 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 
-from typing import Dict, List
+from typing import Callable, Dict, List, Optional, Tuple, Union
+from unittest.mock import MagicMock, patch
 
 import numpy as np
 import pytest
-from hypothesis import given
+from hypothesis import assume, given, settings
 from hypothesis import strategies as st
+from pytest_mock import MockerFixture
 
-from pybandits.base import ActionId, Probability
+from pybandits.base import ActionId, BaseModel, Probability, UnifiedActionId
 from pybandits.model import Beta, BetaCC, BetaMOCC
 from pybandits.pydantic_version_compatibility import ValidationError
+from pybandits.quantitative_model import QuantitativeModel
 from pybandits.strategy import (
+    BaseStrategy,
     BestActionIdentificationBandit,
     ClassicBandit,
     CostControlBandit,
+    CostControlStrategy,
     MultiObjectiveBandit,
     MultiObjectiveCostControlBandit,
     MultiObjectiveStrategy,
+    SingleObjectiveStrategy,
 )
+from tests.test_quantitative_model import DummyZoomingModel
 
 ########################################################################################################################
+# Helper functions and fixtures
+
+
+# Test constants
+DEFAULT_COST = 10.0
+DEFAULT_DIMENSION = 2
+DEFAULT_PROBABILITY = 0.5
+DEFAULT_EXPLOIT_P = 0.5
+DEFAULT_SUBSIDY_FACTOR = 0.5
+
+
+def create_mock_quantitative_model(
+    dimension: int = DEFAULT_DIMENSION, cost_value: float = DEFAULT_COST
+) -> QuantitativeModel:
+    """Create a mock quantitative model for testing.
+
+    Parameters
+    ----------
+    dimension : int
+        Dimension of the quantitative model.
+    cost_value : float
+        Cost value to return.
+
+    Returns
+    -------
+    QuantitativeModel
+        Mock quantitative model.
+    """
+    model = MagicMock(spec=QuantitativeModel)
+    model.dimension = dimension
+    model.cost = MagicMock(return_value=cost_value)
+    return model
+
+
+def create_mock_base_model(cost_value: float = DEFAULT_COST) -> BaseModel:
+    """Create a mock base model for testing.
+
+    Parameters
+    ----------
+    cost_value : float
+        Cost value for the model.
+
+    Returns
+    -------
+    BaseModel
+        Mock base model.
+    """
+    model = MagicMock(spec=BaseModel)
+    model.cost = cost_value
+    return model
+
+
+@st.composite
+def action_probability_pairs(draw, min_actions: int = 2, max_actions: int = 10, allow_callables: bool = False):
+    """Generate action-probability pairs for testing.
+
+    Parameters
+    ----------
+    draw : function
+        Hypothesis draw function.
+    min_actions : int
+        Minimum number of actions.
+    max_actions : int
+        Maximum number of actions.
+    allow_callables : bool
+        Whether to include callable probabilities.
+
+    Returns
+    -------
+    tuple
+        (action_dict, probability_dict, model_dict)
+    """
+    n_actions = draw(st.integers(min_value=min_actions, max_value=max_actions))
+    action_ids = [f"action_{i}" for i in range(n_actions)]
+
+    probabilities = {}
+    models = {}
+
+    for action_id in action_ids:
+        cost_value = np.random.random()
+        probability_value = np.random.random()
+        if allow_callables and draw(st.booleans()):
+            # Create a callable probability
+            probabilities[action_id] = lambda x, p=probability_value: p
+            models[action_id] = DummyZoomingModel.cold_start(dimension=DEFAULT_DIMENSION, cost=lambda x: cost_value)
+        else:
+            # Create a fixed probability
+            probabilities[action_id] = probability_value
+            models[action_id] = BetaCC(cost=cost_value)
+
+    return action_ids, probabilities, models
+
+
+@pytest.fixture(scope="session")
+def prob_dict_two_actions() -> Dict[str, float]:
+    """Fixture providing a probability dictionary with two actions.
+
+    Returns
+    -------
+    Dict[str, float]
+        Probability dictionary with two actions (a1: 0.5, a2: 0.7).
+    """
+    return {"a1": 0.5, "a2": 0.7}
+
+
+@pytest.fixture(scope="session")
+def prob_dict_three_actions() -> Dict[str, float]:
+    """Fixture providing a probability dictionary with three actions.
+
+    Returns
+    -------
+    Dict[str, float]
+        Probability dictionary with three actions (a1: 0.5, a2: 0.7, a3: 0.3).
+    """
+    return {"a1": 0.5, "a2": 0.7, "a3": 0.3}
+
+
+@pytest.fixture(scope="session")
+def prob_dict_single_action() -> Dict[str, float]:
+    """Fixture providing a probability dictionary with a single action.
+
+    Returns
+    -------
+    Dict[str, float]
+        Probability dictionary with one action (a1: 0.5).
+    """
+    return {"a1": 0.5}
 
+
+########################################################################################################################
+# BaseStrategy tests
+
+
+class ConcreteStrategy(BaseStrategy):
+    """Concrete implementation of BaseStrategy for testing."""
+
+    def select_action(
+        self,
+        p: Dict[ActionId, Union[float, Callable[[np.ndarray], float]]],
+        actions: Dict[ActionId, BaseModel],
+        **kwargs,
+    ) -> UnifiedActionId:
+        """Select the first action."""
+        return list(p.keys())[0]
+
+
+def test_base_strategy_abstract():
+    """Test that BaseStrategy cannot be instantiated directly."""
+    with pytest.raises(TypeError):
+        BaseStrategy()
+
+
+def test_base_strategy_concrete_implementation(prob_dict_two_actions: Dict[str, float], expected_result: str = "a1"):
+    """Test that concrete implementations of BaseStrategy work.
+
+    Parameters
+    ----------
+    prob_dict_two_actions : Dict[str, float]
+        Probability dictionary with two actions.
+    expected_result : str
+        Expected result of the strategy.
+    """
+    strategy = ConcreteStrategy()
+    p = prob_dict_two_actions
+    actions = {action_id: BetaCC(cost=DEFAULT_COST) for action_id in prob_dict_two_actions.keys()}
+
+    result = strategy.select_action(p, actions)
+    assert result == expected_result
+
+
+########################################################################################################################
+# SingleObjectiveStrategy tests
+
+
+class ConcreteSingleObjectiveStrategy(SingleObjectiveStrategy):
+    """Concrete implementation of SingleObjectiveStrategy for testing."""
+
+    def get_prerequisites(
+        self,
+        p: Dict[ActionId, Union[float, Callable]],
+        actions: Dict[ActionId, BaseModel],
+        constraint_list: Optional[List[Callable]],
+    ) -> Dict[str, any]:
+        """Return empty prerequisites."""
+        return {"test_value": 42}
+
+    def _verify_action(self, score: float, **kwargs) -> bool:
+        """Accept all actions."""
+        return True
+
+    def _verify_and_select_from_quantitative_action(
+        self,
+        score_func: Callable[[np.ndarray], float],
+        model: BaseModel,
+        constraint_list: Optional[List[Callable[[np.ndarray], bool]]],
+        **kwargs,
+    ) -> Optional[np.ndarray]:
+        """Return a simple quantity vector."""
+        return np.array([0.5, 0.5])
+
+    def _select_from_refined_actions(
+        self,
+        refined_p: Dict[UnifiedActionId, float],
+        actions: Dict[ActionId, BaseModel],
+        constraint: Optional[Callable[[np.ndarray], bool]] = None,
+    ) -> UnifiedActionId:
+        """Select the first action."""
+        return list(refined_p.keys())[0] if refined_p else None
+
+
+def test_single_objective_strategy_abstract():
+    """Test that SingleObjectiveStrategy cannot be instantiated directly."""
+    with pytest.raises(TypeError):
+        SingleObjectiveStrategy()
+
+
+def test_single_objective_strategy_select_action(prob_dict_two_actions: Dict[str, float]):
+    """Test SingleObjectiveStrategy select_action method.
+
+    Parameters
+    ----------
+    prob_dict_two_actions : Dict[str, float]
+        Probability dictionary with two actions.
+    """
+    strategy = ConcreteSingleObjectiveStrategy()
+    p = prob_dict_two_actions
+    actions = {action_id: BetaCC(cost=DEFAULT_COST) for action_id in prob_dict_two_actions.keys()}
+
+    result = strategy.select_action(p, actions)
+    assert result in p.keys()
+
+
+@pytest.mark.parametrize("constraint_returns", [True, False])
+def test_single_objective_strategy_with_constraints(
+    constraint_returns: bool, prob_dict_single_action: Dict[str, float], expected_result: str = "a1"
+):
+    """Test SingleObjectiveStrategy with constraints.
+
+    Parameters
+    ----------
+    constraint_returns : bool
+        Whether the constraint should return True or False.
+    prob_dict_single_action : Dict[str, float]
+        Probability dictionary with one action.
+    expected_result : str
+        Expected result of the strategy.
+    """
+    strategy = ConcreteSingleObjectiveStrategy()
+    p = prob_dict_single_action
+    actions = {action_id: BetaCC(cost=DEFAULT_COST) for action_id in prob_dict_single_action.keys()}
+
+    def constraint(x):
+        return constraint_returns
+
+    result = strategy.select_action(p, actions, constraint)
+
+    assert result == expected_result
+
+
+def test_single_objective_strategy_refine_p_with_quantitative(
+    prob_a1: float = 0.5, prob_a2: float = 0.7, prob_a3: float = 0.3
+):
+    """Test refine_p with quantitative actions.
+
+    Parameters
+    ----------
+    prob_a1 : float
+        Probability for regular action a1.
+    prob_a2 : float
+        Probability for quantitative action a2.
+    prob_a3 : float
+        Probability for quantitative action a3.
+    """
+    strategy = ConcreteSingleObjectiveStrategy()
+
+    # Mix of regular and quantitative actions
+    p = {"a1": prob_a1, "a2": lambda x: prob_a2, "a3": lambda x: prob_a3}
+    actions = {
+        "a1": BetaCC(cost=DEFAULT_COST),
+        "a2": create_mock_quantitative_model(),
+        "a3": create_mock_quantitative_model(),
+    }
+
+    refined_p = strategy.refine_p(p, actions, None)
+
+    # Check that regular action is preserved
+    assert "a1" in refined_p
+    assert refined_p["a1"] == prob_a1
+
+    # Check that quantitative actions are converted to tuples
+    quantitative_keys = [k for k in refined_p.keys() if isinstance(k, tuple)]
+    assert len(quantitative_keys) == 2
+
+    for key in quantitative_keys:
+        assert key[0] in ["a2", "a3"]
+        assert isinstance(key[1], tuple)
+
+
+def test_single_objective_strategy_verify_and_select_public_method(
+    model_dimension: int = 3, expected_result_length: int = 2
+):
+    """Test the public verify_and_select_from_quantitative_action method.
+
+    Parameters
+    ----------
+    model_dimension : int
+        Dimension of the quantitative model.
+    expected_result_length : int
+        Expected length of the result array.
+    """
+    strategy = ConcreteSingleObjectiveStrategy()
+
+    model = create_mock_quantitative_model(dimension=model_dimension)
+    constraint_list = [lambda x: np.all(x >= 0)]
+
+    result = strategy.verify_and_select_from_quantitative_action(sum, model, constraint_list)
+
+    assert result is not None
+    assert isinstance(result, np.ndarray)
+    assert len(result) == expected_result_length
+
+
+########################################################################################################################
 # ClassicBandit
 
 
 def test_can_init_classic_bandit():
-    ClassicBandit()
+    """Test that ClassicBandit can be initialized."""
+    bandit = ClassicBandit()
+    assert isinstance(bandit, SingleObjectiveStrategy)
+    assert isinstance(bandit, BaseStrategy)
 
 
 @given(
-    st.lists(st.text(min_size=1), min_size=2, unique=True),
-    st.lists(st.floats(min_value=0, max_value=1, allow_infinity=False, allow_nan=False), min_size=2),
+    st.lists(st.text(min_size=1, max_size=10), min_size=2, max_size=5, unique=True),
+    st.lists(st.floats(min_value=0, max_value=1, allow_infinity=False, allow_nan=False), min_size=2, max_size=5),
 )
+@settings(max_examples=10)
 def test_select_action_classic_bandit(a_list_str, a_list_float):
+    """Test ClassicBandit selects action with highest probability.
+
+    Parameters
+    ----------
+    a_list_str : list
+        List of action IDs.
+    a_list_float : list
+        List of probabilities.
+    """
+    assume(len(a_list_str) == len(a_list_float))
     p = dict(zip(a_list_str, a_list_float))
+    actions = {action_id: BetaCC(cost=DEFAULT_COST) for action_id in a_list_str}
 
     c = ClassicBandit()
-    assert max(p, key=p.get) == c.select_action(p=p)
+    assert max(p, key=p.get) == c.select_action(p=p, actions=actions)
 
 
-########################################################################################################################
+def test_classic_bandit_prerequisites(prob_dict_single_action: Dict[str, float]):
+    """Test that ClassicBandit returns empty prerequisites.
+
+    Parameters
+    ----------
+    prob_dict_single_action : Dict[str, float]
+        Probability dictionary with one action.
+    """
+    bandit = ClassicBandit()
+    p = prob_dict_single_action
+    actions = {"a1": BetaCC(cost=DEFAULT_COST)}
+
+    prerequisites = bandit.get_prerequisites(p, actions, None)
+    assert prerequisites == {}
+
+
+def test_classic_bandit_verify_action():
+    """Test that ClassicBandit accepts all actions."""
+    bandit = ClassicBandit()
 
+    # Should always return True
+    assert bandit._verify_action(0.0)
+    assert bandit._verify_action(0.5)
+    assert bandit._verify_action(1.0)
 
+
+def test_classic_bandit_quantitative_action(dimension: int = 2, expected_result: np.ndarray = np.array([1.0, 1.0])):
+    """Test ClassicBandit handles quantitative actions.
+
+    Parameters
+    ----------
+    mock_maximize : MagicMock
+        Mock for maximize_by_quantity function.
+    """
+
+    bandit = ClassicBandit()
+    model = create_mock_quantitative_model(dimension=dimension)
+
+    result = bandit._verify_and_select_from_quantitative_action(sum, model, None)
+    assert np.allclose(result, expected_result, atol=1e-3)
+
+
+@pytest.mark.parametrize(
+    "n_actions,n_quantitative",
+    [
+        (3, 0),  # All regular actions
+        (3, 1),  # Mix of regular and quantitative
+        (3, 3),  # All quantitative actions
+    ],
+)
+def test_classic_bandit_mixed_actions(
+    n_actions: int,
+    n_quantitative: int,
+    return_value: np.ndarray = np.array([0.5, 0.5]),
+    base_prob: float = 0.5,
+    prob_increment: float = 0.1,
+):
+    """Test ClassicBandit with mixed regular and quantitative actions.
+
+    Parameters
+    ----------
+    n_actions : int
+        Total number of actions.
+    n_quantitative : int
+        Number of quantitative actions.
+    return_value : np.ndarray
+        Return value for mock maximize function.
+    base_prob : float
+        Base probability value for actions.
+    prob_increment : float
+        Probability increment per action index.
+    """
+    bandit = ClassicBandit()
+    p = {}
+    actions = {}
+
+    for i in range(n_actions):
+        action_id = f"a{i}"
+        if i < n_quantitative:
+            p[action_id] = lambda x, val=base_prob + i * prob_increment: val
+            actions[action_id] = create_mock_quantitative_model()
+        else:
+            p[action_id] = base_prob + i * prob_increment
+            actions[action_id] = BetaCC(cost=DEFAULT_COST)
+
+    # Patch where it's used (strategy module) not where it's defined (utils module)
+    with patch("pybandits.strategy.maximize_by_quantity") as mock_maximize:
+        mock_maximize.return_value = return_value
+        result = bandit.select_action(p, actions)
+
+        assert result is not None
+        if n_quantitative:
+            assert mock_maximize.call_count == n_quantitative, (
+                f"Expected {n_quantitative} calls but got {mock_maximize.call_count}"
+            )
+
+
+########################################################################################################################
 # BestActionIdentificationBandit
 
 
 @given(st.floats())
-def test_can_init_best_action_identification(a_float):
+def test_can_init_best_action_identification(a_float: float):
+    """Test BestActionIdentificationBandit initialization.
+
+    Parameters
+    ----------
+    a_float : float
+        Test value for exploit_p.
+    """
     # init default params
     b = BestActionIdentificationBandit()
     assert b.exploit_p == 0.5
+    assert isinstance(b, ClassicBandit)
 
     # init with input arguments
     if a_float < 0 or a_float > 1 or np.isnan(a_float) or np.isinf(a_float):
@@ -81,7 +528,14 @@ def test_can_init_best_action_identification(a_float):
 
 
 @given(st.floats())
-def test_with_exploit_p(a_float):
+def test_with_exploit_p(a_float: float):
+    """Test BestActionIdentificationBandit with_exploit_p method.
+
+    Parameters
+    ----------
+    a_float : float
+        Test value for exploit_p.
+    """
     b = BestActionIdentificationBandit()
 
     # set with invalid float
@@ -96,66 +550,197 @@ def test_with_exploit_p(a_float):
 
 
 @given(
-    st.lists(st.text(min_size=1), min_size=2, unique=True),
-    st.lists(st.floats(min_value=0, max_value=1, allow_infinity=False, allow_nan=False), min_size=2),
+    st.lists(st.text(min_size=1, max_size=10), min_size=2, max_size=5, unique=True),
+    st.lists(st.floats(min_value=0, max_value=1, allow_infinity=False, allow_nan=False), min_size=2, max_size=5),
 )
-def test_select_action(a_list_str, a_list_float):
+@settings(max_examples=10)
+def test_select_action_bai(a_list_str, a_list_float):
+    """Test BestActionIdentificationBandit select_action method.
+
+    Parameters
+    ----------
+    a_list_str : list
+        List of action IDs.
+    a_list_float : list
+        List of probabilities.
+    """
+    assume(len(a_list_str) == len(a_list_float))
     p = dict(zip(a_list_str, a_list_float))
+    actions = {action_id: BetaCC(cost=DEFAULT_COST) for action_id in a_list_str}
+
     b = BestActionIdentificationBandit()
-    b.select_action(p=p)
+    result = b.select_action(p=p, actions=actions)
+    assert result in p.keys()
 
 
-@given(
-    st.floats(min_value=0, max_value=1, allow_infinity=False, allow_nan=False),
-    st.floats(min_value=0, max_value=1, allow_infinity=False, allow_nan=False),
-    st.floats(min_value=0, max_value=1, allow_infinity=False, allow_nan=False),
+@pytest.mark.parametrize(
+    "exploit_p,should_be_best",
+    [
+        (1.0, True),  # Always select best
+        (0.0, False),  # Always select second-best
+    ],
 )
-def test_select_action_logic(a_float1, a_float2, a_float3):
-    p = {"a1": a_float1, "a2": a_float2, "a3": a_float3}
-
-    b = BestActionIdentificationBandit(exploit_p=1)
-    # if exploit_p factor is 1 => return the action with 1st highest prob (max)
-    assert max(p, key=p.get) == b.select_action(p=p)
+def test_bai_selection_logic(
+    exploit_p: float,
+    should_be_best: bool,
+    mocker: MockerFixture,
+    prob_a1: float = 0.3,
+    prob_a2: float = 0.7,
+    prob_a3: float = 0.5,
+    random_value: float = 0.5,
+):
+    """Test BAI selection logic with different exploit_p values.
+
+    Parameters
+    ----------
+    exploit_p : float
+        Exploitation probability.
+    should_be_best : bool
+        Whether the best action should be selected.
+    mocker : MockerFixture
+        Pytest mocker fixture.
+    prob_a1 : float
+        Probability for action a1.
+    prob_a2 : float
+        Probability for action a2.
+    prob_a3 : float
+        Probability for action a3.
+    random_value : float
+        Mocked random value for selection control.
+    """
+    # Mock random to control selection
+    mocker.patch("pybandits.strategy.random", return_value=random_value)
+
+    p = {"a1": prob_a1, "a2": prob_a2, "a3": prob_a3}
+    actions = {action_id: BetaCC(cost=DEFAULT_COST) for action_id in p.keys()}
+
+    b = BestActionIdentificationBandit(exploit_p=exploit_p)
+    result = b.select_action(p=p, actions=actions)
+
+    if should_be_best:
+        assert result == "a2"  # Highest probability
+    else:
+        assert result == "a3"  # Second highest
 
-    # if exploit_p factor is 0 => return the action with 2nd highest prob (not 1st highest prob)
-    mutated_b = b.with_exploit_p(exploit_p=0)
-    assert max(p, key=p.get) != mutated_b.select_action(p=p)
-    assert sorted(p.items(), key=lambda x: x[1], reverse=True)[1][0] == mutated_b.select_action(p=p)
 
+def test_bai_all_probs_equal(equal_prob: float = 0.5, exploit_p_max: float = 1.0, exploit_p_min: float = 0.0):
+    """Test BAI behavior when all probabilities are equal.
 
-def test_select_action_logic_all_probs_equal():
-    p = {"a1": 0.5, "a2": 0.5, "a3": 0.5}
+    Parameters
+    ----------
+    equal_prob : float
+        Equal probability value for all actions.
+    exploit_p_max : float
+        Maximum exploit probability value.
+    exploit_p_min : float
+        Minimum exploit probability value.
+    """
+    p = {"a1": equal_prob, "a2": equal_prob, "a3": equal_prob}
+    actions = {action_id: BetaCC(cost=DEFAULT_COST) for action_id in p.keys()}
 
-    b = BestActionIdentificationBandit(exploit_p=1)
+    b = BestActionIdentificationBandit(exploit_p=exploit_p_max)
     # if exploit_p is 1 and all probs are equal => return the action with 1st highest prob (max)
-    assert "a1" == b.select_action(p=p)
+    assert "a1" == b.select_action(p=p, actions=actions)
 
     # if exploit_p is 0 => return the action with 2nd highest prob (not 1st highest prob)
-    mutated_b = b.with_exploit_p(exploit_p=0)
-    assert "a2" == mutated_b.select_action(p=p)
+    mutated_b = b.with_exploit_p(exploit_p=exploit_p_min)
+    assert "a2" == mutated_b.select_action(p=p, actions=actions)
 
 
-@given(st.builds(Beta), st.builds(Beta), st.builds(Beta))
-def test_compare_best_action(b1, b2, b3):
-    b = BestActionIdentificationBandit()
-    actions = {"a1": b1, "a2": b2, "a3": b3}
+@given(
+    exploit_p=st.floats(min_value=0.01, max_value=0.99), expected_result1=st.just("a1"), expected_result2=st.just("a2")
+)
+def test_bai_probabilistic_selection(
+    exploit_p: float, expected_result1: str, expected_result2: str, prob_dict_three_actions: Dict[str, float]
+):
+    """Test BAI probabilistic selection between best and second-best.
+
+    Parameters
+    ----------
+    exploit_p : float
+        Exploitation probability.
+    prob_dict_three_actions : Dict[str, float]
+        Probability dictionary with three actions.
+    expected_result1 : str
+        Expected result of the strategy when random > exploit_p.
+    expected_result2 : str
+        Expected result of the strategy when random <= exploit_p.
+        Expected result of the strategy.
+    """
+    p = prob_dict_three_actions
+    actions = {action_id: BetaCC(cost=DEFAULT_COST) for action_id in p.keys()}
+
+    b = BestActionIdentificationBandit(exploit_p=exploit_p)
+
+    # Test that selection respects probability
+    with patch("pybandits.strategy.random") as mock_random:
+        # When random > exploit_p, should select second best
+        mock_random.return_value = exploit_p + 0.01
+        assert b.select_action(p=p, actions=actions) == expected_result1
+
+        # When random <= exploit_p, should select best
+        mock_random.return_value = exploit_p - 0.01
+        assert b.select_action(p=p, actions=actions) == expected_result2
 
-    pval = b.compare_best_actions(actions=actions)
 
-    assert pval > 0
+########################################################################################################################
+# CostControlStrategy tests
 
 
-########################################################################################################################
+def test_cost_control_strategy_mixin(default_subsidy_factor: float = 0.5, new_subsidy_factor: float = 0.7):
+    """Test CostControlStrategy as a mixin.
+
+    Parameters
+    ----------
+    default_subsidy_factor : float
+        Default subsidy factor value.
+    new_subsidy_factor : float
+        New subsidy factor value for mutation test.
+    """
+    strategy = CostControlStrategy()
+    assert strategy.subsidy_factor == default_subsidy_factor
+
+    # Test with_subsidy_factor
+    mutated = strategy.with_subsidy_factor(new_subsidy_factor)
+    assert mutated.subsidy_factor == new_subsidy_factor
+    assert mutated is not strategy
+
+
+@given(st.floats())
+def test_cost_control_strategy_validation(subsidy_factor: float):
+    """Test CostControlStrategy subsidy_factor validation.
+
+    Parameters
+    ----------
+    subsidy_factor : float
+        Test value for subsidy_factor.
+    """
+    if 0 <= subsidy_factor <= 1 and not (np.isnan(subsidy_factor) or np.isinf(subsidy_factor)):
+        strategy = CostControlStrategy(subsidy_factor=subsidy_factor)
+        assert strategy.subsidy_factor == subsidy_factor
+    else:
+        with pytest.raises(ValidationError):
+            CostControlStrategy(subsidy_factor=subsidy_factor)
 
 
+########################################################################################################################
 # CostControlBandit
 
 
 @given(st.floats())
-def test_can_init_cost_control(a_float):
+def test_can_init_cost_control(a_float: float):
+    """Test CostControlBandit initialization.
+
+    Parameters
+    ----------
+    a_float : float
+        Test value for subsidy_factor.
+    """
     # init with default arguments
     c = CostControlBandit()
     assert c.subsidy_factor == 0.5
+    assert isinstance(c, SingleObjectiveStrategy)
+    assert isinstance(c, CostControlStrategy)
 
     # init with input arguments
     if a_float < 0 or a_float > 1 or np.isnan(a_float) or np.isinf(a_float):
@@ -167,7 +752,14 @@ def test_can_init_cost_control(a_float):
 
 
 @given(st.floats())
-def test_with_subsidy_factor(a_float):
+def test_with_subsidy_factor(a_float: float):
+    """Test CostControlBandit with_subsidy_factor method.
+
+    Parameters
+    ----------
+    a_float : float
+        Test value for subsidy_factor.
+    """
     c = CostControlBandit()
 
     # set with invalid float
@@ -182,202 +774,689 @@ def test_with_subsidy_factor(a_float):
 
 
 @given(
-    st.lists(st.text(min_size=1), min_size=1),
-    st.lists(st.floats(min_value=0, allow_infinity=False, allow_nan=False), min_size=1),
+    st.lists(st.text(min_size=1, max_size=10), min_size=1, max_size=3, unique=True),
+    st.lists(st.floats(min_value=0, max_value=100, allow_infinity=False, allow_nan=False), min_size=1, max_size=3),
 )
+@settings(max_examples=10)
 def test_select_action_cc(a_list_str, a_list_float):
+    """Test CostControlBandit select_action method.
+
+    Parameters
+    ----------
+    a_list_str : list
+        List of action IDs.
+    a_list_float : list
+        List of costs.
+    """
+    assume(len(a_list_str) == len(a_list_float))
     a_list_float_0_1 = [float(i) / (sum(a_list_float) + 1) for i in a_list_float]
 
     p = dict(zip(a_list_str, a_list_float_0_1))
     a = dict(zip(a_list_str, [BetaCC(cost=c) for c in a_list_float]))
 
     c = CostControlBandit()
-    c.select_action(p=p, actions=a)
+    result = c.select_action(p=p, actions=a)
+    assert result in p.keys()
 
 
-def test_select_action_logic_cc():
+@pytest.mark.parametrize(
+    "subsidy_factor,expected_action",
+    [
+        (1.0, "a4"),  # Min cost action with highest prob among same cost
+        (0.0, "a2"),  # Highest probability action
+        (0.5, "a5"),  # Cheapest feasible action
+    ],
+)
+def test_cost_control_logic(subsidy_factor: float, expected_action: str):
+    """Test CostControlBandit selection logic with different subsidy factors.
+
+    Parameters
+    ----------
+    subsidy_factor : float
+        Subsidy factor for cost control.
+    expected_action : str
+        Expected selected action.
+    """
     actions_cost = {"a1": 10, "a2": 30, "a3": 20, "a4": 10, "a5": 20}
     p = {"a1": 0.1, "a2": 0.8, "a3": 0.6, "a4": 0.2, "a5": 0.65}
 
-    actions = {
-        "a1": BetaCC(cost=actions_cost["a1"]),
-        "a2": BetaCC(cost=actions_cost["a2"]),
-        "a3": BetaCC(cost=actions_cost["a3"]),
-        "a4": BetaCC(cost=actions_cost["a4"]),
-        "a5": BetaCC(cost=actions_cost["a5"]),
-    }
-
-    c = CostControlBandit(subsidy_factor=1)
-    # if subsidy_factor is 1 => return the action with min cost and the highest sampled probability
-    assert "a4" == c.select_action(p=p, actions=actions)
-
-    # if subsidy_factor is 0 => return the action with highest p (classic bandit)
-    mutated_c = c.with_subsidy_factor(subsidy_factor=0)
-    assert "a2" == mutated_c.select_action(p=p, actions=actions)
+    actions = {action_id: BetaCC(cost=cost) for action_id, cost in actions_cost.items()}
 
-    # otherwise, return the cheapest feasible action with the highest sampled probability
-    mutated_c = c.with_subsidy_factor(subsidy_factor=0.5)
-    assert "a5" == mutated_c.select_action(p=p, actions=actions)
+    c = CostControlBandit(subsidy_factor=subsidy_factor)
+    assert c.select_action(p=p, actions=actions) == expected_action
 
 
 @given(
     st.lists(st.floats(min_value=0, max_value=1, allow_infinity=False, allow_nan=False), min_size=3, max_size=3),
-    st.lists(
-        st.floats(min_value=0, allow_infinity=False, allow_nan=False),
-        min_size=3,
-        max_size=3,
-    ),
+    st.lists(st.floats(min_value=0, max_value=100, allow_infinity=False, allow_nan=False), min_size=3, max_size=3),
 )
-def test_select_action_logic_corner_cases(a_list_p, a_list_cost):
+def test_cost_control_corner_cases(a_list_p, a_list_cost):
+    """Test CostControlBandit corner cases with ties in cost and probability.
+
+    Parameters
+    ----------
+    a_list_p : list
+        List of probabilities.
+    a_list_cost : list
+        List of costs.
+    """
     action_ids = ["a1", "a2", "a3"]
 
     p = dict(zip(action_ids, a_list_p))
     actions_cost = dict(zip(action_ids, a_list_cost))
     actions_cost_proba = [(a_cost, -a_proba, a_id) for a_id, a_cost, a_proba in zip(action_ids, a_list_cost, a_list_p)]
 
-    actions = {
-        "a1": BetaCC(cost=actions_cost["a1"]),
-        "a2": BetaCC(cost=actions_cost["a2"]),
-        "a3": BetaCC(cost=actions_cost["a3"]),
-    }
+    actions = {aid: BetaCC(cost=actions_cost[aid]) for aid in action_ids}
 
     c = CostControlBandit(subsidy_factor=1)
-    # if cost factor is 1 return:
-    # - the action with the min cost, or
-    # - the highest probability in case of cost equality, or
-    # - the lowest action id (alphabetically) in case of equal cost and probability
+    # if subsidy_factor is 1 => return the action with min cost (and highest prob if tied)
     assert sorted(actions_cost_proba)[0][-1] == c.select_action(p=p, actions=actions)
 
-    # if cost factor is 0:
+    # if subsidy_factor is 0:
     mutated_c = c.with_subsidy_factor(subsidy_factor=0)
-    # get the keys of the max p.quantities() (there might be more max_p_values)
+    # get the keys of the max p.values() (there might be more max_p_values)
     max_p_values = [k for k, v in p.items() if v == max(p.values())]
 
-    # if cost factor is 0 and only 1 max_value => return the action with highest p (classic bandit)
-    # e.g. p={"a1": 0.5, "a2": 0.2} => return always "a1"
+    # if subsidy_factor is 0 and only 1 max_value => return the action with highest p
     if len(max_p_values) == 1:
         assert max(p, key=p.get) == mutated_c.select_action(p=p, actions=actions)
-
-    # if cost factor is 0 and only 1+ max_values => return the action with highest p and min cost
-    # e.g. p={"a1": 0.5, "a2": 0.5} and cost={"a1": 20, "a2": 10} => return always "a2"
+    # if subsidy_factor is 0 and 1+ max_values => return the one with min cost
     else:
         actions_cost_max = {k: actions_cost[k] for k in max_p_values}
         assert min(actions_cost_max, key=actions_cost_max.get) == mutated_c.select_action(p=p, actions=actions)
 
 
+def test_cost_control_get_prerequisites(prob_a1: float = 0.5, prob_a2: float = 0.8, prob_a3: float = 0.3):
+    """Test CostControlBandit get_prerequisites method.
+
+    Parameters
+    ----------
+    prob_a1 : float
+        Probability for action a1.
+    prob_a2 : float
+        Probability for action a2 (expected to be highest).
+    prob_a3 : float
+        Probability for action a3.
+    """
+    c = CostControlBandit()
+
+    p = {"a1": prob_a1, "a2": prob_a2, "a3": prob_a3}
+    actions = {aid: BetaCC(cost=DEFAULT_COST) for aid in p.keys()}
+
+    prerequisites = c.get_prerequisites(p, actions, None)
+
+    assert "best_value" in prerequisites
+    assert prerequisites["best_value"] == prob_a2  # Highest probability
+
+
+@pytest.mark.parametrize(
+    "score, best_value, expected_result",
+    [
+        (0.6, 1.0, True),
+        (0.5, 1.0, True),
+        (0.4, 1.0, False),
+    ],
+)
+def test_cost_control_verify_action(
+    score: float, best_value: float, expected_result: bool, subsidy_factor: float = DEFAULT_SUBSIDY_FACTOR
+):
+    """Test CostControlBandit _verify_action method.
+
+    Parameters
+    ----------
+    score : float
+        Score to verify.
+    best_value : float
+        Best value for comparison.
+    expected_result : bool
+        Expected verification result.
+    subsidy_factor : float
+        Subsidy factor for the bandit.
+    """
+    c = CostControlBandit(subsidy_factor=subsidy_factor)
+    assert c._verify_action(score, best_value=best_value) is expected_result
+
+
+@patch("pybandits.utils.maximize_by_quantity")
+def test_cost_control_quantitative_action(
+    mock_maximize: MagicMock,
+    return_value: np.ndarray = np.array([0.3, 0.7]),
+    subsidy_factor: float = 0.5,
+    dimension: int = 2,
+    cost_multiplier: float = 10.0,
+    best_value: float = 0.8,
+):
+    """Test CostControlBandit with quantitative actions.
+
+    Parameters
+    ----------
+    mock_maximize : MagicMock
+        Mock for maximize_by_quantity function.
+    return_value : np.ndarray
+        Return value for mock maximize function.
+    subsidy_factor : float
+        Subsidy factor for cost control.
+    dimension : int
+        Dimension of the quantitative model.
+    cost_multiplier : float
+        Multiplier for cost calculation.
+    best_value : float
+        Best value for verification.
+    """
+    mock_maximize.return_value = return_value
+
+    c = CostControlBandit(subsidy_factor=subsidy_factor)
+
+    model = create_mock_quantitative_model(dimension=dimension)
+    model.cost = MagicMock(side_effect=lambda x: np.sum(x) * cost_multiplier)
+
+    result = c._verify_and_select_from_quantitative_action(sum, model, None, best_value=best_value)
+
+    # Check if mock was used, otherwise handle actual optimization result
+    if mock_maximize.called:
+        assert result is not None, "Optimization should return a result"
+        assert np.allclose(result, return_value, atol=1e-6)
+        mock_maximize.assert_called_once()
+        # Check that cost control constraint was added
+        call_args = mock_maximize.call_args
+        constraint_list = call_args[0][2] if len(call_args[0]) > 2 else call_args[1].get("constraint_list")
+        assert constraint_list is not None, "Cost control constraint should be added"
+    else:
+        # Mock wasn't used - actual optimization may fail due to constraints
+        # Accept None if constraints can't be satisfied, or verify result if successful
+        if result is not None:
+            assert isinstance(result, np.ndarray)
+            assert len(result) == dimension
+
+
 ########################################################################################################################
+# MultiObjectiveStrategy tests
+
+
+class ConcreteMultiObjectiveStrategy(MultiObjectiveStrategy):
+    """Concrete implementation of MultiObjectiveStrategy for testing."""
+
+    objective_selector_class = ClassicBandit
 
+    def _get_feasible_solutions(
+        self, p: Dict[ActionId, List[float]], actions: Dict[ActionId, BaseModel]
+    ) -> Dict[UnifiedActionId, List[float]]:
+        """Return all solutions as feasible."""
+        return p
 
+
+def test_multi_objective_strategy_abstract():
+    """Test that MultiObjectiveStrategy cannot be instantiated directly."""
+    with pytest.raises(AttributeError):
+        MultiObjectiveStrategy()
+
+
+def test_multi_objective_strategy_initialization():
+    """Test MultiObjectiveStrategy initialization."""
+    strategy = ConcreteMultiObjectiveStrategy()
+    assert hasattr(strategy, "_objective_selector")
+    assert isinstance(strategy._objective_selector, ClassicBandit)
+
+
+########################################################################################################################
 # MultiObjectiveBandit
 
 
 def test_can_init_multiobjective():
-    MultiObjectiveBandit()
+    """Test MultiObjectiveBandit initialization."""
+    m = MultiObjectiveBandit()
+    assert isinstance(m, MultiObjectiveStrategy)
+    assert m.objective_selector_class == ClassicBandit
 
 
 @given(
     st.dictionaries(
         st.text(min_size=1, alphabet=st.characters(blacklist_categories=("Cc", "Cs"))),
-        st.lists(st.floats(min_value=0, max_value=1), min_size=3, max_size=3),
-        min_size=3,
+        st.lists(st.floats(min_value=0, max_value=1, allow_nan=False, allow_infinity=False), min_size=2, max_size=3),
+        min_size=2,
     )
 )
 def test_select_action_mo(p: Dict[ActionId, List[Probability]]):
+    """Test MultiObjectiveBandit selects from Pareto front.
+
+    Parameters
+    ----------
+    p : Dict[ActionId, List[Probability]]
+        Dictionary of actions and their multi-objective probabilities.
+    """
+    # Ensure all actions have same number of objectives
+    n_objectives = len(list(p.values())[0])
+    p = {k: v for k, v in p.items() if len(v) == n_objectives}
+
+    if not p:
+        return  # Skip if no valid actions
+
+    actions = {aid: BetaMOCC(models=[Beta() for _ in range(n_objectives)], cost=DEFAULT_COST) for aid in p.keys()}
+
     m = MultiObjectiveBandit()
-    assert m.select_action(p=p) in m.get_pareto_front(p=p)
-
-
-def test_pareto_front():
-    # works in 2D
-    #
-    #    +
-    # .3 |     X
-    #    |
-    # .2 |          X
-    #    |
-    # .1 |      X       X
-    #    |
-    #  0 | X            X
-    #    +-----------------+
-    #      0   .1  .2  .3
-
-    p2d = {
-        "a0": [0.1, 0.3],
-        "a1": [0.1, 0.3],
-        "a2": [0.0, 0.0],
-        "a3": [0.1, 0.1],
-        "a4": [0.3, 0.1],
-        "a5": [0.2, 0.2],
-        "a6": [0.3, 0.0],
-        "a7": [0.1, 0.1],
-    }
+    selected = m.select_action(p=p, actions=actions)
+    pareto_front = m._get_pareto_front(p, actions)
+
+    assert selected in pareto_front
+
+
+@pytest.mark.parametrize(
+    "p_dict,expected_front",
+    [
+        # 2D case 1: Clear Pareto front
+        (
+            {
+                "a0": [0.1, 0.3],
+                "a1": [0.1, 0.3],
+                "a2": [0.0, 0.0],
+                "a3": [0.1, 0.1],
+                "a4": [0.3, 0.1],
+                "a5": [0.2, 0.2],
+                "a6": [0.3, 0.0],
+                "a7": [0.1, 0.1],
+            },
+            ["a0", "a1", "a4", "a5"],
+        ),
+        # 2D case 2: Duplicate optimal points
+        (
+            {
+                "a0": [0.1, 0.1],
+                "a1": [0.3, 0.3],
+                "a2": [0.3, 0.3],
+            },
+            ["a1", "a2"],
+        ),
+        # 3D case
+        (
+            {
+                "a0": [0.1, 0.3, 0.1],
+                "a1": [0.1, 0.3, 0.1],
+                "a2": [0.0, 0.0, 0.1],
+                "a3": [0.1, 0.1, 0.1],
+                "a4": [0.3, 0.1, 0.1],
+                "a5": [0.2, 0.2, 0.1],
+                "a6": [0.3, 0.0, 0.1],
+                "a7": [0.1, 0.1, 0.3],
+            },
+            ["a0", "a1", "a4", "a5", "a7"],
+        ),
+    ],
+)
+def test_exact_pareto_front(p_dict: Dict[str, List[float]], expected_front: List[str]):
+    """Test exact Pareto front computation.
+
+    Parameters
+    ----------
+    p_dict : Dict[str, List[float]]
+        Dictionary of actions and their multi-objective values.
+    expected_front : List[str]
+        Expected Pareto front actions.
+    """
+    n_objectives = len(list(p_dict.values())[0])
+    actions = {aid: BetaMOCC(models=[Beta() for _ in range(n_objectives)], cost=DEFAULT_COST) for aid in p_dict.keys()}
+
+    m = MultiObjectiveBandit()
+    pareto_front = m._get_exact_pareto_front(p_dict, actions)
+
+    assert sorted(pareto_front) == sorted(expected_front)
+
+
+def test_approximate_pareto_front(
+    fixed_prob: float = 0.4,
+    func2_coeff: float = 0.5,
+    func2_offset: float = 0.3,
+    dimension: int = 1,
+    n_divisions: int = 5,
+    mock_solution1: float = 0.5,
+    mock_solution2: float = 0.8,
+):
+    """Test approximate Pareto front computation for quantitative actions.
+
+    Parameters
+    ----------
+    fixed_prob : float
+        Fixed probability value for discrete action.
+    func2_coeff : float
+        Coefficient for func2 calculation.
+    func2_offset : float
+        Offset for func2 calculation.
+    dimension : int
+        Dimension of quantitative models.
+    n_divisions : int
+        Number of divisions for Pareto front approximation.
+    mock_solution1 : float
+        First mock solution value.
+    mock_solution2 : float
+        Second mock solution value.
+    """
+    m = MultiObjectiveBandit()
+
+    # Create mock quantitative actions
+    def func1(x: np.ndarray) -> List[float]:
+        return [x[0], 1 - x[0]]  # Trade-off between objectives
 
-    assert MultiObjectiveStrategy.get_pareto_front(p2d) == ["a0", "a1", "a4", "a5"]
+    def func2(x: np.ndarray) -> List[float]:
+        return [func2_coeff * x[0], func2_coeff * (1 - x[0]) + func2_offset]  # Different trade-off
 
-    p2d = {
-        "a0": [0.1, 0.1],
-        "a1": [0.3, 0.3],
-        "a2": [0.3, 0.3],
+    p = {
+        "a1": func1,
+        "a2": func2,
+        "a3": [fixed_prob, fixed_prob],  # Fixed action
     }
 
-    assert MultiObjectiveStrategy.get_pareto_front(p2d) == ["a1", "a2"]
-
-    # works in 3D
-    p3d = {
-        "a0": [0.1, 0.3, 0.1],
-        "a1": [0.1, 0.3, 0.1],
-        "a2": [0.0, 0.0, 0.1],
-        "a3": [0.1, 0.1, 0.1],
-        "a4": [0.3, 0.1, 0.1],
-        "a5": [0.2, 0.2, 0.1],
-        "a6": [0.3, 0.0, 0.1],
-        "a7": [0.1, 0.1, 0.3],
+    actions = {
+        "a1": create_mock_quantitative_model(dimension=dimension),
+        "a2": create_mock_quantitative_model(dimension=dimension),
+        "a3": BetaMOCC(models=[Beta(), Beta()], cost=DEFAULT_COST),
     }
 
-    assert MultiObjectiveStrategy.get_pareto_front(p3d) == ["a0", "a1", "a4", "a5", "a7"]
+    # Mock the models attribute for quantitative models
+    actions["a1"].models = [Beta(), Beta()]
+    actions["a2"].models = [Beta(), Beta()]
+
+    # Patch on the class where it's used (strategy module) to avoid Pydantic model restrictions
+    with patch("pybandits.strategy.MultiObjectiveStrategy._find_pareto_front_normal_constraint") as mock_nc:
+        mock_nc.side_effect = lambda *args, **kwargs: [
+            np.array([mock_solution1]),
+            np.array([mock_solution2]),
+        ]
+
+        pareto_front = m._get_approximate_pareto_front(p, actions, n_divisions=n_divisions)
+
+        # Should have been called for quantitative actions
+        assert mock_nc.call_count == 2
+        # If a3 is not in pareto front, it might have been dominated - check that we at least have some results
+        assert len(pareto_front)
+        # If a3 exists and is not dominated, it should be in the front
+        # For now, just verify we have results from the quantitative actions
+        assert any(isinstance(item, tuple) for item in pareto_front)
+
+
+@pytest.mark.parametrize(
+    "n_objectives,n_divisions",
+    [
+        (2, 5),
+        (3, 3),
+        (4, 2),
+    ],
+)
+def test_das_dennis_weights(n_objectives: int, n_divisions: int):
+    """Test Das-Dennis weight generation.
+
+    Parameters
+    ----------
+    n_objectives : int
+        Number of objectives.
+    n_divisions : int
+        Number of divisions for weight generation.
+    """
+    weights = MultiObjectiveStrategy._das_dennis_weights(n_objectives, n_divisions)
+
+    # Check all weights sum to 1
+    for w in weights:
+        assert np.isclose(np.sum(w), 1.0)
+
+    # Check all weights are non-negative
+    assert np.all(weights >= 0)
+
+    # Check dimensionality
+    assert weights.shape[1] == n_objectives
+
+    # Check approximate number of weights (combinatorial formula)
+    from math import comb
+
+    expected_count = comb(n_divisions + n_objectives - 1, n_objectives - 1)
+    assert len(weights) == expected_count
+
+
+def test_find_pareto_front_normal_constraint(
+    return_value: np.ndarray = np.array([0.5]),
+    dimension: int = 1,
+    n_objectives: int = 2,
+    n_divisions: int = 3,
+    best_obj1: float = 1.0,
+    best_obj2: float = 0.0,
+):
+    """Test Normal Constraint method for Pareto front finding.
+
+    Parameters
+    ----------
+    return_value : np.ndarray
+        Return value for mock solve function.
+    dimension : int
+        Dimension of the quantitative model.
+    n_objectives : int
+        Number of objectives.
+    n_divisions : int
+        Number of divisions for weight generation.
+    best_obj1 : float
+        Best value for objective 1.
+    best_obj2 : float
+        Best value for objective 2.
+    """
+    m = MultiObjectiveBandit()
 
+    # Simple 2-objective function with known Pareto front
+    def test_func(x: np.ndarray) -> List[float]:
+        return [x[0], 1 - x[0]]  # Linear trade-off
 
-########################################################################################################################
+    model = create_mock_quantitative_model(dimension=dimension)
+    model.models = [Beta() for _ in range(n_objectives)]
 
+    with patch(
+        "pybandits.strategy.ClassicBandit.verify_and_select_from_quantitative_action",
+        side_effect=[
+            np.array([best_obj1]),  # Best for objective 1
+            np.array([best_obj2]),  # Best for objective 2
+        ],
+    ) as mock_verify:
+        # Add mock for NC subproblem solving
+        with patch("pybandits.strategy.MultiObjectiveStrategy._solve_nc_subproblem") as mock_solve:
+            mock_solve.return_value = return_value
 
-# MultiObjectiveCostControlBandit
+            solutions = m._find_pareto_front_normal_constraint(test_func, dimension, n_objectives, n_divisions, model)
 
+            assert len(solutions) > 0
+            assert mock_verify.call_count == n_objectives  # Called for each objective
+            assert mock_solve.call_count > 0  # Called for each weight vector
 
-def test_can_init_multiobjective_mo_cc():
-    MultiObjectiveCostControlBandit()
 
+########################################################################################################################
+# MultiObjectiveCostControlBandit
 
-def test_select_action_mo_cc():
+
+def test_can_init_multiobjective_mo_cc():
+    """Test MultiObjectiveCostControlBandit initialization."""
+    m = MultiObjectiveCostControlBandit()
+    assert isinstance(m, MultiObjectiveStrategy)
+    assert isinstance(m, CostControlStrategy)
+    assert m.objective_selector_class == CostControlBandit
+    assert m.subsidy_factor == 0.5
+
+
+@pytest.mark.parametrize(
+    "test_case",
+    [
+        # Case 1: Different costs, clear Pareto front
+        {
+            "actions_costs": {"a1": 8, "a2": 2, "a3": 5, "a4": 1, "a5": 7},
+            "probabilities": {
+                "a1": [0.1, 0.3, 0.5],
+                "a2": [0.1, 0.3, 0.5],
+                "a3": [0.0, 0.4, 0.4],
+                "a4": [0.5, 0.3, 0.7],
+                "a5": [0.6, 0.1, 0.5],
+            },
+            "expected_pareto": ["a3", "a4", "a5"],
+            "expected_selection": "a5",  # Min cost in Pareto front
+        },
+        # Case 2: Equal costs, select by probability
+        {
+            "actions_costs": {"a1": 2, "a2": 2, "a3": 5},
+            "probabilities": {
+                "a1": [0.6, 0.1],
+                "a2": [0.5, 0.8],
+                "a3": [0.0, 0.1],
+            },
+            "expected_pareto": ["a1", "a2"],
+        },
+    ],
+)
+def test_mo_cc_selection_logic(test_case: dict):
+    """Test MultiObjectiveCostControlBandit selection logic.
+
+    Parameters
+    ----------
+    test_case : dict
+        Test case with actions, probabilities, and expected results.
+    """
     m = MultiObjectiveCostControlBandit()
 
+    n_objectives = len(list(test_case["probabilities"].values())[0])
     actions = {
-        "a1": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=8),
-        "a2": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=2),
-        "a3": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=5),
-        "a4": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=1),
-        "a5": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=7),
+        aid: BetaMOCC(models=[Beta() for _ in range(n_objectives)], cost=cost)
+        for aid, cost in test_case["actions_costs"].items()
     }
+
+    p = test_case["probabilities"]
+
+    # Test Pareto front computation
+    pareto_front = m._get_pareto_front(p, actions)
+    assert sorted(pareto_front) == sorted(test_case["expected_pareto"])
+
+    # Test action selection
+    selected = m.select_action(p=p, actions=actions)
+    # Verify selected action is in expected Pareto front
+    assert selected in test_case["expected_pareto"]
+
+
+def test_mo_cc_get_feasible_solutions(subsidy_factor: float = 0.5, fixed_prob_value: float = 0.5):
+    """Test MultiObjectiveCostControlBandit _get_feasible_solutions method.
+
+    Parameters
+    ----------
+    subsidy_factor : float
+        Subsidy factor for the bandit.
+    fixed_prob_value : float
+        Fixed probability value for action a2.
+    """
+    m = MultiObjectiveCostControlBandit(subsidy_factor=subsidy_factor)
+
+    # Create test data with quantitative actions
     p = {
-        "a1": [0.1, 0.3, 0.5],
-        "a2": [0.1, 0.3, 0.5],
-        "a3": [0.0, 0.4, 0.4],
-        "a4": [0.5, 0.3, 0.7],
-        "a5": [0.6, 0.1, 0.5],
+        "a1": lambda x: [x[0], 1 - x[0]],
+        "a2": lambda x: [fixed_prob_value, fixed_prob_value],
     }
-    # within the pareto front ("a3", "a4", "a5") select the action with min cost ("a4")
-    assert m.get_pareto_front(p) == ["a3", "a4", "a5"]
-    assert m.select_action(p=p, actions=actions) == "a4"
+
+    model1 = create_mock_quantitative_model()
+    model1.models = [Beta(), Beta()]
+    model2 = create_mock_quantitative_model()
+    model2.models = [Beta(), Beta()]
 
     actions = {
-        "a1": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=2),
-        "a2": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=2),
-        "a3": BetaMOCC(models=[Beta(), Beta(), Beta()], cost=5),
+        "a1": model1,
+        "a2": model2,
+        "a3": BetaMOCC(models=[Beta(), Beta()], cost=DEFAULT_COST),  # Discrete action model
     }
-    p = {
-        "a1": [0.6, 0.1, 0.1],
-        "a2": [0.5, 0.8, 0.8],
-        "a3": [0.0, 0.1, 0.9],
-    }
-    # within the actions with the min cost ("a1" or "a2") select the action the highest mean of probabilities ("a2")
-    assert m.get_pareto_front(p) == ["a1", "a2", "a3"]
-    assert m.select_action(p=p, actions=actions) == "a2"
+
+    # Mock the objective selector's refine_p method
+    # Patch on the class where it's used (strategy module) to avoid Pydantic model restrictions
+    with patch("pybandits.strategy.CostControlBandit.refine_p", return_value={("a1", (0.5,)): 0.5}) as mock_refine:
+        m._get_feasible_solutions(p, actions)
+        # Verify the method was called (it will be called per objective)
+        # Should be called for each objective (2 objectives in this test)
+        assert mock_refine.call_count == 2, f"Expected 2 calls but got {mock_refine.call_count}"
+
+
+########################################################################################################################
+# Integration tests
+
+
+@pytest.mark.parametrize(
+    "strategy",
+    [
+        ClassicBandit(),
+        BestActionIdentificationBandit(exploit_p=DEFAULT_EXPLOIT_P),
+        CostControlBandit(subsidy_factor=DEFAULT_SUBSIDY_FACTOR),
+    ],
+    ids=["Classic", "BAI", "CC"],
+)
+@given(
+    action_data=action_probability_pairs(min_actions=2, max_actions=4, allow_callables=True),
+    mock_return_value=st.just(np.array([0.5, 0.5])),
+)
+def test_strategy_integration(
+    strategy: BaseStrategy,
+    action_data: Tuple,
+    mock_return_value: np.ndarray,
+):
+    """Integration test for strategies with mixed action types.
+
+    Parameters
+    ----------
+    strategy : BaseStrategy
+        Strategy instance to test.
+    action_data : tuple
+        Generated action IDs, probabilities, and models.
+    mock_return_value : np.ndarray
+        Return value for mock maximize function.
+    """
+    action_ids, probabilities, models = action_data
+
+    # Patch maximize_by_quantity in both utils and strategy modules to ensure all calls are mocked
+    with (
+        patch("pybandits.utils.maximize_by_quantity") as mock_maximize_utils,
+        patch("pybandits.strategy.maximize_by_quantity") as mock_maximize_strategy,
+    ):
+        mock_maximize_utils.return_value = mock_return_value
+        mock_maximize_strategy.return_value = mock_return_value
+
+        result = strategy.select_action(probabilities, models)
+        assert result is not None
+
+        # Check result is valid
+        if isinstance(result, tuple):
+            assert result[0] in action_ids
+            assert isinstance(result[1], (tuple, np.ndarray))
+        else:
+            assert result in action_ids
+
+
+@pytest.mark.parametrize(
+    "strategy_class,kwargs",
+    [
+        (ClassicBandit, {}),
+        (BestActionIdentificationBandit, {"exploit_p": 0.8}),
+        (CostControlBandit, {"subsidy_factor": 0.2}),
+        (MultiObjectiveBandit, {}),
+        (MultiObjectiveCostControlBandit, {"subsidy_factor": 0.6}),
+    ],
+)
+def test_strategy_normalize_field(strategy_class, kwargs):
+    """Test field normalization for all strategies.
+
+    Parameters
+    ----------
+    strategy_class : type
+        Strategy class to test.
+    kwargs : dict
+        Initialization arguments.
+    """
+    strategy_class(**kwargs)
+
+    # Test normalize_field method if it's a SingleObjectiveStrategy
+    if issubclass(strategy_class, SingleObjectiveStrategy):
+        # Test with None value
+        if "exploit_p" in strategy_class.model_fields:
+            result = strategy_class._normalize_field(None, "exploit_p")
+            assert result == strategy_class.model_fields["exploit_p"].default
+
+        if "subsidy_factor" in strategy_class.model_fields:
+            result = strategy_class._normalize_field(None, "subsidy_factor")
+            assert result == strategy_class.model_fields["subsidy_factor"].default
+
+        # Test with actual value
+        result = strategy_class._normalize_field(
+            0.7, "subsidy_factor" if "subsidy_factor" in strategy_class.model_fields else "exploit_p"
+        )
+        assert result == 0.7
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 4d53413..5cacde9 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -4,17 +4,22 @@
 import sys
 from abc import ABC, abstractmethod
 from types import ModuleType
+from typing import Callable, List, Optional
 from unittest.mock import MagicMock, patch
 
+import numpy as np
 import pytest
 from bokeh.models import Div, InlineStyleSheet, TabPanel, Tabs
 
 import pybandits
+from pybandits.base import Probability
 from pybandits.utils import (
+    OptimizationFailedError,
     classproperty,
     extract_argument_names_from_function,
     get_non_abstract_classes,
     in_jupyter_notebook,
+    maximize_by_quantity,
     visualize_via_bokeh,
 )
 
@@ -328,3 +333,411 @@ class SubClass(TestClass):
 
         assert TestClass.class_attr == "original"
         assert SubClass.class_attr == "subclass"
+
+
+class TestMaximizeByQuantity:
+    """Test cases for maximize_by_quantity function."""
+
+    @pytest.fixture
+    def default_n_trials(self) -> int:
+        """Default number of trials for optimization."""
+        return 1000
+
+    @pytest.fixture
+    def default_dimension(self) -> int:
+        """Default dimension for test cases."""
+        return 2
+
+    @pytest.mark.parametrize(
+        "quantity_score_func,dimension,expected_shape",
+        [
+            # Simple quadratic function - maximum at (0.5, 0.5)
+            (lambda x: (1.0 - np.sum((x - 0.5) ** 2)), 2, (2,)),
+            # Linear function - maximum at (1.0, 1.0)
+            (lambda x: (np.sum(x)), 2, (2,)),
+            # Single variable function
+            (lambda x: (1.0 - (x[0] - 0.7) ** 2), 1, (1,)),
+            # Three variable function
+            (lambda x: (1.0 - np.sum((x - 0.3) ** 2)), 3, (3,)),
+        ],
+    )
+    def test_maximize_by_quantity_without_constraints(
+        self,
+        quantity_score_func: Callable[[np.ndarray], float],
+        dimension: int,
+        expected_shape: tuple,
+        default_n_trials: int,
+    ) -> None:
+        """Test maximize_by_quantity without constraints."""
+        result = maximize_by_quantity(quantity_score_func, dimension, n_trials=default_n_trials)
+
+        assert isinstance(result, np.ndarray)
+        assert result.shape == expected_shape
+        assert np.all(result >= 0.0)
+        assert np.all(result <= 1.0)
+
+        # Verify that the result actually maximizes the function
+        result_score = quantity_score_func(result)
+        assert result_score > 0.0  # Should be positive for valid result
+
+    @pytest.mark.parametrize(
+        "quantity_score_func,dimension,constraints",
+        [
+            # Constraint: sum of elements <= 0.5
+            (
+                lambda x: (np.sum(x)),
+                2,
+                [lambda x: np.sum(x) <= 0.5],
+            ),
+            # Constraint: first element <= 0.3
+            (
+                lambda x: (x[0] + x[1]),
+                2,
+                [lambda x: x[0] <= 0.3],
+            ),
+            # Multiple constraints: sum <= 0.6 and first element >= 0.2
+            (
+                lambda x: (np.sum(x)),
+                2,
+                [lambda x: np.sum(x) <= 0.6, lambda x: x[0] >= 0.2],
+            ),
+            # Constraint: product of elements <= 0.1
+            (
+                lambda x: (np.prod(x)),
+                2,
+                [lambda x: np.prod(x) <= 0.1],
+            ),
+        ],
+        ids=["sum <= 0.5", "first element <= 0.3", "sum <= 0.6 and first element >= 0.2", "product <= 0.1"],
+    )
+    def test_maximize_by_quantity_with_constraints(
+        self,
+        quantity_score_func: Callable[[np.ndarray], float],
+        dimension: int,
+        constraints: List[Callable[[np.ndarray], bool]],
+        default_n_trials: int,
+    ) -> None:
+        """Test maximize_by_quantity with constraints."""
+        result = maximize_by_quantity(quantity_score_func, dimension, constraints, n_trials=default_n_trials)
+
+        assert isinstance(result, np.ndarray)
+        assert result.shape == (dimension,)
+        assert np.all(result >= 0.0)
+        assert np.all(result <= 1.0)
+
+        # Verify constraints are satisfied
+        for constraint in constraints:
+            assert constraint(result)
+
+    @pytest.mark.parametrize(
+        "n_trials",
+        [100, 1000, 5000],
+    )
+    def test_maximize_by_quantity_different_trial_counts(
+        self, n_trials: int, default_dimension: int, center: float = 0.5
+    ) -> None:
+        """Test maximize_by_quantity with different trial counts."""
+
+        def quantity_score_func(x) -> Probability:
+            return Probability(1.0 - np.sum((x - center) ** 2))
+
+        result = maximize_by_quantity(quantity_score_func, default_dimension, n_trials=n_trials)
+
+        assert isinstance(result, np.ndarray)
+        assert result.shape == (default_dimension,)
+        assert np.all(result >= 0.0)
+        assert np.all(result <= 1.0)
+
+    def test_maximize_by_quantity_single_element_array(self, dimension: int = 1, center: float = 0.7) -> None:
+        """Test maximize_by_quantity with single element array."""
+
+        def quantity_score_func(x) -> Probability:
+            return Probability(1.0 - (x[0] - center) ** 2)
+
+        result = maximize_by_quantity(quantity_score_func, dimension)
+
+        assert isinstance(result, np.ndarray)
+        assert result.shape == (1,)
+        assert 0.0 <= result[0] <= 1.0
+
+    def test_maximize_by_quantity_large_array(
+        self, default_n_trials: int, dimension: int = 5, center: float = 0.3
+    ) -> None:
+        """Test maximize_by_quantity with larger array."""
+
+        def quantity_score_func(x) -> Probability:
+            return Probability(1.0 - np.sum((x - center) ** 2))
+
+        result = maximize_by_quantity(quantity_score_func, dimension, n_trials=default_n_trials)
+
+        assert isinstance(result, np.ndarray)
+        assert result.shape == (dimension,)
+        assert np.all(result >= 0.0)
+        assert np.all(result <= 1.0)
+
+    def test_maximize_by_quantity_constraint_violation_handling(
+        self,
+        default_n_trials: int,
+        bound: float = 0.3,
+        epsilon: float = 1e-6,
+        dimension: int = 2,
+    ) -> None:
+        """Test that constraints are properly handled when they might be violated."""
+
+        # This test uses a function that would naturally maximize at (1, 1)
+        # but we constrain it to stay within a smaller region
+        def quantity_score_func(x) -> Probability:
+            return Probability(x[0] + x[1])
+
+        constraints = [lambda x: np.sum(x) <= bound]  # Force sum to be small
+
+        result = maximize_by_quantity(quantity_score_func, dimension, constraints, n_trials=default_n_trials)
+        if result is not None:
+            assert isinstance(result, np.ndarray)
+            assert result.shape == (dimension,)
+            assert np.all(result >= 0.0)
+            assert np.all(result <= 1.0)
+            assert np.sum(result) <= bound + epsilon  # Allow small numerical error
+
+    @pytest.mark.parametrize(
+        "constraints",
+        [
+            None,
+            [],
+            [lambda x: True],  # Always satisfied constraint
+            [lambda x: x[0] >= 0.0],  # Trivial constraint
+        ],
+    )
+    def test_maximize_by_quantity_various_constraint_inputs(
+        self,
+        constraints: Optional[List[Callable[[np.ndarray], bool]]],
+        default_dimension: int,
+        center: float = 0.5,
+    ) -> None:
+        """Test maximize_by_quantity with various constraint inputs."""
+
+        def quantity_score_func(x) -> Probability:
+            return Probability(1.0 - np.sum((x - center) ** 2))
+
+        result = maximize_by_quantity(quantity_score_func, default_dimension, constraints)
+
+        assert isinstance(result, np.ndarray)
+        assert result.shape == (default_dimension,)
+        assert np.all(result >= 0.0)
+        assert np.all(result <= 1.0)
+
+    def test_maximize_by_quantity_caching_behavior(
+        self, default_dimension: int, default_n_trials: int, center: float = 0.5
+    ) -> None:
+        """Test that the function uses caching correctly."""
+
+        def quantity_score_func(x) -> Probability:
+            return Probability(1.0 - np.sum((x - center) ** 2))
+
+        # First call
+        result1 = maximize_by_quantity(quantity_score_func, default_dimension, n_trials=default_n_trials)
+
+        # Second call with same parameters should use cache
+        result2 = maximize_by_quantity(quantity_score_func, default_dimension, n_trials=default_n_trials)
+
+        # Results should be identical due to caching
+        np.testing.assert_array_almost_equal(result1, result2)
+
+    @pytest.mark.parametrize("dimension", [1, 2, 3, 4])
+    def test_maximize_by_quantity_different_dimensions(
+        self, dimension: int, default_n_trials: int, center: float = 0.5
+    ) -> None:
+        """Test maximize_by_quantity with different dimensions."""
+
+        def quantity_score_func(x) -> Probability:
+            return Probability(1.0 - np.sum((x - center) ** 2))
+
+        result = maximize_by_quantity(quantity_score_func, dimension, n_trials=default_n_trials)
+
+        assert isinstance(result, np.ndarray)
+        assert result.shape == (dimension,)
+        assert np.all(result >= 0.0)
+        assert np.all(result <= 1.0)
+
+    @patch("pybandits.utils.differential_evolution")
+    def test_maximize_by_quantity_optimization_failure(
+        self, mock_de: MagicMock, default_dimension: int, center: float = 0.5
+    ) -> None:
+        """Test that OptimizationFailedError is raised when optimization fails."""
+        # Mock differential_evolution to return unsuccessful result
+        mock_result = MagicMock()
+        mock_result.success = False
+        mock_result.message = "Optimization failed"
+        mock_de.return_value = mock_result
+
+        def quantity_score_func(x) -> Probability:
+            return Probability(1.0 - np.sum((x - center) ** 2))
+
+        with pytest.raises(OptimizationFailedError, match="Optimization failed"):
+            maximize_by_quantity(quantity_score_func, default_dimension)
+
+    def test_maximize_by_quantity_complex_constraints(
+        self,
+        default_n_trials: int,
+        upper_bound: float = 0.8,
+        lower_bound: float = 0.2,
+        dimension: int = 2,
+    ) -> None:
+        """Test maximize_by_quantity with complex constraint scenarios."""
+
+        # Function that wants to maximize at (1, 1) but we constrain it
+        def quantity_score_func(x) -> Probability:
+            return Probability(x[0] * x[1])
+
+        # Complex constraints: x[0] + x[1] <= upper_bound and x[0] >= lower_bound and x[1] >= lower_bound
+        constraints = [
+            lambda x: x[0] + x[1] <= upper_bound,
+            lambda x: x[0] >= lower_bound,
+            lambda x: x[1] >= lower_bound,
+        ]
+
+        result = maximize_by_quantity(quantity_score_func, dimension, constraints, n_trials=default_n_trials)
+
+        assert isinstance(result, np.ndarray)
+        assert result.shape == (dimension,)
+        assert np.all(result >= 0.0)
+        assert np.all(result <= 1.0)
+
+        # Verify all constraints are satisfied
+        assert result[0] + result[1] <= upper_bound + 1e-6
+        assert result[0] >= lower_bound - 1e-6
+        assert result[1] >= lower_bound - 1e-6
+
+    def test_maximize_by_quantity_returns_maximum_probability(
+        self, default_n_trials: int, dimension: int = 1, return_value: float = 0.8
+    ) -> None:
+        """Test that maximize_by_quantity returns maximum probability value from optimization."""
+
+        def prob_func(x: np.ndarray) -> float:
+            return return_value
+
+        result = maximize_by_quantity(prob_func, dimension, n_trials=default_n_trials)
+
+        # The function should return the optimal point that maximizes the function
+        # Since prob_func always returns return_value, any point in [0,1] should work
+        assert isinstance(result, np.ndarray)
+        assert result.shape == (dimension,)
+        assert 0.0 <= result[0] <= 1.0
+
+    @pytest.mark.parametrize("dimension", [1, 2, 3])
+    def test_maximize_by_quantity_samples_points_in_valid_range(
+        self, default_n_trials: int, dimension: int, return_value: float = 0.5
+    ) -> None:
+        """Test that maximize_by_quantity correctly samples points from [0,1] range."""
+
+        def prob_func(x: np.ndarray) -> float:
+            # Verify all inputs are in valid range
+            assert all(0 <= xi <= 1 for xi in x)
+            return return_value
+
+        result = maximize_by_quantity(prob_func, dimension, n_trials=default_n_trials)
+
+        assert isinstance(result, np.ndarray)
+        assert result.shape == (dimension,)
+        assert np.all(result >= 0.0)
+        assert np.all(result <= 1.0)
+
+    def test_maximize_by_quantity_maximization_result(
+        self,
+        default_n_trials: int,
+        dimension: int = 1,
+        atol: float = 1e-2,
+        expected_min: float = 0.0,
+        expected_max: float = 1.0,
+    ) -> None:
+        """Test that maximize_by_quantity correctly maximizes simple functions."""
+        # Test function with maximum at x=0: 1 - x^2
+        result1 = maximize_by_quantity(lambda x: 1 - x[0] ** 2, dimension, n_trials=default_n_trials)
+        assert np.isclose(result1[0], expected_min, atol=atol)
+
+        # Test function with maximum at x=1: x^2
+        result2 = maximize_by_quantity(lambda x: x[0] ** 2, dimension, n_trials=default_n_trials)
+        assert np.isclose(result2[0], expected_max, atol=atol)
+
+    @patch("pybandits.utils.differential_evolution")
+    def test_maximize_by_quantity_uses_differential_evolution(
+        self,
+        mock_de: MagicMock,
+        default_n_trials: int,
+        dimension: int = 1,
+        return_value: float = 0.5,
+        test_input_value: float = 0.5,
+        lower_bound: float = 0.0,
+        upper_bound: float = 1.0,
+    ) -> None:
+        """Test that maximize_by_quantity uses differential_evolution optimization."""
+        mock_result = MagicMock()
+        mock_result.success = True
+        mock_result.x = np.array([test_input_value])
+        mock_de.return_value = mock_result
+
+        def prob_func(x: np.ndarray) -> float:
+            return return_value
+
+        maximize_by_quantity(prob_func, dimension, n_trials=default_n_trials)
+
+        # Verify differential_evolution was called
+        mock_de.assert_called_once()
+        call_args = mock_de.call_args
+
+        # Check that bounds are set correctly
+        assert "bounds" in call_args.kwargs
+        bounds = call_args.kwargs["bounds"]
+        assert bounds == [(lower_bound, upper_bound)]  # Single dimension bounds
+
+        # Check that function is negated (for maximization)
+        func = call_args.kwargs["func"]
+        test_input = np.array([test_input_value])
+        assert func(test_input) == -return_value  # Should be negated
+
+    def test_maximize_by_quantity_probability_function_exceptions(
+        self, default_n_trials: int, dimension: int = 1, error_message: str = "Function failed"
+    ) -> None:
+        """Test that exceptions from probability function are properly propagated."""
+
+        def failing_prob_func(x: np.ndarray) -> float:
+            raise RuntimeError(error_message)
+
+        with pytest.raises(RuntimeError, match=error_message):
+            maximize_by_quantity(failing_prob_func, dimension, n_trials=default_n_trials)
+
+    def test_maximize_by_quantity_large_input_dimension(
+        self, default_n_trials: int, dimension: int = 30, return_value: float = 0.5
+    ) -> None:
+        """Test maximize_by_quantity with large input dimension."""
+
+        def prob_func(x: np.ndarray) -> float:
+            return return_value
+
+        result = maximize_by_quantity(prob_func, dimension, n_trials=default_n_trials)
+
+        assert isinstance(result, np.ndarray)
+        assert result.shape == (dimension,)
+        assert np.all(result >= 0.0)
+        assert np.all(result <= 1.0)
+
+    @patch("pybandits.utils.differential_evolution")
+    def test_maximize_by_quantity_optimization_convergence_failure(
+        self,
+        mock_de: MagicMock,
+        default_n_trials: int,
+        dimension: int = 1,
+        return_value: float = 0.5,
+        error_message: str = "Optimization failed to converge",
+    ) -> None:
+        """Test that OptimizationFailedError is raised when optimization fails to converge."""
+        mock_result = MagicMock()
+        mock_result.success = False
+        mock_result.message = error_message
+        mock_de.return_value = mock_result
+
+        def prob_func(x: np.ndarray) -> float:
+            return return_value
+
+        with pytest.raises(OptimizationFailedError, match=error_message):
+            maximize_by_quantity(prob_func, dimension, n_trials=default_n_trials)
diff --git a/tests/utils.py b/tests/utils.py
index e5a08a4..f3b8095 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -7,13 +7,15 @@
 import numpy as np
 from bokeh.core.serialization import Serializable
 
-from pybandits.base import PyBanditsBaseModel
+from pybandits.base import PyBanditsBaseModel, UnifiedActionId
+from pybandits.base_model import BaseModel
 from pybandits.model import BaseBayesianNeuralNetwork, UpdateMethods
 from pybandits.pydantic_version_compatibility import (
     Optional,
     PositiveInt,
     PrivateAttr,
 )
+from pybandits.quantitative_model import QuantitativeModel
 
 literal_update_methods = get_args(UpdateMethods)
 
@@ -45,6 +47,17 @@ def to_temporary_pickle(model: PyBanditsBaseModel):
         pickle.dump(model, file)
 
 
+def to_unified_action_id(action_id: str, model: BaseModel) -> UnifiedActionId:
+    if isinstance(model, QuantitativeModel):
+        return (action_id, (np.random.random(),))
+    else:
+        return action_id
+
+
+def mock_update(self, *args, **kwargs):
+    pass
+
+
 class FakeApproximation(PyBanditsBaseModel):
     n_draws: PositiveInt = 10
     n_features: PositiveInt