11from typing import Dict , List , Optional , cast
22
33import pandas as pd
4+ import torch
45from pydantic .types import PositiveInt
56from typing_extensions import Self
67
1314 DoEOptimalityCriterion ,
1415)
1516from bofire .strategies .doe .design import find_local_max_ipopt , get_n_experiments
16- from bofire .strategies .doe .objective import get_objective_function
17+ from bofire .strategies .doe .objective import ModelBasedObjective , get_objective_function
1718from bofire .strategies .doe .utils import get_formula_from_string , n_zero_eigvals
1819from bofire .strategies .doe .utils_categorical_discrete import (
1920 create_continuous_domain ,
21+ encode_candidates_to_relaxed_domain ,
2022 filter_out_categorical_and_categorical_auxilliary_vars ,
2123 filter_out_discrete_auxilliary_vars ,
2224 project_candidates_into_domain ,
@@ -45,31 +47,35 @@ def __init__(
4547 if self ._data_model .sampling is not None
4648 else None
4749 )
48- self ._return_fixed_candidates = data_model .return_fixed_candidates
50+ self ._return_fixed_candidates = (
51+ data_model .return_fixed_candidates
52+ ) # this defaults to False in the data model
53+ # DoE optimization has larger numerical errors (~1e-4) due to SCIP solver precision
54+ self ._validation_tol = 1e-4
4955
def set_candidates(self, candidates: pd.DataFrame) -> None:
    """Store user-provided candidates after validating their columns.

    The columns of ``candidates`` must match the keys of the domain's
    inputs exactly: no extra columns and no missing ones.

    Args:
        candidates: Candidate experiments, one column per domain input.

    Raises:
        AttributeError: If ``candidates`` contains columns that are not
            inputs of the domain, or lacks columns for inputs of the domain.
    """
    original_columns = self.domain.inputs.get_keys(includes=Input)

    # Columns supplied by the caller that the domain does not know about.
    too_many_columns = [
        col for col in candidates.columns if col not in original_columns
    ]
    if too_many_columns:
        raise AttributeError(
            f"provided candidates have columns: {(*too_many_columns,)}, which do not exist in original domain",
        )

    # Domain inputs for which the caller supplied no column.
    too_few_columns = [
        col for col in original_columns if col not in candidates.columns
    ]
    if too_few_columns:
        raise AttributeError(
            f"provided candidates are missing columns: {(*too_few_columns,)} which exist in original domain",
        )

    self._candidates = candidates
7177
72- def _ask (self , candidate_count : PositiveInt ) -> pd .DataFrame : # type: ignore
78+ def _ask (self , candidate_count : PositiveInt ) -> pd .DataFrame : # type: ignore # due to inheriting from Strategy, we then later call this using self.candidates
7379 (
7480 relaxed_domain ,
7581 mappings_categorical_var_key_to_aux_var_key_state_pairs ,
@@ -78,35 +84,41 @@ def _ask(self, candidate_count: PositiveInt) -> pd.DataFrame: # type: ignore
7884 mapped_aux_categorical_inputs ,
7985 mapped_continous_inputs ,
8086 ) = create_continuous_domain (domain = self .domain )
81- fixed_experiments_count = 0
82- _candidate_count = candidate_count
83- if self .candidates is not None :
84- adapted_partially_fixed_candidates = (
85- self ._transform_candidates_to_new_domain (
86- relaxed_domain ,
87- self .candidates ,
88- )
87+
88+ # if you have fixed experiments, so-called _candidates, you need to relax them and add them to the total number of experiments
89+ if self .candidates is not None : # aka if self._candidates is not None
90+ # transform candidates to new domain
91+ relaxed_candidates = self ._transform_candidates_to_new_domain (
92+ relaxed_domain ,
93+ self .candidates ,
8994 )
90- else :
91- adapted_partially_fixed_candidates = None
92- if self .candidates is not None :
9395 fixed_experiments_count = self .candidates .notnull ().all (axis = 1 ).sum ()
94- _candidate_count = candidate_count + fixed_experiments_count
96+ else :
97+ relaxed_candidates = None
98+ fixed_experiments_count = 0
99+
100+ # total number of experiments that will go into the design
101+ _total_count = candidate_count + fixed_experiments_count
102+
95103 objective_function = get_objective_function (
96104 self ._data_model .criterion ,
97105 domain = relaxed_domain ,
98- n_experiments = _candidate_count ,
106+ n_experiments = _total_count ,
99107 inputs_for_formula = self .domain .inputs ,
100108 )
101109 assert objective_function is not None , "Criterion type is not supported!"
110+
102111 design = find_local_max_ipopt (
103112 relaxed_domain ,
104- fixed_experiments = None ,
105- partially_fixed_experiments = adapted_partially_fixed_candidates ,
113+ fixed_experiments = None , # effectively deprecated, but others use it so we have not removed it yet
114+ partially_fixed_experiments = relaxed_candidates , # technically fixed experiments are also partially_fixed, so we only use this
106115 ipopt_options = self ._data_model .ipopt_options ,
107116 objective_function = objective_function ,
108117 )
118+
119+ # if cats or discrete var present, need to filter out all the aux vars and project back into original domain
109120 if len (self .domain .inputs .get ([DiscreteInput , CategoricalInput ])) > 0 :
121+ # deal with the categoricals first
110122 design_no_categoricals , design_categoricals = (
111123 filter_out_categorical_and_categorical_auxilliary_vars (
112124 design ,
@@ -139,7 +151,7 @@ def _ask(self, candidate_count: PositiveInt) -> pd.DataFrame: # type: ignore
139151 aux_vars_for_discrete = aux_vars_for_discrete ,
140152 )
141153 design = pd .concat ([design , design_categoricals ], axis = 1 )
142- if self ._return_fixed_candidates :
154+ if self ._return_fixed_candidates : # this is asking if the fixed candidates should be returned together with the new ones, or just the new ones. Default just the new ones.
143155 fixed_experiments_count = 0
144156 return design .iloc [fixed_experiments_count :, :].reset_index (
145157 drop = True ,
@@ -148,7 +160,7 @@ def _ask(self, candidate_count: PositiveInt) -> pd.DataFrame: # type: ignore
148160 def get_required_number_of_experiments (self ) -> Optional [int ]:
149161 if isinstance (self ._data_model .criterion , DoEOptimalityCriterion ):
150162 if self .domain .inputs .get ([DiscreteInput , CategoricalInput ]):
151- _domain , _ , _ , _ , _ , _ = create_continuous_domain (domain = self .domain )
163+ _domain , * _ = create_continuous_domain (domain = self .domain )
152164 else :
153165 _domain = self .domain
154166 formula = get_formula_from_string (
@@ -162,6 +174,85 @@ def get_required_number_of_experiments(self) -> Optional[int]:
162174 f"Only { AnyDoEOptimalityCriterion } type have required number of experiments."
163175 )
164176
def get_candidate_rank(self) -> int:
    """Return the rank of the model matrix built from the current candidates.

    Only fully specified candidate rows contribute. Rows containing missing
    values are partially fixed experiments; they carry no complete design
    point and would make the rank computation fail on non-finite input.

    Returns:
        The rank of the candidates' model matrix, or 0 when no candidates
        are set or none of them is fully specified.

    Raises:
        ValueError: If the criterion is not a ``DoEOptimalityCriterion`` or
            the resulting objective is not a ``ModelBasedObjective``.
    """
    if self.candidates is None:
        return 0

    # Keep only rows without missing values; an empty design has rank 0.
    complete_candidates = self.candidates.dropna(axis=0, how="any").reset_index(
        drop=True
    )
    if complete_candidates.empty:
        return 0

    # Only works for DoEOptimalityCriterion (model-based criteria)
    if not isinstance(self._data_model.criterion, DoEOptimalityCriterion):
        raise ValueError(
            "get_candidate_rank() only works with DoEOptimalityCriterion"
        )

    # Relax the original domain; only the first three return values are needed.
    (
        relaxed_domain,
        mappings_categorical_var_key_to_aux_var_key_state_pairs,
        mapping_discrete_input_to_discrete_aux,
        *_,
    ) = create_continuous_domain(domain=self.domain)

    # Encode the candidates into the relaxed domain.
    relaxed_candidates = encode_candidates_to_relaxed_domain(
        candidates=complete_candidates,
        mappings_categorical_var_key_to_aux_var_key_state_pairs=mappings_categorical_var_key_to_aux_var_key_state_pairs,
        mapping_discrete_input_to_discrete_aux=mapping_discrete_input_to_discrete_aux,
        domain=self.domain,
    )

    # The objective combines the model formula with the criterion.
    objective_function = get_objective_function(
        criterion=self._data_model.criterion,
        domain=relaxed_domain,
        n_experiments=len(complete_candidates),
        inputs_for_formula=self.domain.inputs,
    )
    if not isinstance(objective_function, ModelBasedObjective):
        raise ValueError(
            "Only ModelBasedObjective supports Fisher Information Matrix rank calculation"
        )

    # Restrict to (and order by) the relaxed domain's input columns.
    expected_columns = relaxed_domain.inputs.get_keys()
    candidates_tensor = torch.tensor(
        relaxed_candidates[expected_columns].to_numpy(dtype=float),
        dtype=torch.float64,
    )

    candidates_model_matrix = objective_function.tensor_to_model_matrix(
        candidates_tensor
    )

    # ``.item()`` yields a plain Python number; cast to honour the annotation.
    return int(torch.linalg.matrix_rank(candidates_model_matrix).item())
239+
def get_additional_experiments_needed(self) -> Optional[int]:
    """Return how many experiments are still needed beyond the candidates.

    Computed as ``get_required_number_of_experiments() - get_candidate_rank()``.

    Returns:
        The number of additional experiments needed, or ``None`` when
        ``get_required_number_of_experiments()`` returns ``None``.
    """
    required_experiments = self.get_required_number_of_experiments()
    if required_experiments is None:
        return None

    return required_experiments - self.get_candidate_rank()
255+
165256 def has_sufficient_experiments (
166257 self ,
167258 ) -> bool :
0 commit comments