@@ -10,13 +10,13 @@ the structures will be clean mono-elemental surfaces which we can generate via
1010``` py
1111>> > # Generate the clean surfaces
1212>> > from autocat.surface import generate_surface_structures
13- >> > from autocat.utils import extract_structures
13+ >> > from autocat.utils import flatten_structures_dict
1414>> > surfs_dict = generate_surface_structures(
1515... [" Pt" , " Cu" , " Li" , " Ti" ],
1616... n_fixed_layers = 2 ,
1717... default_lat_param_lib = " pbe_fd"
1818... )
19- >> > surfs = extract_structures (surfs_dict)
19+ >> > surfs = flatten_structures_dict (surfs_dict)
2020```
2121
2222In this case we specified that the default lattice parameters
@@ -30,14 +30,24 @@ to your design space!
3030``` py
3131>> > # Generate the labels for each structure
3232>> > import numpy as np
33- >> > labels = np.random.uniform(- 1.5 ,1.5 ,size = len (ads_structs ))
33+ >> > labels = np.random.uniform(- 1.5 ,1.5 ,size = len (surfs ))
3434```
3535
3636Taking the structures and labels, we can define our `DesignSpace`.
3737
3838``` py
3939>> > from autocat.learning.sequential import DesignSpace
4040>> > design_space = DesignSpace(surfs, labels)
41+ >> > design_space
42+ + ------------------------ -+ -------------------------- +
43+ | | DesignSpace |
44+ + ------------------------ -+ -------------------------- +
45+ | total # of systems | 10 |
46+ | # of unlabelled systems | 0 |
47+ | unique species present | [' Pt' , ' Cu' , ' Li' , ' Ti' ] |
48+ | maximum label | 1.1205404366846423 |
49+ | minimum label | - 1.3259701029215702 |
50+ + ------------------------ -+ -------------------------- +
4151```
4252
4353## Doing a single simulated sequential learning run
@@ -51,35 +61,117 @@ returned at the end of the run.
5161As before, we will need to make choices with regard to the `Predictor` settings.
5262In this case we will use a `SineMatrix` featurizer alongside a `GaussianProcessRegressor`.
5363
64+ ``` py
65+ >> > from sklearn.gaussian_process import GaussianProcessRegressor
66+ >> > from sklearn.gaussian_process.kernels import RBF
67+ >> > from dscribe.descriptors.sinematrix import SineMatrix
68+ >> > from autocat.learning.featurizers import Featurizer
69+ >> > from autocat.learning.predictors import Predictor
70+ >> > kernel = RBF(1.5 )
71+ >> > regressor = GaussianProcessRegressor(kernel = kernel)
72+ >> > featurizer = Featurizer(
73+ ... featurizer_class = SineMatrix,
74+ ... design_space_structures = design_space.design_space_structures
75+ ... )
76+ >> > predictor = Predictor(regressor = regressor, featurizer = featurizer)
77+ >> > predictor
78+ + ---------- -+ ------------------------------------------------------------------ +
79+ | | Predictor |
80+ + ---------- -+ ------------------------------------------------------------------ +
81+ | regressor | < class ' sklearn.gaussian_process._gpr.GaussianProcessRegressor' > |
82+ | is fit? | False |
83+ + ---------- -+ ------------------------------------------------------------------ +
84+ + ---------------------------------- -+ ------------------------------------------ -+
85+ | | Featurizer |
86+ + ---------------------------------- -+ ------------------------------------------ -+
87+ | class | dscribe.descriptors.sinematrix.SineMatrix |
88+ | kwargs | None |
89+ | species list | [' Li' , ' Ti' , ' Pt' , ' Cu' ] |
90+ | maximum structure size | 36 |
91+ | preset | None |
92+ | design space structures provided? | True |
93+ + ---------------------------------- -+ ------------------------------------------ -+
94+ ```
95+
5496We also need to select parameters with regard to candidate selection.
5597These include the acquisition function to be used, the
5698target window (if applicable), and the number of candidates to pick at each iteration.
99+ This can be done via the `CandidateSelector` object.
57100Let's use a maximum uncertainty acquisition function to pick candidates based on their
58- associated uncertainty values. We'll also restrict the run to conduct 5 iterations.
101+ associated uncertainty values.
102+
103+ ``` py
104+ >> > from autocat.learning.sequential import CandidateSelector
105+ >> > candidate_selector = CandidateSelector(
106+ ... acquisition_function = " MU" ,
107+ ... num_candidates_to_pick = 1
108+ ... )
109+ >> > candidate_selector
110+ + ------------------------------ -+ -------------------- +
111+ | | Candidate Selector |
112+ + ------------------------------ -+ -------------------- +
113+ | acquisition function | MU |
114+ | # of candidates to pick | 1 |
115+ | target window | None |
116+ | include hhi? | False |
117+ | include segregation energies? | False |
118+ + ------------------------------ -+ -------------------- +
119+ ```
120+
121+ Now we have everything we need to conduct a simulated sequential learning loop.
122+ We'll restrict the run to 5 iterations.
59123
60124``` py
61- >> > from sklearn.gaussian_process import GaussianProcessRegressor
62- >> > from dscribe import SineMatrix
63125>> > from autocat.learning.sequential import simulated_sequential_learning
64- >> > kernel = RBF(1.5 )
65- >> > model_kwargs = {" kernel" : kernel}
66- >> > featurization_kwargs = {
67- ... " design_space_structures" : design_space.design_space_structures,
68- ... }
69- >> > predictor_kwargs = {
70- ... " model_class" : GaussianProcessRegressor,
71- ... " model_kwargs" : model_kwargs,
72- ... " featurizer_class" : SineMatrix,
73- ... " featurization_kwargs" : featurization_kwargs
74- ... }
75- >> > candidate_selection_kwargs = {" aq" : " MU" }
76126>> > sim_seq_learn = simulated_sequential_learning(
77127... full_design_space = design_space,
128+ ... candidate_selector = candidate_selector,
129+ ... predictor = predictor,
78130... init_training_size = 1 ,
79131... number_of_sl_loops = 5 ,
80- ... candidate_selection_kwargs = candidate_selection_kwargs,
81- ... predictor_kwargs = predictor_kwargs,
82132... )
133+ >> > sim_seq_learn
134+ + ---------------------------------- + -------------------- +
135+ | | Sequential Learner |
136+ + ---------------------------------- + -------------------- +
137+ | iteration count | 6 |
138+ | next candidate system structures | [' Cu36' ] |
139+ | next candidate system indices | [5 ] |
140+ + ---------------------------------- + -------------------- +
141+ + ------------------------------ -+ -------------------- +
142+ | | Candidate Selector |
143+ + ------------------------------ -+ -------------------- +
144+ | acquisition function | MU |
145+ | # of candidates to pick | 1 |
146+ | target window | None |
147+ | include hhi? | False |
148+ | include segregation energies? | False |
149+ + ------------------------------ -+ -------------------- +
150+ + ------------------------ -+ -------------------------- +
151+ | | DesignSpace |
152+ + ------------------------ -+ -------------------------- +
153+ | total # of systems | 10 |
154+ | # of unlabelled systems | 4 |
155+ | unique species present | [' Pt' , ' Cu' , ' Li' , ' Ti' ] |
156+ | maximum label | 0.9712050050259604 |
157+ | minimum label | - 1.3259701029215702 |
158+ + ------------------------ -+ -------------------------- +
159+ + ---------- -+ ------------------------------------------------------------------ +
160+ | | Predictor |
161+ + ---------- -+ ------------------------------------------------------------------ +
162+ | regressor | < class ' sklearn.gaussian_process._gpr.GaussianProcessRegressor' > |
163+ | is fit? | True |
164+ + ---------- -+ ------------------------------------------------------------------ +
165+ + ---------------------------------- -+ ------------------------------------------ -+
166+ | | Featurizer |
167+ + ---------------------------------- -+ ------------------------------------------ -+
168+ | class | dscribe.descriptors.sinematrix.SineMatrix |
169+ | kwargs | None |
170+ | species list | [' Li' , ' Ti' , ' Pt' , ' Cu' ] |
171+ | maximum structure size | 36 |
172+ | preset | None |
173+ | design space structures provided? | True |
174+ + ---------------------------------- -+ ------------------------------------------ -+
83175```
84176
85177Within the returned `SequentialLearner` object, we now have information we can use
@@ -97,12 +189,13 @@ of running in parallel (since this is an embarrassingly parallel operation). Her
97189three independent runs in serial.
98190
99191``` py
192+ >> > from autocat.learning.sequential import multiple_simulated_sequential_learning_runs
100193>> > runs_history = multiple_simulated_sequential_learning_runs(
101194... full_design_space = design_space,
195+ ... candidate_selector = candidate_selector,
196+ ... predictor = predictor,
102197... init_training_size = 1 ,
103198... number_of_sl_loops = 5 ,
104- ... candidate_selection_kwargs = candidate_selection_kwargs,
105- ... predictor_kwargs = predictor_kwargs,
106199... number_of_runs = 3 ,
107200... # number_of_parallel_jobs=N if you wanted to run in parallel
108201... )
0 commit comments