Skip to content

Commit 806e5c9

Browse files
committed
Updates the documentation to the new API
1 parent 1b6e6ce commit 806e5c9

File tree

12 files changed

+160
-228
lines changed

12 files changed

+160
-228
lines changed

docs/protein-optimization/contributing/a_new_problem.md

Lines changed: 10 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -56,8 +56,8 @@ from your_local_dependency import ...
5656

5757

5858
class YourBlackBox(AbstractBlackBox):
59-
def __init__(self, L: int = np.inf):
60-
super().__init__(L=L)
59+
def __init__(self, info: ProblemSetupInformation, batch_size: int = None):
60+
super().__init__(info=info, batch_size=batch_size)
6161

6262
# The only method you have to define
6363
def _black_box(self, x: np.ndarray, context: dict = None) -> np.ndarray:
@@ -66,11 +66,8 @@ class YourBlackBox(AbstractBlackBox):
6666

6767
class YourProblemFactory(AbstractProblemFactory):
6868
def get_setup_information(self) -> ProblemSetupInformation:
69-
# The tokens of your alphabet
70-
alphabet_symbols = [...]
71-
72-
# The encoding
73-
alphabet = {symbol: i for i, symbol in enumerate(alphabet_symbols)}
69+
# Your alphabet
70+
alphabet = [...]
7471

7572
# A description of the problem
7673
# See more in the chapter about defining
@@ -92,12 +89,12 @@ class YourProblemFactory(AbstractProblemFactory):
9289
...
9390

9491
# The problem information you defined above
95-
L = self.get_setup_information().get_max_sequence_length()
92+
problem_info = self.get_setup_information()
9693

9794
# Creating your black box function
98-
f = YourBlackBox(L=L)
95+
f = YourBlackBox(info=problem_info)
9996

100-
# Your first input (an np.array)
97+
# Your first input (an np.array[str])
10198
x0 = ...
10299

103100
return f, x0, f(x0)
@@ -145,6 +142,8 @@ dependencies:
145142
This environment will be created (if it doesn't exist yet), and will be used to run `register.py`.
146143

147144
:::{admonition} Why `conda`?
145+
:class: dropdown
146+
148147
Conda environments can be quite good! For example, the `super_mario_bros` environment contains a Java runtime. This is the `environment.yml` for said problem:
149148

150149
```yml
@@ -189,7 +188,7 @@ $ python -c "from poli.core.registry import get_problems; print(get_problems())"
189188
Your problem is not registered yet, so don't fret. You can check _if_ you can register it by running
190189

191190
```bash
192-
$ python -c "from poli.objective_repository import AVAILABLE_OBJECTIVES; print(AVAILABLE_OBJECTIVES)"
191+
$ python -c "from poli.core.registry import get_problems; print(get_problems(include_repository=True))"
193192
[..., "your_problem", ...] # If all goes well, you should see "your_problem" here.
194193
```
195194

docs/protein-optimization/index.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,12 @@ Solvent accessibility of mutations of a wildtype using `foldx`
7979
Rapid Stability Predictions of single mutations from a wildtype. [WIP]
8080
:::
8181

82+
:::{grid-item-card} Red Fluorescent Protein (RFP) Stability (using `lambo`)
83+
:link: ./using_poli/objective_repository/foldx_rfp_lambo.html
84+
:columns: 6
85+
Stability and solvent-accessible surface area of red fluorescent proteins (RFP), as used in LaMBO.
86+
:::
87+
8288
::::
8389

8490

docs/protein-optimization/using_poli/objective_repository/aloha.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ from poli import objective_factory
2222
problem_info, f, x0, y0, run_info = objective_factory.create(name="aloha")
2323

2424
# Example input:
25-
x = np.array([["A", "L", "O", "O", "F"]]) # must be of shape [b, L], in this case [1, 3].
25+
x = np.array([["A", "L", "O", "O", "F"]]) # must be of shape [b, L], in this case [1, 5].
2626

2727
# Querying:
2828
print(f(x)) # Should be 3 (A, L, and the first O).

docs/protein-optimization/using_poli/objective_repository/foldx_sasa.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,13 @@ problem_info, f, x0, y0, run_info = objective_factory.create(
4646
wildtype_pdb_file=wildtype_pdb_file
4747
)
4848

49-
# Example input:
49+
# Example input: (an array of strings)
5050
print(x0)
5151

5252
# Querying:
5353
print(y0) # The stability of your wildtype
5454
```
5555

56-
You could also pass an `alphabet: Dict[str, int]` to the create method. By default, [we use this encoding](https://github.com/MachineLearningLifeScience/poli/blob/44cad2a5c95f209aeb24d4893d162b3359ca91a3/src/poli/core/util/proteins/defaults.py#L1).
57-
5856
:::
5957

6058
:::{tab-item} In isolation
@@ -94,4 +92,7 @@ Registering the objective function in this way will create a `conda` environment
9492

9593
:::
9694

97-
::::
95+
::::
96+
97+
You can also pass an `alphabet: List[str]` to the `create` method. By default, we use the tokens of [this encoding](https://github.com/MachineLearningLifeScience/poli/blob/44cad2a5c95f209aeb24d4893d162b3359ca91a3/src/poli/core/util/proteins/defaults.py#L1).
98+

docs/protein-optimization/using_poli/objective_repository/foldx_stability.md

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,13 @@ problem_info, f, x0, y0, run_info = objective_factory.create(
4646
wildtype_pdb_file=wildtype_pdb_file
4747
)
4848

49-
# Example input:
49+
# Example input: (an array of strings)
5050
print(x0)
5151

5252
# Querying:
5353
print(y0) # The stability of your wildtype
5454
```
5555

56-
You could also pass an `alphabet: Dict[str, int]` to the create method. By default, [we use this encoding](https://github.com/MachineLearningLifeScience/poli/blob/44cad2a5c95f209aeb24d4893d162b3359ca91a3/src/poli/core/util/proteins/defaults.py#L1).
57-
5856
:::
5957

6058
:::{tab-item} In isolation
@@ -94,4 +92,6 @@ Registering the objective function in this way will create a `conda` environment
9492

9593
:::
9694

97-
::::
95+
::::
96+
97+
You can also pass an `alphabet: List[str]` to the `create` method. By default, we use the tokens of [this encoding](https://github.com/MachineLearningLifeScience/poli/blob/44cad2a5c95f209aeb24d4893d162b3359ca91a3/src/poli/core/util/proteins/defaults.py#L1).

docs/protein-optimization/using_poli/objective_repository/rdkit_logp.md

Lines changed: 14 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,11 @@
55

66
## About
77

8-
This objective function returns the Quantitative Estimate of Druglikeness (QED) using `RDKit`. During creation, you can specify whether you are measuring the QED of a SMILES string, or a SELFIES.
8+
This objective function returns logP, the logarithm of the partition coefficient of a solute between octanol and water, computed using `RDKit`. You can specify whether the molecule is given as a SMILES string or as a SELFIES string.
99

1010
## Prerequisites
1111

12-
- An alphabet of tokens `{str: int}` as a json file. For example, in the case of SELFIES, this file would be
13-
```json
14-
# alphabet_selfies.json
15-
{
16-
"": 0, # an empty padding
17-
"[C]": 1,
18-
...
19-
}
20-
```
12+
- You will need to specify an alphabet as a `List[str]`.
2113

2214
## How to run
2315

@@ -36,24 +28,23 @@ pip install rdkit selfies
3628
Then run
3729

3830
```python
39-
from pathlib import Path
40-
4131
import numpy as np
4232

4333
from poli import objective_factory
4434

45-
# The path to your alphabet
46-
path_to_alphabet = Path("path/to/alphabet_selfies.json")
35+
# Your alphabet
36+
alphabet = ["", "[C]", ...]
4737

4838
# How to create
4939
problem_info, f, x0, y0, run_info = objective_factory.create(
5040
name="rdkit_logp",
51-
path_to_alphabet=path_to_alphabet,
52-
string_representation="SELFIES" # it is "SMILES" by default.
53-
)
41+
alphabet=alphabet,
42+
string_representation="SELFIES", # it is "SMILES" by default.
43+
force_register=True,
44+
)
5445

5546
# Example input: a single carbon
56-
x = np.array([[1]])
47+
x = np.array(["[C]"]).reshape(1, -1)
5748

5849
# Querying:
5950
print(f(x)) # Should be close to 0.6361
@@ -73,19 +64,19 @@ import numpy as np
7364
from poli import objective_factory
7465

7566

76-
# The path to your alphabet
77-
path_to_alphabet = Path("path/to/alphabet_selfies.json")
67+
# Your alphabet
68+
alphabet = ["", "[C]", ...]
7869

7970
# How to create
8071
problem_info, f, x0, y0, run_info = objective_factory.create(
8172
name="rdkit_logp",
82-
path_to_alphabet=path_to_alphabet,
83-
string_representation="SELFIES" # it is "SMILES" by default.
73+
alphabet=alphabet,
74+
string_representation="SELFIES", # it is "SMILES" by default.
8475
force_register=True,
8576
)
8677

8778
# Example input: a single carbon
88-
x = np.array([[1]])
79+
x = np.array(["[C]"]).reshape(1, -1)
8980

9081
# Querying:
9182
print(f(x)) # Should be close to 0.6361

docs/protein-optimization/using_poli/objective_repository/rdkit_qed.md

Lines changed: 21 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -5,19 +5,11 @@
55

66
## About
77

8-
This objective function returns the Quantitative Estimate of Druglikeness (QED) using `RDKit`. During creation, you can specify whether you are measuring the QED of a SMILES string, or a SELFIES.
8+
This objective function returns the Quantitative Estimate of Druglikeness (QED) using `RDKit`. You can specify whether the molecule is given as a SMILES string or as a SELFIES string.
99

1010
## Prerequisites
1111

12-
- An alphabet of tokens `{str: int}` as a json file. For example, in the case of SELFIES, this file would be
13-
```json
14-
# alphabet_selfies.json
15-
{
16-
"": 0, # an empty padding
17-
"[C]": 1,
18-
...
19-
}
20-
```
12+
- You will need to specify an alphabet as a `List[str]`.
2113

2214
## How to run
2315

@@ -36,27 +28,27 @@ pip install rdkit selfies
3628
Then run
3729

3830
```python
39-
from pathlib import Path
40-
4131
import numpy as np
42-
4332
from poli import objective_factory
4433

45-
# The path to your alphabet
46-
path_to_alphabet = Path("path/to/alphabet_selfies.json")
34+
# Your alphabet
35+
alphabet = ["", "[C]", ...]
4736

4837
# How to create
4938
problem_info, f, x0, y0, run_info = objective_factory.create(
5039
name="rdkit_qed",
51-
path_to_alphabet=path_to_alphabet,
52-
string_representation="SELFIES" # it is "SMILES" by default.
53-
)
40+
alphabet=alphabet,
41+
string_representation="SELFIES", # it is "SMILES" by default.
42+
force_register=True,
43+
)
5444

5545
# Example input: a single carbon
56-
x = np.array([[1]])
46+
x = np.array(["[C]"]).reshape(1, -1)
5747

5848
# Querying:
59-
print(f(x)) # Should be close to 0.35978
49+
y = f(x)
50+
print(y) # Should be close to 0.35978494
51+
assert np.isclose(y, 0.35978494).all()
6052
```
6153

6254
:::
@@ -66,29 +58,27 @@ print(f(x)) # Should be close to 0.35978
6658
If you want us to handle dependencies, run
6759

6860
```python
69-
from pathlib import Path
70-
7161
import numpy as np
72-
7362
from poli import objective_factory
7463

75-
76-
# The path to your alphabet
77-
path_to_alphabet = Path("path/to/alphabet_selfies.json")
64+
# Your alphabet
65+
alphabet = ["", "[C]", ...]
7866

7967
# How to create
8068
problem_info, f, x0, y0, run_info = objective_factory.create(
8169
name="rdkit_qed",
82-
path_to_alphabet=path_to_alphabet,
83-
string_representation="SELFIES" # it is "SMILES" by default.
84-
force_register=True,
70+
alphabet=alphabet,
71+
string_representation="SELFIES", # it is "SMILES" by default.
72+
force_register=True,
8573
)
8674

8775
# Example input: a single carbon
88-
x = np.array([[1]])
76+
x = np.array(["[C]"]).reshape(1, -1)
8977

9078
# Querying:
91-
print(f(x)) # Should be close to 0.35978
79+
y = f(x)
80+
print(y) # Should be close to 0.35978494
81+
assert np.isclose(y, 0.35978494).all()
9282

9383
# Terminate the process.
9484
f.terminate()

0 commit comments

Comments
 (0)