Skip to content

Commit 048dfc5

Browse files
committed
release v0.5.0
1 parent bc4f435 commit 048dfc5

File tree

28 files changed

+3289
-175
lines changed

28 files changed

+3289
-175
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
VERSION ?= 0.5.0.dev1
1+
VERSION ?= 0.5.0
22
SHELL := /bin/bash
33

44
.PHONY: releasehere

anaconda_build/meta.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
package:
22
name: openprotein-python
3-
version: "0.5.0.dev1"
3+
version: "0.5.0"
44

55
source:
66
path: ../

openprotein/api/embedding.py

Lines changed: 22 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,15 @@
66
from openprotein.api.align import csv_stream
77
from openprotein.base import APISession
88
from openprotein.errors import InvalidParameterError
9-
from openprotein.schemas import AttnJob, EmbeddingsJob, Job, LogitsJob, ModelMetadata
9+
from openprotein.schemas import (
10+
AttnJob,
11+
EmbeddingsJob,
12+
GenerateJob,
13+
LogitsJob,
14+
ModelMetadata,
15+
ScoreJob,
16+
ScoreSingleSiteJob,
17+
)
1018
from pydantic import TypeAdapter
1119

1220
PATH_PREFIX = "v1/embeddings"
@@ -256,7 +264,7 @@ def request_score_post(
256264
model_id: str,
257265
sequences: list[bytes] | list[str],
258266
prompt_id: str | None = None,
259-
) -> Job:
267+
) -> ScoreJob:
260268
"""
261269
POST a request for sequence scoring for the given model ID. \
262270
Returns a Job object referring to this request \
@@ -284,15 +292,15 @@ def request_score_post(
284292
if prompt_id is not None:
285293
body["prompt_id"] = prompt_id
286294
response = session.post(endpoint, json=body)
287-
return Job.model_validate(response.json())
295+
return ScoreJob.model_validate(response.json())
288296

289297

290298
def request_score_single_site_post(
291299
session: APISession,
292300
model_id: str,
293-
base_sequence: bytes,
301+
base_sequence: bytes | str,
294302
prompt_id: str | None = None,
295-
) -> Job:
303+
) -> ScoreSingleSiteJob:
296304
"""
297305
POST a request for single site mutation scoring for the given model ID. \
298306
Returns a Job object referring to this request \
@@ -314,12 +322,16 @@ def request_score_single_site_post(
314322
endpoint = PATH_PREFIX + f"/models/{model_id}/score_single_site"
315323

316324
body: dict = {
317-
"base_sequence": base_sequence.decode(),
325+
"base_sequence": (
326+
base_sequence.decode()
327+
if isinstance(base_sequence, bytes)
328+
else base_sequence
329+
),
318330
}
319331
if prompt_id is not None:
320332
body["prompt_id"] = prompt_id
321333
response = session.post(endpoint, json=body)
322-
return Job.model_validate(response.json())
334+
return ScoreSingleSiteJob.model_validate(response.json())
323335

324336

325337
def request_generate_post(
@@ -332,7 +344,7 @@ def request_generate_post(
332344
max_length: int = 1000,
333345
random_seed: int | None = None,
334346
prompt_id: str | None = None,
335-
) -> Job:
347+
) -> GenerateJob:
336348
"""
337349
POST a request for sequence generation for the given model ID. \
338350
Returns a Job object referring to this request \
@@ -364,7 +376,7 @@ def request_generate_post(
364376
random_seed = random.randrange(2**32)
365377

366378
body: dict = {
367-
"generate_n": num_samples,
379+
"n_sequences": num_samples,
368380
"temperature": temperature,
369381
"maxlen": max_length,
370382
}
@@ -377,4 +389,4 @@ def request_generate_post(
377389
if prompt_id is not None:
378390
body["prompt_id"] = prompt_id
379391
response = session.post(endpoint, json=body)
380-
return Job.model_validate(response.json())
392+
return GenerateJob.model_validate(response.json())

openprotein/api/fold.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from openprotein.api.embedding import ModelMetadata
22
from openprotein.base import APISession
3-
from openprotein.schemas import Job
3+
from openprotein.schemas import FoldJob
44
from pydantic import TypeAdapter
55

66
PATH_PREFIX = "v1/fold"
@@ -80,7 +80,7 @@ def fold_models_esmfold_post(
8080
session: APISession,
8181
sequences: list[bytes],
8282
num_recycles: int | None = None,
83-
) -> Job:
83+
) -> FoldJob:
8484
"""
8585
POST a request for structure prediction using ESMFold. Returns a Job object referring to this request
8686
that can be used to retrieve results later.
@@ -108,7 +108,7 @@ def fold_models_esmfold_post(
108108
body["num_recycles"] = num_recycles
109109

110110
response = session.post(endpoint, json=body)
111-
return Job.model_validate(response.json())
111+
return FoldJob.model_validate(response.json())
112112

113113

114114
def fold_models_alphafold2_post(
@@ -117,7 +117,7 @@ def fold_models_alphafold2_post(
117117
num_recycles: int | None = None,
118118
num_models: int = 1,
119119
num_relax: int = 0,
120-
) -> Job:
120+
) -> FoldJob:
121121
"""
122122
POST a request for structure prediction using AlphaFold2. Returns a Job object referring to this request
123123
that can be used to retrieve results later.
@@ -152,4 +152,4 @@ def fold_models_alphafold2_post(
152152
response = session.post(endpoint, json=body)
153153
# GET endpoint for AF2 expects the query sequence (first sequence) within the MSA
154154
# since we don't know what that sequence is, leave it out of the future; it is retrieved when calling get()
155-
return Job.model_validate(response.json())
155+
return FoldJob.model_validate(response.json())

openprotein/api/predictor.py

Lines changed: 74 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,16 @@
33
import numpy as np
44
import pandas as pd
55
from openprotein.base import APISession
6-
from openprotein.schemas import Job, PredictorMetadata
6+
from openprotein.schemas import (
7+
CVJob,
8+
Job,
9+
PredictJob,
10+
PredictMultiJob,
11+
PredictMultiSingleSiteJob,
12+
PredictorMetadata,
13+
PredictSingleSiteJob,
14+
TrainJob,
15+
)
716
from pydantic import TypeAdapter
817

918
PATH_PREFIX = "v1/predictor"
@@ -99,13 +108,33 @@ def predictor_fit_gp_post(
99108
body["description"] = description
100109

101110
response = session.post(endpoint, json=body)
102-
return Job.model_validate(response.json())
111+
return TrainJob.model_validate(response.json())
103112

104113

105114
def predictor_delete(session: APISession, predictor_id: str):
106115
raise NotImplementedError()
107116

108117

118+
def predictor_crossvalidate_post(
    session: APISession, predictor_id: str, n_splits: int | None = None
) -> CVJob:
    """
    POST a cross-validation request for the given predictor.

    Parameters
    ----------
    session : APISession
        Session object for API communication.
    predictor_id : str
        ID of the predictor to cross-validate.
    n_splits : int | None
        Number of splits to request. When None, the parameter is omitted
        from the request entirely.

    Returns
    -------
    CVJob
        Job validated from the JSON body of the response.
    """
    endpoint = PATH_PREFIX + f"/{predictor_id}/crossvalidate"

    # n_splits travels as a query parameter, not in a JSON body.
    params = {}
    if n_splits is not None:
        params["n_splits"] = n_splits
    response = session.post(endpoint, params=params)

    return CVJob.model_validate(response.json())
129+
130+
131+
def predictor_crossvalidate_get(session: APISession, crossvalidate_job_id: str) -> bytes:
    """
    GET the raw result of a cross-validation job.

    Parameters
    ----------
    session : APISession
        Session object for API communication.
    crossvalidate_job_id : str
        ID of the cross-validation job to fetch results for.

    Returns
    -------
    bytes
        Undecoded response content.
        NOTE(review): presumably the CSV payload that decode_crossvalidate
        parses - confirm against the backend.
    """
    endpoint = PATH_PREFIX + f"/crossvalidate/{crossvalidate_job_id}"

    response = session.get(endpoint)
    return response.content
136+
137+
109138
def predictor_predict_post(
110139
session: APISession, predictor_id: str, sequences: list[bytes] | list[str]
111140
):
@@ -117,7 +146,25 @@ def predictor_predict_post(
117146
}
118147
response = session.post(endpoint, json=body)
119148

120-
return Job.model_validate(response.json())
149+
return PredictJob.model_validate(response.json())
150+
151+
152+
def predictor_predict_single_site_post(
    session: APISession,
    predictor_id: str,
    base_sequence: bytes | str,
) -> PredictSingleSiteJob:
    """
    POST a single-site prediction request for the given predictor.

    Parameters
    ----------
    session : APISession
        Session object for API communication.
    predictor_id : str
        ID of the predictor to run.
    base_sequence : bytes | str
        Base sequence to predict on. Bytes are decoded to str before being
        placed in the JSON body.

    Returns
    -------
    PredictSingleSiteJob
        Job validated from the JSON body of the response.
    """
    endpoint = PATH_PREFIX + f"/{predictor_id}/predict_single_site"

    # JSON cannot carry raw bytes, so normalise to str first.
    sequence_text = (
        base_sequence.decode() if isinstance(base_sequence, bytes) else base_sequence
    )
    body = {
        "base_sequence": sequence_text,
    }
    response = session.post(endpoint, json=body)

    return PredictSingleSiteJob.model_validate(response.json())
121168

122169

123170
def predictor_predict_get_sequences(
@@ -179,9 +226,9 @@ def predictor_predict_get_batched_result(
179226
return response.content
180227

181228

182-
def decode_score(data: bytes, batched: bool = False) -> tuple[np.ndarray, np.ndarray]:
229+
def decode_predict(data: bytes, batched: bool = False) -> tuple[np.ndarray, np.ndarray]:
183230
"""
184-
Decode embedding.
231+
Decode prediction scores.
185232
186233
Args:
187234
data (bytes): raw bytes encoding the array received over the API
@@ -203,3 +250,25 @@ def decode_score(data: bytes, batched: bool = False) -> tuple[np.ndarray, np.nda
203250
mus = scores[:, ::2]
204251
vars = scores[:, 1::2]
205252
return mus, vars
253+
254+
255+
def decode_crossvalidate(data: bytes) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Decode crossvalidate scores.

    Args:
        data (bytes): raw bytes encoding the CSV table received over the API;
            it should contain a header row and the columns
            row_num, seq, measurement_name, y, y_mu, y_var in that order

    Returns:
        y (np.ndarray): decoded array of the values in the y column
        mus (np.ndarray): decoded array of means
        variances (np.ndarray): decoded array of variances
    """
    buffer = io.BytesIO(data)
    # should contain header and sequence column
    df = pd.read_csv(buffer)
    # Columns are taken by position from the whole-frame array.
    # NOTE(review): because the frame mixes text and numeric columns, the
    # slices are presumably object-dtype - confirm before doing ufunc math.
    scores = df.values
    # row_num, seq, measurement_name, y, y_mu, y_var
    y = scores[:, 3]
    mus = scores[:, 4]
    # Named `variances` rather than `vars` to avoid shadowing the builtin.
    variances = scores[:, 5]
    return y, mus, variances

openprotein/api/svd.py

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1+
import io
2+
3+
import numpy as np
14
from openprotein.base import APISession
25
from openprotein.errors import InvalidParameterError
3-
from openprotein.schemas import FitJob, Job, SVDEmbeddingsJob, SVDMetadata
6+
from openprotein.schemas import FitJob, SVDEmbeddingsJob, SVDMetadata
47
from pydantic import TypeAdapter
58

69
PATH_PREFIX = "v1/embeddings/svd"
@@ -40,6 +43,46 @@ def svd_get_sequences(session: APISession, svd_id: str) -> list[bytes]:
4043
return TypeAdapter(list[bytes]).validate_python(response.json())
4144

4245

46+
def embed_get_sequence_result(
    session: APISession, job_id: str, sequence: str | bytes
) -> bytes:
    """
    Fetch the encoded SVD embedding of one sequence from an embed job.

    Parameters
    ----------
    session : APISession
        Session object for API communication.
    job_id : str
        job ID to retrieve results from
    sequence : str | bytes
        sequence to retrieve results for; bytes are decoded to str before
        being placed in the request path

    Returns
    -------
    result : bytes
        raw response content
    """
    sequence_text = sequence.decode() if isinstance(sequence, bytes) else sequence
    endpoint = f"{PATH_PREFIX}/embed/{job_id}/{sequence_text}"
    return session.get(endpoint).content
70+
71+
72+
def embed_decode(data: bytes) -> np.ndarray:
    """
    Decode an embedding from its serialized form.

    Args:
        data (bytes): raw bytes encoding the array received over the API

    Returns:
        np.ndarray: decoded array
    """
    # np.load needs a file-like object; pickled payloads are refused.
    buffer = io.BytesIO(data)
    decoded = np.load(buffer, allow_pickle=False)
    return decoded
84+
85+
4386
def svd_delete(session: APISession, svd_id: str):
4487
"""
4588
Delete an SVD model.
@@ -121,7 +164,7 @@ def svd_fit_post(
121164

122165

123166
def svd_embed_post(
124-
session: APISession, svd_id: str, sequences: list[bytes]
167+
session: APISession, svd_id: str, sequences: list[bytes] | list[str]
125168
) -> SVDEmbeddingsJob:
126169
"""
127170
POST a request for embeddings from the given SVD model.
@@ -139,7 +182,7 @@ def svd_embed_post(
139182
-------
140183
Job
141184
"""
142-
endpoint = PATH_PREFIX + f"/svd/{svd_id}/embed"
185+
endpoint = PATH_PREFIX + f"/{svd_id}/embed"
143186

144187
sequences_unicode = [(s if isinstance(s, str) else s.decode()) for s in sequences]
145188
body = {

openprotein/app/models/align/msa.py

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -54,14 +54,7 @@ def __init__(
5454
"""
5555
super().__init__(session, job)
5656
self.page_size = page_size
57-
self._msa_id = None
58-
self._prompt_id = None
59-
60-
@property
61-
def msa_id(self) -> str:
62-
if self._msa_id is None:
63-
self._msa_id = self.job.job_id
64-
return self._msa_id
57+
self.msa_id = self.job.job_id
6558

6659
# def wait(self, verbose: bool = False):
6760
# _ = self.job.wait(

openprotein/app/models/align/prompt.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,7 @@ def __init__(
5959
if msa_id is None:
6060
msa_id = job_api.job_args_get(self.session, job.job_id).get("root_msa")
6161
self._msa_id = msa_id
62-
63-
@property
64-
def prompt_id(self) -> str:
65-
if self._prompt_id is None:
66-
self._prompt_id = self.job.job_id
67-
return self._prompt_id
62+
self.prompt_id = self.job.job_id
6863

6964
# def wait(self, verbose: bool = False, **kwargs) -> Iterator[list[str]]:
7065
# _ = self.job.wait(

openprotein/app/models/design.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from openprotein.api import design
22
from openprotein.base import APISession
3-
from openprotein.schemas import DesignJob, DesignResults
3+
from openprotein.schemas import DesignJob, DesignResults, DesignStep
44

55
from .futures import Future, PagedFuture
66

@@ -20,8 +20,14 @@ def __str__(self) -> str:
2020
def __repr__(self) -> str:
2121
return repr(self.job)
2222

23-
def _fmt_results(self, results: DesignResults) -> list[dict]:
24-
return [i.model_dump() for i in results.result]
23+
def _fmt_results(self, results: DesignResults) -> list[DesignStep]:
    """Return the steps held in ``results.result`` as-is, without dumping them to dicts."""
    return results.result
2531

2632
@property
2733
def id(self):

0 commit comments

Comments
 (0)