Skip to content

Commit ba4c5cb

Browse files
committed
added extra readme updates
1 parent 6886db2 commit ba4c5cb

File tree

5 files changed

+838
-0
lines changed

5 files changed

+838
-0
lines changed

src/biothings_typed_client/chem.py

Lines changed: 301 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,301 @@
1+
from typing import Any, Dict, List, Optional, Union
2+
from pydantic import BaseModel, Field, ConfigDict
3+
import pandas as pd
4+
5+
from biothings_typed_client.abstract_client import AbstractClient, AbstractClientAsync
6+
7+
class PubChemInfo(BaseModel):
    """PubChem information for a chemical compound.

    Typed view of the ``pubchem`` sub-document of a MyChem.info record:
    structural descriptors, physical properties, stereochemistry counts and
    identifiers sourced from the PubChem database.

    Every declared field is optional and defaults to ``None``, so a partial
    document (for example one narrowed by a ``fields`` filter) still
    validates. Keys that are not declared here are kept on the instance
    because the model is configured with ``extra='allow'``.

    Field groups:
        - Structural information (SMILES, InChI, molecular formula)
        - Physical properties (molecular weight, exact mass)
        - Chemical properties (hydrogen bond donors/acceptors, rotatable bonds)
        - Stereochemistry information (chiral and stereocenter counts)
        - Chemical identifiers (CID, InChIKey)

    For more details on available fields and their meanings, see:
    https://docs.mychem.info/en/latest/doc/data.html#available-fields
    """

    model_config = ConfigDict(extra='allow')

    chiral_atom_count: Optional[int] = Field(default=None, description="Number of chiral atoms in the molecule")
    chiral_bond_count: Optional[int] = Field(default=None, description="Number of chiral bonds in the molecule")
    # The CID is accepted as either a string or an integer.
    cid: Optional[Union[str, int]] = Field(default=None, description="PubChem Compound Identifier (CID)")
    # PubChem's complexity rating has no fixed upper bound, so the description
    # does not advertise a numeric range.
    complexity: Optional[float] = Field(default=None, description="Molecular complexity rating computed by PubChem (Bertz/Hendrickson/Ihlenfeldt formula)")
    covalently_bonded_unit_count: Optional[int] = Field(default=None, description="Number of covalently bonded units in the molecule")
    defined_atom_stereocenter_count: Optional[int] = Field(default=None, description="Number of defined atom stereocenters")
    defined_bond_stereocenter_count: Optional[int] = Field(default=None, description="Number of defined bond stereocenters")
    exact_mass: Optional[float] = Field(default=None, description="Exact molecular mass (monoisotopic mass)")
    formal_charge: Optional[int] = Field(default=None, description="Net formal charge of the molecule")
    heavy_atom_count: Optional[int] = Field(default=None, description="Number of non-hydrogen atoms")
    hydrogen_bond_acceptor_count: Optional[int] = Field(default=None, description="Number of hydrogen bond acceptor atoms")
    hydrogen_bond_donor_count: Optional[int] = Field(default=None, description="Number of hydrogen bond donor atoms")
    inchi: Optional[str] = Field(default=None, description="IUPAC International Chemical Identifier (InChI)")
    inchi_key: Optional[str] = Field(default=None, description="InChI Key (27-character hash of the InChI)")
    isotope_atom_count: Optional[int] = Field(default=None, description="Number of isotope atoms")
    # Mapping of name/format label to value; the set of keys is not fixed here.
    iupac: Optional[Dict[str, str]] = Field(default=None, description="IUPAC names in different formats")
    molecular_formula: Optional[str] = Field(default=None, description="Molecular formula in Hill notation")
    molecular_weight: Optional[float] = Field(default=None, description="Average molecular weight")
    monoisotopic_weight: Optional[float] = Field(default=None, description="Monoisotopic molecular weight")
    rotatable_bond_count: Optional[int] = Field(default=None, description="Number of rotatable bonds")
    smiles: Optional[Dict[str, str]] = Field(default=None, description="SMILES strings in different formats")
    tautomers_count: Optional[int] = Field(default=None, description="Number of possible tautomers")
    # Unit is square angstroms; the previous text carried a mis-encoded "Å²".
    topological_polar_surface_area: Optional[float] = Field(default=None, description="Topological polar surface area in Å²")
    undefined_atom_stereocenter_count: Optional[int] = Field(default=None, description="Number of undefined atom stereocenters")
    undefined_bond_stereocenter_count: Optional[int] = Field(default=None, description="Number of undefined bond stereocenters")
    xlogp: Optional[float] = Field(default=None, description="Octanol-water partition coefficient (logP)")
52+
53+
class ChemResponse(BaseModel):
    """Response model for chemical compound information from MyChem.info.

    Represents one compound document as returned by the MyChem.info API:

        - ``id``: the primary identifier (typically an InChIKey), read from
          the ``_id`` key of the raw document
        - ``version``: version number of the data, read from ``_version``
        - ``pubchem``: detailed PubChem information, ``None`` when absent

    Keys that are not declared on the model are kept on the instance
    (``extra='allow'``), so data from other sources is not dropped.

    Both aliased fields may also be supplied under their attribute names
    (``id`` / ``version``). This lets the output of ``model_dump()`` be fed
    back into ``model_validate()`` and lets instances be built directly in
    Python code; raw API documents keyed by ``_id`` / ``_version`` validate
    as well.

    For more information about the available fields and data sources, see:
    https://docs.mychem.info/en/latest/doc/data.html#available-fields
    """

    # populate_by_name: accept the field names in addition to the "_id" /
    # "_version" validation aliases (needed for dump -> validate round trips).
    model_config = ConfigDict(extra='allow', populate_by_name=True)

    id: str = Field(description="Chemical identifier (typically InChIKey)", validation_alias="_id")
    version: int = Field(description="Version number of the data", validation_alias="_version")
    pubchem: Optional[PubChemInfo] = Field(default=None, description="Detailed PubChem information")

    def get_chem_id(self) -> str:
        """Get the chemical identifier.

        Returns:
            str: The value of ``id`` (typically an InChIKey)
        """
        return self.id

    def has_pubchem(self) -> bool:
        """Check if the chemical has PubChem information.

        Returns:
            bool: True if ``pubchem`` is populated, False if it is ``None``
        """
        return self.pubchem is not None
88+
89+
class ChemClient(AbstractClient[ChemResponse]):
    """A typed wrapper around the BioThings chem client (synchronous).

    Provides synchronous access to the MyChem.info API and converts the raw
    documents returned by the wrapped client into ``ChemResponse`` models.

    The client supports:
        - Single compound lookup by ID (``getchem``)
        - Batch compound lookup by multiple IDs (``getchems``)
        - Field filtering to retrieve specific data
        - Response caching, controlled by the ``caching`` constructor flag

    For more information about the available fields and data sources, see:
    https://docs.mychem.info/en/latest/doc/data.html#available-fields
    """

    def __init__(self, caching: bool = True):
        """Initialize the chem client.

        Args:
            caching (bool): Whether to enable response caching. Defaults to True.
                The flag is forwarded unchanged to the base-class initializer.
        """
        super().__init__("chem", caching=caching)

    def _response_model(self) -> type[ChemResponse]:
        """Get the response model type.

        Returns:
            type[ChemResponse]: The ChemResponse model class
        """
        return ChemResponse

    @staticmethod
    def _split_ids(chem_ids: Union[str, List[str], tuple]) -> List[str]:
        """Turn any accepted ``chem_ids`` input into a clean list of IDs.

        A string is split on commas; surrounding whitespace is stripped and
        empty pieces (e.g. from a trailing comma) are dropped. Any other
        iterable is copied into a new list unchanged.
        """
        if isinstance(chem_ids, str):
            pieces = (piece.strip() for piece in chem_ids.split(","))
            return [piece for piece in pieces if piece]
        return list(chem_ids)

    def getchem(
        self,
        chem_id: str,
        fields: Optional[Union[List[str], str]] = None,
        **kwargs
    ) -> Optional[ChemResponse]:
        """Get chemical information by ID.

        Looks up a single compound through the wrapped client and validates
        the returned document as a ``ChemResponse``.

        Args:
            chem_id (str): The chemical identifier (e.g. InChI key)
            fields (Optional[Union[List[str], str]]): Specific fields to return.
                If None, all available fields are returned. Can be a single
                field name or a list of field names.
            **kwargs: Additional arguments passed to the underlying client

        Returns:
            Optional[ChemResponse]: The validated compound, or None when the
            underlying client returns None for the lookup

        Example:
            >>> client = ChemClient()
            >>> result = client.getchem("KTUFNOKKBVMGRW-UHFFFAOYSA-N")
            >>> print(result.pubchem.molecular_formula)
        """
        # self._client is presumably created by AbstractClient.__init__ -- confirm there.
        document = self._client.getchem(chem_id, fields=fields, **kwargs)
        if document is None:
            return None
        return ChemResponse.model_validate(document)

    def getchems(
        self,
        chem_ids: Union[str, List[str], tuple],
        fields: Optional[Union[List[str], str]] = None,
        **kwargs
    ) -> List[ChemResponse]:
        """Get information for multiple chemicals.

        Sends all identifiers to the wrapped client in one call and validates
        each returned document as a ``ChemResponse``.

        Args:
            chem_ids (Union[str, List[str], tuple]): Chemical identifiers. Can be:
                - A single string with comma-separated IDs (whitespace around
                  each ID is ignored, empty pieces are dropped)
                - A list of ID strings
                - A tuple of ID strings
            fields (Optional[Union[List[str], str]]): Specific fields to return.
                If None, all available fields are returned. Can be a single
                field name or a list of field names.
            **kwargs: Additional arguments passed to the underlying client

        Returns:
            List[ChemResponse]: One model per compound that was found. Entries
            flagged ``notfound`` by the backend are skipped, so the list can
            be shorter than ``chem_ids`` and is not guaranteed to line up
            with it positionally. An empty input yields an empty list
            without calling the underlying client.

        Example:
            >>> client = ChemClient()
            >>> results = client.getchems(["KTUFNOKKBVMGRW-UHFFFAOYSA-N", "XEFQLINVKFYRCS-UHFFFAOYSA-N"])
            >>> for result in results:
            ...     print(result.pubchem.molecular_formula)
        """
        ids = self._split_ids(chem_ids)
        if not ids:
            return []

        hits = self._client.getchems(ids, fields=fields, **kwargs)
        # Unmatched IDs come back as {"query": ..., "notfound": True}; those
        # carry no _id/_version and would fail validation for the whole batch.
        return [
            ChemResponse.model_validate(hit)
            for hit in hits
            if not (isinstance(hit, dict) and hit.get("notfound"))
        ]
195+
196+
class ChemClientAsync(AbstractClientAsync[ChemResponse]):
    """A typed wrapper around the BioThings chem client (asynchronous).

    Provides asynchronous access to the MyChem.info API and converts the raw
    documents returned by the wrapped client into ``ChemResponse`` models.

    The client supports:
        - Single compound lookup by ID (``getchem``)
        - Batch compound lookup by multiple IDs (``getchems``)
        - Field filtering to retrieve specific data
        - Response caching, controlled by the ``caching`` constructor flag

    For more information about the available fields and data sources, see:
    https://docs.mychem.info/en/latest/doc/data.html#available-fields
    """

    def __init__(self, caching: bool = True):
        """Initialize the async chem client.

        Args:
            caching (bool): Whether to enable response caching. Defaults to True.
                The flag is forwarded unchanged to the base-class initializer.
        """
        super().__init__("chem", caching=caching)

    def _response_model(self) -> type[ChemResponse]:
        """Get the response model type.

        Returns:
            type[ChemResponse]: The ChemResponse model class
        """
        return ChemResponse

    @staticmethod
    def _split_ids(chem_ids: Union[str, List[str], tuple]) -> List[str]:
        """Turn any accepted ``chem_ids`` input into a clean list of IDs.

        A string is split on commas; surrounding whitespace is stripped and
        empty pieces (e.g. from a trailing comma) are dropped. Any other
        iterable is copied into a new list unchanged.
        """
        if isinstance(chem_ids, str):
            pieces = (piece.strip() for piece in chem_ids.split(","))
            return [piece for piece in pieces if piece]
        return list(chem_ids)

    async def getchem(
        self,
        chem_id: str,
        fields: Optional[Union[List[str], str]] = None,
        **kwargs
    ) -> Optional[ChemResponse]:
        """Get chemical information by ID asynchronously.

        Awaits a single-compound lookup on the wrapped client and validates
        the returned document as a ``ChemResponse``.

        Args:
            chem_id (str): The chemical identifier (e.g. InChI key)
            fields (Optional[Union[List[str], str]]): Specific fields to return.
                If None, all available fields are returned. Can be a single
                field name or a list of field names.
            **kwargs: Additional arguments passed to the underlying client

        Returns:
            Optional[ChemResponse]: The validated compound, or None when the
            underlying client returns None for the lookup

        Example:
            >>> client = ChemClientAsync()
            >>> result = await client.getchem("KTUFNOKKBVMGRW-UHFFFAOYSA-N")
            >>> print(result.pubchem.molecular_formula)
        """
        # self._client is presumably created by AbstractClientAsync.__init__ -- confirm there.
        document = await self._client.getchem(chem_id, fields=fields, **kwargs)
        if document is None:
            return None
        return ChemResponse.model_validate(document)

    async def getchems(
        self,
        chem_ids: Union[str, List[str], tuple],
        fields: Optional[Union[List[str], str]] = None,
        **kwargs
    ) -> List[ChemResponse]:
        """Get information for multiple chemicals asynchronously.

        Sends all identifiers to the wrapped client in one awaited call and
        validates each returned document as a ``ChemResponse``.

        Args:
            chem_ids (Union[str, List[str], tuple]): Chemical identifiers. Can be:
                - A single string with comma-separated IDs (whitespace around
                  each ID is ignored, empty pieces are dropped)
                - A list of ID strings
                - A tuple of ID strings
            fields (Optional[Union[List[str], str]]): Specific fields to return.
                If None, all available fields are returned. Can be a single
                field name or a list of field names.
            **kwargs: Additional arguments passed to the underlying client

        Returns:
            List[ChemResponse]: One model per compound that was found. Entries
            flagged ``notfound`` by the backend are skipped, so the list can
            be shorter than ``chem_ids`` and is not guaranteed to line up
            with it positionally. An empty input yields an empty list
            without calling the underlying client.

        Example:
            >>> client = ChemClientAsync()
            >>> results = await client.getchems(["KTUFNOKKBVMGRW-UHFFFAOYSA-N", "XEFQLINVKFYRCS-UHFFFAOYSA-N"])
            >>> for result in results:
            ...     print(result.pubchem.molecular_formula)
        """
        ids = self._split_ids(chem_ids)
        if not ids:
            return []

        hits = await self._client.getchems(ids, fields=fields, **kwargs)
        # Unmatched IDs come back as {"query": ..., "notfound": True}; those
        # carry no _id/_version and would fail validation for the whole batch.
        return [
            ChemResponse.model_validate(hit)
            for hit in hits
            if not (isinstance(hit, dict) and hit.get("notfound"))
        ]

0 commit comments

Comments
 (0)