1+ from typing import Any , Dict , List , Optional , Union
2+ from pydantic import BaseModel , Field , ConfigDict
3+ import pandas as pd
4+
5+ from biothings_typed_client .abstract_client import AbstractClient , AbstractClientAsync
6+
class PubChemInfo(BaseModel):
    """PubChem information for a chemical compound.

    This model describes the ``pubchem`` section of a MyChem.info document:
    structural properties, physical characteristics, and identifiers.

    Key fields include:
    - Structural information (SMILES, InChI, molecular formula)
    - Physical properties (molecular weight, exact mass)
    - Chemical properties (hydrogen bond donors/acceptors, rotatable bonds)
    - Stereochemistry information (chiral atoms/bonds, stereocenters)
    - Chemical identifiers (CID, InChIKey)

    Every declared field is optional and defaults to ``None``. Keys that are
    not declared here are kept on the instance rather than rejected, because
    the model is configured with ``extra='allow'``.

    For more details on available fields and their meanings, see:
    https://docs.mychem.info/en/latest/doc/data.html#available-fields
    """

    # Accept (and retain) keys that are not declared as fields below.
    model_config = ConfigDict(extra='allow')

    chiral_atom_count: Optional[int] = Field(
        default=None, description="Number of chiral atoms in the molecule"
    )
    chiral_bond_count: Optional[int] = Field(
        default=None, description="Number of chiral bonds in the molecule"
    )
    # The CID is accepted either as a string or as an integer.
    cid: Optional[Union[str, int]] = Field(
        default=None, description="PubChem Compound Identifier (CID)"
    )
    # No fixed range is advertised: PubChem's complexity rating is unbounded
    # and regularly exceeds 100 for ordinary molecules.
    complexity: Optional[float] = Field(
        default=None, description="Molecular complexity score"
    )
    covalently_bonded_unit_count: Optional[int] = Field(
        default=None, description="Number of covalently bonded units in the molecule"
    )
    defined_atom_stereocenter_count: Optional[int] = Field(
        default=None, description="Number of defined atom stereocenters"
    )
    defined_bond_stereocenter_count: Optional[int] = Field(
        default=None, description="Number of defined bond stereocenters"
    )
    exact_mass: Optional[float] = Field(
        default=None, description="Exact molecular mass (monoisotopic mass)"
    )
    formal_charge: Optional[int] = Field(
        default=None, description="Net formal charge of the molecule"
    )
    heavy_atom_count: Optional[int] = Field(
        default=None, description="Number of non-hydrogen atoms"
    )
    hydrogen_bond_acceptor_count: Optional[int] = Field(
        default=None, description="Number of hydrogen bond acceptor atoms"
    )
    hydrogen_bond_donor_count: Optional[int] = Field(
        default=None, description="Number of hydrogen bond donor atoms"
    )
    inchi: Optional[str] = Field(
        default=None, description="IUPAC International Chemical Identifier (InChI)"
    )
    inchi_key: Optional[str] = Field(
        default=None, description="InChI Key (27-character hash of the InChI)"
    )
    isotope_atom_count: Optional[int] = Field(
        default=None, description="Number of isotope atoms"
    )
    # Mapping of name style -> name.
    iupac: Optional[Dict[str, str]] = Field(
        default=None, description="IUPAC names in different formats"
    )
    molecular_formula: Optional[str] = Field(
        default=None, description="Molecular formula in Hill notation"
    )
    molecular_weight: Optional[float] = Field(
        default=None, description="Average molecular weight"
    )
    monoisotopic_weight: Optional[float] = Field(
        default=None, description="Monoisotopic molecular weight"
    )
    rotatable_bond_count: Optional[int] = Field(
        default=None, description="Number of rotatable bonds"
    )
    # Mapping of SMILES flavour -> SMILES string.
    smiles: Optional[Dict[str, str]] = Field(
        default=None, description="SMILES strings in different formats"
    )
    tautomers_count: Optional[int] = Field(
        default=None, description="Number of possible tautomers"
    )
    topological_polar_surface_area: Optional[float] = Field(
        default=None, description="Topological polar surface area in Ų"
    )
    undefined_atom_stereocenter_count: Optional[int] = Field(
        default=None, description="Number of undefined atom stereocenters"
    )
    undefined_bond_stereocenter_count: Optional[int] = Field(
        default=None, description="Number of undefined bond stereocenters"
    )
    xlogp: Optional[float] = Field(
        default=None, description="Octanol-water partition coefficient (logP)"
    )
52+
class ChemResponse(BaseModel):
    """Response model for chemical compound information from MyChem.info.

    Represents one document returned by the MyChem.info API for a chemical
    compound: its identifier, its version number, and (when present) the
    PubChem section.

    Incoming payload keys are mapped as follows:
    - ``_id`` -> ``id``: the primary identifier (typically InChIKey)
    - ``_version`` -> ``version``: version number of the data
    - ``pubchem`` -> ``pubchem``: detailed PubChem information (if available)

    Any other top-level keys are kept on the instance (``extra='allow'``).
    The model can also be populated by its own field names (``id``,
    ``version``), so the output of ``model_dump()`` validates back into a
    ``ChemResponse``.

    For more information about the available fields and data sources, see:
    https://docs.mychem.info/en/latest/doc/data.html#available-fields
    """

    # populate_by_name: without it only the "_id"/"_version" aliases are
    # accepted, so ChemResponse(id=..., version=...) and re-validating a
    # model_dump() both fail with "field required" errors.
    model_config = ConfigDict(extra='allow', populate_by_name=True)

    id: str = Field(
        description="Chemical identifier (typically InChIKey)",
        validation_alias="_id",
    )
    version: int = Field(
        description="Version number of the data",
        validation_alias="_version",
    )
    pubchem: Optional[PubChemInfo] = Field(
        default=None, description="Detailed PubChem information"
    )

    def get_chem_id(self) -> str:
        """Get the chemical identifier.

        Returns:
            str: The value of ``id`` (typically an InChIKey).
        """
        return self.id

    def has_pubchem(self) -> bool:
        """Check if the chemical has PubChem information.

        Returns:
            bool: True if ``pubchem`` is set, False if it is ``None``.
        """
        return self.pubchem is not None
88+
class ChemClient(AbstractClient[ChemResponse]):
    """A typed wrapper around the BioThings chem client (synchronous).

    This client provides synchronous access to the MyChem.info API and returns
    strongly-typed ``ChemResponse`` objects instead of raw dictionaries.

    The client supports:
    - Single compound lookup by ID
    - Batch compound lookup by multiple IDs
    - Field filtering to retrieve specific data
    - Response caching (the ``caching`` flag is forwarded to the base client)

    For more information about the available fields and data sources, see:
    https://docs.mychem.info/en/latest/doc/data.html#available-fields
    """

    def __init__(self, caching: bool = True):
        """Initialize the chem client.

        Args:
            caching (bool): Whether to enable response caching. Defaults to True.
        """
        super().__init__("chem", caching=caching)

    def _response_model(self) -> type[ChemResponse]:
        """Get the response model type.

        Returns:
            type[ChemResponse]: The ChemResponse model class
        """
        return ChemResponse

    @staticmethod
    def _normalize_chem_ids(chem_ids):
        """Turn the accepted ID inputs into what is sent to the underlying client.

        A string is split on commas; surrounding whitespace is stripped and
        empty pieces are dropped, so ``"a, b,"`` becomes ``["a", "b"]``.
        A tuple is converted to a list. Anything else is returned unchanged.
        """
        if isinstance(chem_ids, str):
            pieces = (piece.strip() for piece in chem_ids.split(","))
            return [piece for piece in pieces if piece]
        if isinstance(chem_ids, tuple):
            return list(chem_ids)
        return chem_ids

    def getchem(
        self,
        chem_id: str,
        fields: Optional[Union[List[str], str]] = None,
        **kwargs
    ) -> Optional[ChemResponse]:
        """Get chemical information by ID.

        Retrieves a single chemical compound by its identifier (typically an
        InChIKey) and validates the raw result into a ``ChemResponse``.

        Args:
            chem_id (str): The chemical identifier (e.g. InChI key)
            fields (Optional[Union[List[str], str]]): Specific fields to return. If None,
                all available fields are returned. Can be a single field name or a list
                of field names.
            **kwargs: Additional arguments passed to the underlying client

        Returns:
            Optional[ChemResponse]: ChemResponse object containing the chemical information
                or None if the underlying client returned None

        Example:
            >>> client = ChemClient()
            >>> result = client.getchem("KTUFNOKKBVMGRW-UHFFFAOYSA-N")
            >>> print(result.pubchem.molecular_formula)
        """
        result = self._client.getchem(chem_id, fields=fields, **kwargs)
        if result is None:
            return None
        return ChemResponse.model_validate(result)

    def getchems(
        self,
        chem_ids: Union[str, List[str], tuple],
        fields: Optional[Union[List[str], str]] = None,
        **kwargs
    ) -> List[ChemResponse]:
        """Get information for multiple chemicals.

        Retrieves several chemical compounds in one call to the underlying
        client and validates each returned document into a ``ChemResponse``.

        Args:
            chem_ids (Union[str, List[str], tuple]): List of chemical identifiers or
                comma-separated string. Can be:
                - A single string with comma-separated IDs (whitespace around
                  each ID is ignored, empty pieces are dropped)
                - A list of ID strings
                - A tuple of ID strings
            fields (Optional[Union[List[str], str]]): Specific fields to return. If None,
                all available fields are returned. Can be a single field name or a list
                of field names.
            **kwargs: Additional arguments passed to the underlying client

        Returns:
            List[ChemResponse]: One ChemResponse per document returned. Entries
                that the service marks as not found are skipped, so the list may
                be shorter than ``chem_ids``; an empty ID list yields ``[]``.

        Example:
            >>> client = ChemClient()
            >>> results = client.getchems(["KTUFNOKKBVMGRW-UHFFFAOYSA-N", "XEFQLINVKFYRCS-UHFFFAOYSA-N"])
            >>> for result in results:
            ...     print(result.pubchem.molecular_formula)
        """
        ids = self._normalize_chem_ids(chem_ids)
        # Nothing to look up (e.g. "" or []): skip the request entirely.
        if isinstance(ids, list) and not ids:
            return []

        results = self._client.getchems(ids, fields=fields, **kwargs)
        # NOTE(review): the BioThings batch endpoint is documented to return
        # {"query": ..., "notfound": True} placeholders for unmatched IDs. Those
        # have no "_id"/"_version" and would fail validation, aborting the whole
        # batch, so they are filtered out here - confirm against the base client.
        return [
            ChemResponse.model_validate(result)
            for result in results
            if not (isinstance(result, dict) and result.get("notfound"))
        ]
195+
class ChemClientAsync(AbstractClientAsync[ChemResponse]):
    """A typed wrapper around the BioThings chem client (asynchronous).

    This client provides asynchronous access to the MyChem.info API and returns
    strongly-typed ``ChemResponse`` objects instead of raw dictionaries.

    The client supports:
    - Single compound lookup by ID
    - Batch compound lookup by multiple IDs
    - Field filtering to retrieve specific data
    - Response caching (the ``caching`` flag is forwarded to the base client)

    For more information about the available fields and data sources, see:
    https://docs.mychem.info/en/latest/doc/data.html#available-fields
    """

    def __init__(self, caching: bool = True):
        """Initialize the async chem client.

        Args:
            caching (bool): Whether to enable response caching. Defaults to True.
        """
        super().__init__("chem", caching=caching)

    def _response_model(self) -> type[ChemResponse]:
        """Get the response model type.

        Returns:
            type[ChemResponse]: The ChemResponse model class
        """
        return ChemResponse

    @staticmethod
    def _normalize_chem_ids(chem_ids):
        """Turn the accepted ID inputs into what is sent to the underlying client.

        A string is split on commas; surrounding whitespace is stripped and
        empty pieces are dropped, so ``"a, b,"`` becomes ``["a", "b"]``.
        A tuple is converted to a list. Anything else is returned unchanged.
        """
        if isinstance(chem_ids, str):
            pieces = (piece.strip() for piece in chem_ids.split(","))
            return [piece for piece in pieces if piece]
        if isinstance(chem_ids, tuple):
            return list(chem_ids)
        return chem_ids

    async def getchem(
        self,
        chem_id: str,
        fields: Optional[Union[List[str], str]] = None,
        **kwargs
    ) -> Optional[ChemResponse]:
        """Get chemical information by ID asynchronously.

        Awaits the underlying client's lookup of a single chemical compound
        (typically by InChIKey) and validates the raw result into a
        ``ChemResponse``.

        Args:
            chem_id (str): The chemical identifier (e.g. InChI key)
            fields (Optional[Union[List[str], str]]): Specific fields to return. If None,
                all available fields are returned. Can be a single field name or a list
                of field names.
            **kwargs: Additional arguments passed to the underlying client

        Returns:
            Optional[ChemResponse]: ChemResponse object containing the chemical information
                or None if the underlying client returned None

        Example:
            >>> client = ChemClientAsync()
            >>> result = await client.getchem("KTUFNOKKBVMGRW-UHFFFAOYSA-N")
            >>> print(result.pubchem.molecular_formula)
        """
        result = await self._client.getchem(chem_id, fields=fields, **kwargs)
        if result is None:
            return None
        return ChemResponse.model_validate(result)

    async def getchems(
        self,
        chem_ids: Union[str, List[str], tuple],
        fields: Optional[Union[List[str], str]] = None,
        **kwargs
    ) -> List[ChemResponse]:
        """Get information for multiple chemicals asynchronously.

        Awaits one batch call to the underlying client and validates each
        returned document into a ``ChemResponse``.

        Args:
            chem_ids (Union[str, List[str], tuple]): List of chemical identifiers or
                comma-separated string. Can be:
                - A single string with comma-separated IDs (whitespace around
                  each ID is ignored, empty pieces are dropped)
                - A list of ID strings
                - A tuple of ID strings
            fields (Optional[Union[List[str], str]]): Specific fields to return. If None,
                all available fields are returned. Can be a single field name or a list
                of field names.
            **kwargs: Additional arguments passed to the underlying client

        Returns:
            List[ChemResponse]: One ChemResponse per document returned. Entries
                that the service marks as not found are skipped, so the list may
                be shorter than ``chem_ids``; an empty ID list yields ``[]``.

        Example:
            >>> client = ChemClientAsync()
            >>> results = await client.getchems(["KTUFNOKKBVMGRW-UHFFFAOYSA-N", "XEFQLINVKFYRCS-UHFFFAOYSA-N"])
            >>> for result in results:
            ...     print(result.pubchem.molecular_formula)
        """
        ids = self._normalize_chem_ids(chem_ids)
        # Nothing to look up (e.g. "" or []): skip the request entirely.
        if isinstance(ids, list) and not ids:
            return []

        results = await self._client.getchems(ids, fields=fields, **kwargs)
        # NOTE(review): the BioThings batch endpoint is documented to return
        # {"query": ..., "notfound": True} placeholders for unmatched IDs. Those
        # have no "_id"/"_version" and would fail validation, aborting the whole
        # batch, so they are filtered out here - confirm against the base client.
        return [
            ChemResponse.model_validate(result)
            for result in results
            if not (isinstance(result, dict) and result.get("notfound"))
        ]
0 commit comments