11import json
2+ import re
3+ import cairosvg
4+ import os
25from bs4 import BeautifulSoup
36from urllib .request import urlopen
47
710 'properties' , 'synonyms' , 'replaceRns' , 'hasMolefile' ]
811
912
10- # get data from a page
1113def detail (casrn , field = "all" ):
12- """ access the common chemistry detail api """
14+ """
15+ Access the Common Chemistry detail API at
16+ https://commonchemistry.cas.org/api/detail?cas_rn=<casrn>
17+ :param casrn: CAS Registry Number
18+ :param field: field to return or all fields (default)
19+ :return mixed
20+ """
21+ if not _validcas (casrn ):
22+ return '' # false
1323 url = ccpath + 'detail?cas_rn=' + casrn
1424 respnse = urlopen (url )
1525 jsn = json .loads (respnse .read ())
16-
1726 if field == "all" :
1827 return jsn
1928 elif field in fields :
@@ -25,23 +34,125 @@ def detail(casrn, field="all"):
2534 else :
2635 return jsn [field ]
2736 else :
28- return "Field not available..."
37+ return '' # false
2938
3039
31- # run a search
def query(term='', exact=False):
    """
    Search the CommonChemistry database API at
    https://commonchemistry.cas.org/api/search?q=<term>
    :param term: string to be searched
    :param exact: truthy to search for the term as given; falsy (default) to
        append a trailing '*' wildcard unless the term already ends with one
    :return: list of dicts with keys 'textname', 'htmlname' and 'rn'
        (empty list when there are no results)
    """
    # local import so this function does not depend on the file's import block
    from urllib.parse import quote

    # Use truthiness rather than identity checks so values such as 0/1 still
    # produce a usable URL, and never double up the wildcard.
    if not exact and not term.endswith('*'):
        term += '*'
    # Percent-encode characters that would otherwise break the query string
    # (spaces, '#', '&', '+', ...); '*' and '=' are kept literal so URLs for
    # wildcard and 'InChIKey=' searches look the same as before.
    url = ccpath + 'search?q=' + quote(term, safe='*=')
    with urlopen(url) as respnse:
        jsn = json.loads(respnse.read())
    out = []
    # a missing or empty 'results' entry yields an empty list
    for hit in jsn.get('results') or []:
        textname = BeautifulSoup(hit["name"], "lxml").text
        out.append({"textname": textname, "htmlname": hit["name"].lower(), "rn": hit["rn"]})
    return out
4161
4262
43- # search for a compound using an InChIKey
def key2cas(key):
    """
    Find the CAS Registry Number of a chemical substance using an IUPAC InChIKey
    :param key: a valid InChIKey
    :return: the CAS Registry Number as a string, or '' when the key is
        invalid, nothing is found, or no candidate has a usable molecular mass
    """
    if not _validkey(key):
        return ''
    hits = query('InChIKey=' + key, True)
    if not hits:
        return ''
    if len(hits) == 1:
        return hits[0]['rn']
    # several hits: pick the smallest molar mass compound, i.e., not polymer
    minmm = float('inf')  # no artificial ceiling on the candidate masses
    minrn = ''
    for hit in hits:
        mm = detail(hit['rn'], 'molecularMass')
        try:
            mass = float(mm)
        except (TypeError, ValueError):
            # detail() gave '' or a non-numeric value: skip this candidate
            continue
        if mass < minmm:
            minmm = mass
            minrn = hit['rn']
    return minrn
88+
89+
90+ def _validkey (key ):
91+ """
92+ Validate and IUPAC InChIKey
93+ :param key: a string to be validated as an IUPAC InChIKey
94+ :return: bool
95+ """
96+ test = re .search (r'^[A-Z]{14}-[A-Z]{8}[SN][A]-[A-Z]$' , key )
97+ if test is None :
98+ return False
99+ return True
100+
101+
102+ def _validcas (cas ):
103+ """
104+ Validate a CAS Registry Number
105+ See: https://en.wikipedia.org/wiki/CAS_Registry_Number#Format
106+ :param cas: a string to be validated as a CAS Registry Number
107+ :return: bool
108+ """
109+ test = re .search (r'^\d{2,8}-\d{2}-\d$' , cas )
110+ # if format of string does not match then it's not CAS RN
111+ if test is None :
112+ return False
113+ # verify check digit
114+ reverse = cas [::- 1 ] # reverse the CAS Registry Number (needed for checksum math and split out checksum)
115+ digits = reverse .replace ('-' , '' ) # remove the dashes
116+ nochk = digits [1 :] # all but first digit
117+ chksum = int (digits [:1 ]) # first digit
118+ total = 0
119+ for i , digit in enumerate (nochk ):
120+ total += (i + 1 ) * int (digit ) # index of chars starts at 0
121+ newsum = total % 10
122+ if newsum == chksum :
123+ return True
124+ else :
125+ return False
126+
127+
def chemimg(chemid='', imgtype='svg'):
    """
    Get an image for a compound from either a CAS Registry Number, InChIKey, SMILES, or name
    The image is written to the current working directory as <casrn>.<imgtype>
    :param chemid: the CAS Registry Number, InChIKey, SMILES, or name
    :param imgtype: the type of image file to produce - svg, png, or ps
        (any other value produces the svg file)
    :return: True when an image file was written, False otherwise
    """
    # check identifier for type so checking can be done
    if not chemid:
        return False
    if _validkey(chemid):
        casrn = key2cas(chemid)
    elif _validcas(chemid):
        casrn = chemid
    else:
        # query() returns a list of hits, not a registry number: use the
        # registry number of the first hit (if there is one)
        hits = query(chemid, True)
        casrn = hits[0]['rn'] if hits else ''
    if not casrn:
        return False
    # get svg data and save
    svg = detail(casrn, "image")
    if not svg:
        # nothing to write; avoid leaving an empty file behind
        return False
    svgfile = casrn + ".svg"
    with open(svgfile, "w", encoding="utf-8") as f:
        f.write(svg)
    if imgtype not in ('png', 'ps'):
        return True
    # the svg is only an intermediate for png/ps output: remove it afterwards,
    # even when the conversion fails
    try:
        if imgtype == 'png':
            cairosvg.svg2png(url=svgfile, write_to=casrn + ".png")
        else:
            cairosvg.svg2ps(url=svgfile, write_to=casrn + ".ps")
    finally:
        if os.path.exists(svgfile):
            os.remove(svgfile)
    return True
0 commit comments