1313"""
1414
1515from prody import PY3K , LOGGER
16- import numpy as np
16+ from prody . utilities import openFile
1717
18+ import numpy as np
19+ import os
1820
1921__all__ = ['QuartataWebBrowser' , 'QuartataChemicalRecord' , 'searchQuartataWeb' ]
2022
@@ -23,7 +25,7 @@ class QuartataWebBrowser(object):
2325 """Class to browse the QuartataWeb website."""
2426
2527 def __init__ (self , data_source = None , drug_group = None , input_type = None , query_type = None ,
26- data = None , num_predictions = None , browser_type = None , job_id = None ):
28+ data = None , num_predictions = None , browser_type = None , job_id = None , tsv = None ):
2729 """Instantiate a QuartataWebBrowser object instance.
2830
2931 :arg data_source: source database for QuartataWeb analysis
@@ -86,6 +88,10 @@ def __init__(self, data_source=None, drug_group=None, input_type=None, query_typ
8688 :arg job_id: job ID for accessing previous jobs
8789 Default is ``None``
8890 :type browser_type: int
91+
92+ :arg tsv: a filename for a file that contains the results
93+ or a file to save the results in tsv format
94+ :type tsv: str
8995 """
9096
9197 self .browser_type = None
@@ -97,9 +103,18 @@ def __init__(self, data_source=None, drug_group=None, input_type=None, query_typ
97103 self .query_type = None
98104 self .data = None
99105 self .num_predictions = None
106+ self .chemical_data = None
100107
101108 self .job_id = job_id
102109
110+ self .filename = None
111+ self .no_data = True
112+ if tsv is not None :
113+ try :
114+ self .parseChemicals (tsv )
115+ except :
116+ raise ValueError ('please provide a valid filename' )
117+
103118 self .setBrowserType (browser_type )
104119 self .setDataSource (data_source )
105120 self .setDrugGroup (drug_group )
@@ -176,7 +191,8 @@ def setDataSource(self, data_source):
176191 raise ValueError ('data_source should be DrugBank, STITCH or None' )
177192
178193 self .data_source = data_source
179- self .updateHomePage ()
194+ if self .no_data :
195+ self .updateHomePage ()
180196
181197 def setDrugGroup (self , group ):
182198 """Set drug_group and update home page
@@ -198,7 +214,9 @@ def setDrugGroup(self, group):
198214 raise ValueError ('group should be approved, all or None' )
199215
200216 self .drug_group = group
201- self .updateHomePage ()
217+ if self .no_data :
218+ self .updateHomePage ()
219+
202220 elif group is not None :
203221 LOGGER .warn ('there are no groups when using STITCH' )
204222
@@ -219,7 +237,8 @@ def setInputType(self, input_type):
219237 raise ValueError ('input_type should be 1, 2 or None' )
220238
221239 self .input_type = input_type
222- self .updateHomePage ()
240+ if self .no_data :
241+ self .updateHomePage ()
223242
224243 def setQueryType (self , query_type ):
225244 """Set query_type and update home page
@@ -249,7 +268,8 @@ def setQueryType(self, query_type):
249268 raise ValueError ('query_type should be 1, 2, 3 or None' )
250269
251270 self .query_type = query_type
252- self .updateHomePage ()
271+ if self .no_data :
272+ self .updateHomePage ()
253273
254274 def setData (self , data ):
255275 """Set data and update home page
@@ -295,7 +315,8 @@ def setData(self, data):
295315 'each item in data must be a pair with ; as delimiter' )
296316
297317 self .data = data
298- self .updateHomePage ()
318+ if self .no_data :
319+ self .updateHomePage ()
299320
300321 def setNumPredictions (self , num_predictions ):
301322 """Set num_predictions and update home page
@@ -323,7 +344,8 @@ def setNumPredictions(self, num_predictions):
323344 raise ValueError ('2nd num_predictions must be <= 20' )
324345
325346 self .num_predictions = num_predictions
326- self .updateHomePage ()
347+ if self .no_data :
348+ self .updateHomePage ()
327349
328350 def setBrowserType (self , browser_type ):
329351 """Set browser_type and update home page
@@ -352,28 +374,28 @@ def setBrowserType(self, browser_type):
352374 url = "http://quartata.csb.pitt.edu"
353375 browser .visit (url )
354376 except WebDriverException :
355- raise ValueError (
356- 'No web driver found for Chrome or Firefox. Please specify a browser type or download an appropriate driver.' )
377+ raise ValueError ('No web driver found for Chrome or Firefox. '
378+ ' Please specify a different browser type or download an appropriate driver.' )
357379 else :
358380 self .browser_type = 'firefox'
359381 else :
360382 self .browser_type = 'chrome'
361383
362- elif not isinstance (browser_type , str ):
363- raise TypeError ('browser_type should be a string or None' )
364- else :
365- try :
366- browser = Browser (browser_type )
367- url = "http://quartata.csb.pitt.edu"
368- browser .visit (url )
369- except WebDriverException :
370- raise ValueError (
371- 'No web driver found for browser_type. Please specify a different browser type or download an appropriate driver.' )
384+ elif not isinstance (browser_type , str ):
385+ raise TypeError ('browser_type should be a string or None' )
372386 else :
373- self .browser_type = browser_type
387+ try :
388+ browser = Browser (browser_type )
389+ url = "http://quartata.csb.pitt.edu"
390+ browser .visit (url )
391+ except WebDriverException :
392+ raise ValueError ('No web driver found for browser_type. '
393+ 'Please specify a different browser type or download an appropriate driver.' )
394+ else :
395+ self .browser_type = browser_type
374396
375- self .browser = browser
376- self .updateHomePage ()
397+ self .browser = browser
398+ self .updateHomePage ()
377399
378400
379401 def setJObID (self , job_id ):
@@ -384,7 +406,8 @@ def setJObID(self, job_id):
384406 :type browser_type: int
385407 """
386408 self .job_id = job_id
387- self .viewResults ()
409+ if self .no_data :
410+ self .viewResults ()
388411
389412
390413 def viewResults (self ):
@@ -428,25 +451,56 @@ def goToWorkDir(self):
428451 self .browser .visit (url )
429452
430453
431- def parseChemicals (self ):
454+ def parseChemicals (self , filename = None ):
432455 """Go to working directory and parse chemicals for query protein.
433456 Updates self.chemical_data"""
457+
458+ if filename is None :
459+ filename = self .filename
460+
434461 try :
435- self .goToWorkDir ()
436-
437- if self .data_source == 'DrugBank' :
438- filename = 'known_drugs_for_query_protein.txt'
439- else :
440- filename = 'known_chemicals_for_query_protein.txt'
462+ if filename is not None :
463+ if not self .no_data :
464+ return True
465+
466+ if not isinstance (filename , str ):
467+ raise TypeError ('filename should be a string' )
468+
469+ if os .path .isfile (filename ):
470+ # read the contents
471+ LOGGER .info ('reading chemicals from {0}' .format (filename ))
472+ stream = openFile (filename , 'rt' )
473+ lines = stream .readlines ()
474+ stream .close ()
475+ self .no_data = False
476+ else :
477+ # filename contains a filename for writing
478+ self .no_data = True
441479
442- self .browser .find_by_text (filename )[0 ].click ()
443-
444- import requests
445- html = requests .get (self .browser .url ).content
446- if PY3K :
447- html = html .decode ()
480+ self .filename = filename
481+
482+ if self .no_data :
483+ self .goToWorkDir ()
484+
485+ if self .data_source == 'DrugBank' :
486+ data_filename = 'known_drugs_for_query_protein.txt'
487+ else :
488+ data_filename = 'known_chemicals_for_query_protein.txt'
448489
449- lines = html .split ('\n ' )
490+ self .browser .find_by_text (data_filename )[0 ].click ()
491+
492+ import requests
493+ html = requests .get (self .browser .url ).content
494+ if PY3K :
495+ html = html .decode ()
496+
497+ if filename is not None :
498+ LOGGER .info ('writing chemicals to {0}' .format (filename ))
499+ out = open (filename , 'w' )
500+ out .write (html )
501+ out .close ()
502+
503+ lines = html .split ('\n ' )
450504
451505 self .fields = lines [0 ].split ('\t ' )
452506 self .num_fields = len (self .fields )
@@ -467,28 +521,30 @@ def parseChemicals(self):
467521 self .chemical_data = np .empty (self .num_rows , dtype = dtypes )
468522
469523 for i , line in enumerate (lines [1 :self .num_rows + 1 ]):
470- items = line .split ('\t ' )
524+ items = line .strip (). split ('\t ' )
471525 if len (items ) != self .num_fields :
472526 raise ValueError ('line {0} has the wrong number of fields' .format (i + 1 ))
473527
474528 for j , item in enumerate (items ):
475529 self .chemical_data [i ][j ] = item
476530 except :
477- success = False
531+ self . no_data = True
478532 else :
479- success = True
480- return success
533+ self . no_data = False
534+ return not self . no_data
481535
482536
def quit(self):
    """Quit the browser held in ``self.browser``, if there is one.

    Nothing happens when ``self.browser`` is **None** or was never set,
    so this is safe to call on an instance that has no live browser.
    """
    # getattr keeps this safe even when the attribute was never assigned.
    browser = getattr(self, 'browser', None)
    if browser is not None:
        browser.quit()
485540
486541
487542class QuartataChemicalRecord (object ):
488543 """Class for handling chemical data from QuartataWebBrowser"""
489544
490545 def __init__ (self , data_source = None , drug_group = None , input_type = None , query_type = None ,
491- data = None , num_predictions = None , browser_type = None , job_id = None ):
546+ data = None , num_predictions = None , browser_type = None , job_id = None ,
547+ filename = None ):
492548 """Instantiate a QuartataChemicalRecord object instance.
493549 Inputs are the same as QuartataWebBrowser.
494550 """
@@ -502,13 +558,14 @@ def __init__(self, data_source=None, drug_group=None, input_type=None, query_typ
502558 self .num_predictions = num_predictions
503559 self .browser_type = browser_type
504560 self .job_id = job_id
561+ self .filename = filename
505562
506563 self .isSuccess = self .fetch (data_source , drug_group , input_type , query_type ,
507- data , num_predictions , browser_type , job_id )
564+ data , num_predictions , browser_type , job_id , filename )
508565
509566
510567 def fetch (self , data_source = None , drug_group = None , input_type = None , query_type = None ,
511- data = None , num_predictions = None , browser_type = None , job_id = None ):
568+ data = None , num_predictions = None , browser_type = None , job_id = None , filename = None ):
512569 """Fetch data"""
513570 if data_source is None :
514571 data_source = self .data_source
@@ -520,20 +577,28 @@ def fetch(self, data_source=None, drug_group=None, input_type=None, query_type=N
520577 query_type = self .query_type
521578 if data is None :
522579 data = self .data
580+
581+ if data is None :
582+ raise ValueError ('data cannot be None' )
583+
523584 if num_predictions is None :
524585 num_predictions = self .num_predictions
525586 if browser_type is None :
526587 browser_type = self .browser_type
527588 if job_id is None :
528589 job_id = self .job_id
590+ if filename is None :
591+ filename = self .filename
529592
530593 self .qwb = QuartataWebBrowser (data_source , drug_group , input_type , query_type ,
531- data , num_predictions , browser_type , job_id )
594+ data , num_predictions , browser_type , job_id , filename )
532595
533596 isSuccess = self .qwb .parseChemicals ()
534597 self .qwb .quit ()
535598
536599 self ._chemData = self .qwb .chemical_data
600+ if self ._chemData is None :
601+ raise ValueError ('' )
537602 chem_temp_dict = dict ()
538603 listAll = []
539604 for temp in self ._chemData :
@@ -671,20 +736,27 @@ def filter(self, lower_weight=None, upper_weight=None, cutoff_score=None):
671736 filterDict = {'lower_MW' : filterListLowerMW , 'upper_MW' : filterListUpperMW , 'conf_score' : filterListConf }
672737 self ._filterList = filterList
673738 self ._filterDict = filterDict
674- self ._list = list ( set ( self ._listAll ) - set ( filterList ))
739+ self ._list = [ item for item in self ._listAll if not item in filterList ]
675740 LOGGER .info (str (len (self ._listAll )- len (self ._list )) + ' chemicals have been filtered out from ' + str (len (self ._listAll ))+ ' QuartataWeb hits (remaining: ' + str (len (self ._list ))+ ').' )
676741 return self ._list
677742
678743
679744
def searchQuartataWeb(data_source=None, drug_group=None, input_type=None, query_type=None,
                      data=None, num_predictions=None, browser_type=None, job_id=None,
                      filename=None, result_type='Chemical'):
    """Wrapper function for searching QuartataWeb.

    All arguments other than *result_type* are forwarded, in order, to
    :class:`QuartataChemicalRecord`.

    :arg filename: a filename for a file that contains the results
        or a file to save the results in tsv format
    :type filename: str

    :arg result_type: type of results to get from QuartataWeb.
        So far only ``'Chemical'`` is supported.
    :type result_type: str

    :returns: a :class:`QuartataChemicalRecord` when *result_type* is
        ``'Chemical'``; otherwise a warning is logged and **None** is returned
    """
    if result_type == 'Chemical':
        return QuartataChemicalRecord(data_source, drug_group, input_type, query_type,
                                      data, num_predictions, browser_type, job_id,
                                      filename)

    # Any other result type is not implemented: warn and hand back nothing.
    LOGGER.warn('No other result types are supported yet')
    return None
689761
# Append the QuartataWebBrowser constructor documentation to the wrapper's
# docstring. Docstrings are None when Python runs with -OO, in which case the
# concatenation would raise TypeError, so skip it then.
if (searchQuartataWeb.__doc__ is not None
        and QuartataWebBrowser.__init__.__doc__ is not None):
    searchQuartataWeb.__doc__ += "\n" + QuartataWebBrowser.__init__.__doc__
0 commit comments