Merge branch 'master' of github.com:prody/ProDy into devel-5

jamesmkrieger · jamesmkrieger · commit bf29aacff2cf · 2020-12-16T10:46:18.000Z
diff --git a/prody/database/dali.py b/prody/database/dali.py
@@ -469,7 +469,7 @@ def filter(self, cutoff_len=None, cutoff_rmsd=None, cutoff_Z=None, cutoff_identi
         filterDict = {'len': filterListLen, 'rmsd': filterListRMSD, 'Z': filterListZ, 'identity': filterListIdentity}
         self._filterList = filterList
         self._filterDict = filterDict
-        self._pdbList = [self._pdbListAll[0]] + list(set(list(self._pdbListAll[1:])) - set(filterList))
+        self._pdbList = [self._pdbListAll[0]] + [item for item in self._pdbListAll[1:] if not item in filterList]
         LOGGER.info(str(len(filterList)) + ' PDBs have been filtered out from '+str(len(pdbListAll))+' Dali hits (remaining: '+str(len(pdbListAll)-len(filterList))+').')
         return self._pdbList
     
diff --git a/prody/database/quartataweb.py b/prody/database/quartataweb.py
@@ -13,8 +13,10 @@
 """
 
 from prody import PY3K, LOGGER
-import numpy as np
+from prody.utilities import openFile
 
+import numpy as np
+import os
 
 __all__ = ['QuartataWebBrowser', 'QuartataChemicalRecord', 'searchQuartataWeb']
 
@@ -23,7 +25,7 @@ class QuartataWebBrowser(object):
     """Class to browse the QuartataWeb website."""
 
     def __init__(self, data_source=None, drug_group=None, input_type=None, query_type=None, 
-                 data=None, num_predictions=None, browser_type=None, job_id=None):
+                 data=None, num_predictions=None, browser_type=None, job_id=None, tsv=None):
         """Instantiate a QuartataWebBrowser object instance.
 
         :arg data_source: source database for QuartataWeb analysis
@@ -86,6 +88,10 @@ def __init__(self, data_source=None, drug_group=None, input_type=None, query_typ
         :arg job_id: job ID for accessing previous jobs
             Default is ``None``
         :type browser_type: int        
+
+        :arg tsv: path to a tsv file; if the file exists, the results are
+            read from it, otherwise fetched results are saved to it
+        :type tsv: str
         """
 
         self.browser_type = None
@@ -97,9 +103,18 @@ def __init__(self, data_source=None, drug_group=None, input_type=None, query_typ
         self.query_type = None
         self.data = None
         self.num_predictions = None
+        self.chemical_data = None
 
         self.job_id = job_id
 
+        self.filename = None
+        self.no_data = True
+        if tsv is not None:
+            try:
+                self.parseChemicals(tsv)
+            except:
+                raise ValueError('please provide a valid filename')
+
         self.setBrowserType(browser_type)
         self.setDataSource(data_source)
         self.setDrugGroup(drug_group)
@@ -176,7 +191,8 @@ def setDataSource(self, data_source):
             raise ValueError('data_source should be DrugBank, STITCH or None')
 
         self.data_source = data_source
-        self.updateHomePage()
+        if self.no_data:
+            self.updateHomePage()
 
     def setDrugGroup(self, group):
         """Set drug_group and update home page
@@ -198,7 +214,9 @@ def setDrugGroup(self, group):
                 raise ValueError('group should be approved, all or None')
 
             self.drug_group = group
-            self.updateHomePage()
+            if self.no_data:
+                self.updateHomePage()
+
         elif group is not None:
             LOGGER.warn('there are no groups when using STITCH')
 
@@ -219,7 +237,8 @@ def setInputType(self, input_type):
             raise ValueError('input_type should be 1, 2 or None')
 
         self.input_type = input_type
-        self.updateHomePage()
+        if self.no_data:
+            self.updateHomePage()
 
     def setQueryType(self, query_type):
         """Set query_type and update home page
@@ -249,7 +268,8 @@ def setQueryType(self, query_type):
             raise ValueError('query_type should be 1, 2, 3 or None')
 
         self.query_type = query_type
-        self.updateHomePage()
+        if self.no_data:
+            self.updateHomePage()
 
     def setData(self, data):
         """Set data and update home page
@@ -295,7 +315,8 @@ def setData(self, data):
                             'each item in data must be a pair with ; as delimiter')
 
         self.data = data
-        self.updateHomePage()
+        if self.no_data:
+            self.updateHomePage()
 
     def setNumPredictions(self, num_predictions):
         """Set num_predictions and update home page
@@ -323,7 +344,8 @@ def setNumPredictions(self, num_predictions):
             raise ValueError('2nd num_predictions must be <= 20')
 
         self.num_predictions = num_predictions
-        self.updateHomePage()
+        if self.no_data:
+            self.updateHomePage()
 
     def setBrowserType(self, browser_type):
         """Set browser_type and update home page
@@ -352,28 +374,28 @@ def setBrowserType(self, browser_type):
                         url = "http://quartata.csb.pitt.edu"
                         browser.visit(url)
                     except WebDriverException:
-                        raise ValueError(
-                            'No web driver found for Chrome or Firefox. Please specify a browser type or download an appropriate driver.')
+                        raise ValueError('No web driver found for Chrome or Firefox. '
+                                         'Please specify a different browser type or download an appropriate driver.')
                     else:
                         self.browser_type = 'firefox'
                 else:
                     self.browser_type = 'chrome'
 
-        elif not isinstance(browser_type, str):
-            raise TypeError('browser_type should be a string or None')
-        else:
-            try:
-                browser = Browser(browser_type)
-                url = "http://quartata.csb.pitt.edu"
-                browser.visit(url)
-            except WebDriverException:
-                raise ValueError(
-                    'No web driver found for browser_type. Please specify a different browser type or download an appropriate driver.')
+            elif not isinstance(browser_type, str):
+                raise TypeError('browser_type should be a string or None')
             else:
-                self.browser_type = browser_type
+                try:
+                    browser = Browser(browser_type)
+                    url = "http://quartata.csb.pitt.edu"
+                    browser.visit(url)
+                except WebDriverException:
+                    raise ValueError('No web driver found for browser_type. '
+                                     'Please specify a different browser type or download an appropriate driver.')
+                else:
+                    self.browser_type = browser_type
 
-        self.browser = browser
-        self.updateHomePage()
+            self.browser = browser
+            self.updateHomePage()
 
 
     def setJObID(self, job_id):
@@ -384,7 +406,8 @@ def setJObID(self, job_id):
         :type browser_type: int
         """
         self.job_id = job_id
-        self.viewResults()
+        if self.no_data:
+            self.viewResults()
 
 
     def viewResults(self):
@@ -428,25 +451,56 @@ def goToWorkDir(self):
         self.browser.visit(url)
 
 
-    def parseChemicals(self):
+    def parseChemicals(self, filename=None):
         """Go to working directory and parse chemicals for query protein.
         Updates self.chemical_data"""
+        
+        if filename is None:
+            filename = self.filename
+
         try:
-            self.goToWorkDir()
-            
-            if self.data_source == 'DrugBank':
-                filename = 'known_drugs_for_query_protein.txt'
-            else:
-                filename = 'known_chemicals_for_query_protein.txt'
+            if filename is not None:
+                if not self.no_data:
+                    return True
+
+                if not isinstance(filename, str):
+                    raise TypeError('filename should be a string')
+
+                if os.path.isfile(filename):
+                    # read the contents
+                    LOGGER.info('reading chemicals from {0}'.format(filename))
+                    stream = openFile(filename, 'rt')
+                    lines = stream.readlines()
+                    stream.close()
+                    self.no_data = False
+                else:
+                    # filename contains a filename for writing
+                    self.no_data = True
 
-            self.browser.find_by_text(filename)[0].click()
-            
-            import requests            
-            html = requests.get(self.browser.url).content
-            if PY3K:
-                html = html.decode()
+                self.filename = filename
+
+            if self.no_data:
+                self.goToWorkDir()
+                
+                if self.data_source == 'DrugBank':
+                    data_filename = 'known_drugs_for_query_protein.txt'
+                else:
+                    data_filename = 'known_chemicals_for_query_protein.txt'
 
-            lines = html.split('\n')
+                self.browser.find_by_text(data_filename)[0].click()
+                
+                import requests
+                html = requests.get(self.browser.url).content
+                if PY3K:
+                    html = html.decode()
+
+                if filename is not None:
+                    LOGGER.info('writing chemicals to {0}'.format(filename))
+                    out = open(filename, 'w')
+                    out.write(html)
+                    out.close()
+
+                lines = html.split('\n')
 
             self.fields = lines[0].split('\t')
             self.num_fields = len(self.fields)
@@ -467,28 +521,30 @@ def parseChemicals(self):
             self.chemical_data = np.empty(self.num_rows, dtype=dtypes)
 
             for i, line in enumerate(lines[1:self.num_rows+1]):
-                items = line.split('\t')
+                items = line.strip().split('\t')
                 if len(items) != self.num_fields:
                     raise ValueError('line {0} has the wrong number of fields'.format(i+1))
 
                 for j, item in enumerate(items):
                     self.chemical_data[i][j] = item
         except:
-            success = False
+            self.no_data = True
         else:
-            success = True
-        return success
+            self.no_data = False
+        return not self.no_data
 
 
     def quit(self):
-        self.browser.quit()
+        if self.browser is not None:
+            self.browser.quit()
 
 
 class QuartataChemicalRecord(object):
     """Class for handling chemical data from QuartataWebBrowser"""
 
     def __init__(self, data_source=None, drug_group=None, input_type=None, query_type=None, 
-                 data=None, num_predictions=None, browser_type=None, job_id=None):
+                 data=None, num_predictions=None, browser_type=None, job_id=None, 
+                 filename=None):
         """Instantiate a QuartataChemicalRecord object instance.
         Inputs are the same as QuartataWebBrowser.
         """
@@ -502,13 +558,14 @@ def __init__(self, data_source=None, drug_group=None, input_type=None, query_typ
         self.num_predictions = num_predictions
         self.browser_type = browser_type
         self.job_id = job_id
+        self.filename = filename
 
         self.isSuccess = self.fetch(data_source, drug_group, input_type, query_type,
-                                    data, num_predictions, browser_type, job_id)
+                                    data, num_predictions, browser_type, job_id, filename)
 
 
     def fetch(self, data_source=None, drug_group=None, input_type=None, query_type=None, 
-              data=None, num_predictions=None, browser_type=None, job_id=None):
+              data=None, num_predictions=None, browser_type=None, job_id=None, filename=None):
         """Fetch data"""
         if data_source is None:
             data_source = self.data_source
@@ -520,20 +577,28 @@ def fetch(self, data_source=None, drug_group=None, input_type=None, query_type=N
             query_type = self.query_type
         if data is None:
             data = self.data
+
+        if data is None:
+            raise ValueError('data cannot be None')
+
         if num_predictions is None:
             num_predictions = self.num_predictions
         if browser_type is None:
             browser_type = self.browser_type
         if job_id is None:
             job_id = self.job_id
+        if filename is None:
+            filename = self.filename
 
         self.qwb = QuartataWebBrowser(data_source, drug_group, input_type, query_type,
-                                      data, num_predictions, browser_type, job_id)
+                                      data, num_predictions, browser_type, job_id, filename)
         
         isSuccess = self.qwb.parseChemicals()
         self.qwb.quit()
 
         self._chemData = self.qwb.chemical_data
+        if self._chemData is None:
+            raise ValueError('')
         chem_temp_dict = dict()
         listAll = []
         for temp in self._chemData:
@@ -671,20 +736,27 @@ def filter(self, lower_weight=None, upper_weight=None, cutoff_score=None):
         filterDict = {'lower_MW': filterListLowerMW, 'upper_MW': filterListUpperMW, 'conf_score': filterListConf}
         self._filterList = filterList
         self._filterDict = filterDict
-        self._list = list(set(self._listAll) - set(filterList))
+        self._list = [item for item in self._listAll if not item in filterList]
         LOGGER.info(str(len(self._listAll)-len(self._list)) + ' chemicals have been filtered out from '+str(len(self._listAll))+' QuartataWeb hits (remaining: '+str(len(self._list))+').')
         return self._list
     
 
 
 def searchQuartataWeb(data_source=None, drug_group=None, input_type=None, query_type=None, 
-                   data=None, num_predictions=None, browser_type=None, job_id=None, result_type='Chemical'):
-    """Wrapper function for searching QuartataWeb"""
+                      data=None, num_predictions=None, browser_type=None, job_id=None, 
+                      filename=None, result_type='Chemical'):
+    """Wrapper function for searching QuartataWeb.
+
+    :arg result_type: type of results to get from QuartataWeb.
+        So far only ``'Chemical'`` is supported.
+    :type result_type: str
+    """
     if result_type == 'Chemical':
         return QuartataChemicalRecord(data_source, drug_group, input_type, query_type,
-                                    data, num_predictions, browser_type, job_id)
+                                      data, num_predictions, browser_type, job_id,
+                                      filename)
     else:
         LOGGER.warn('No other result types are supported yet')
         return None
 
-searchQuartataWeb.__doc__ += QuartataWebBrowser.__doc__
+searchQuartataWeb.__doc__ += "\n" + QuartataWebBrowser.__init__.__doc__
diff --git a/prody/dynamics/plotting.py b/prody/dynamics/plotting.py
@@ -1197,10 +1197,6 @@ def showPerturbResponse(model, atoms=None, show_matrix=True, select=None, **kwar
         show = showAtomicMatrix(prs_matrix, x_array=sensitivity, 
                                 y_array=effectiveness, atoms=atoms, 
                                 **kwargs)
-        cluster_col = kwargs.pop('cluster_col',False)
-        if cluster_col == False:
-            xlabel('Residues')
-
     else:
         if select is None:
             fig = fig_ = kwargs.pop('figure', None) # this line needs to be in this block
@@ -1341,7 +1337,6 @@ def showAtomicMatrix(matrix, x_array=None, y_array=None, atoms=None, **kwargs):
     ticklabels = kwargs.pop('ticklabels', None)
     text_color = kwargs.pop('text_color', 'k')
     text_color = kwargs.pop('textcolor', text_color)
-    cluster = kwargs.pop('cluster', False)
     interactive = kwargs.pop('interactive', True)
 
     if isinstance(fig, Figure):
diff --git a/prody/utilities/catchall.py b/prody/utilities/catchall.py