|
18 | 18 |
|
19 | 19 | from __future__ import absolute_import |
20 | 20 |
|
| 21 | +import numpy as np |
21 | 22 | import pandas as pd |
| 23 | +from requests import HTTPError |
22 | 24 |
|
23 | 25 | from .db import FragmentsDb |
24 | 26 | from .pairs import similar, open_similarity_matrix |
25 | | -from .webservice.client import WebserviceClient |
| 27 | +from .webservice.client import WebserviceClient, IncompleteFragments |
| 28 | + |
| 29 | + |
class IncompleteHits(Exception):
    """Raised when some query fragment identifiers could not be found.

    The hits that could be found are carried on the exception,
    so a caller can still use the partial result.

    Attributes:
        absent_identifiers (List[str]): List of identifiers for which no information could be found
        hits (pandas.DataFrame): Data frame with query_frag_id, hit_frag_id and score columns
    """

    def __init__(self, absent_identifiers, hits):
        """List of hits and list of identifiers for which no information could be found

        Args:
            absent_identifiers (List[str]): List of identifiers for which no information could be found
            hits (pandas.DataFrame): Data frame with query_frag_id, hit_frag_id and score columns
        """
        message = 'Some query fragment identifiers could not be found'
        super(IncompleteHits, self).__init__(message)
        self.absent_identifiers = absent_identifiers
        self.hits = hits

    def __reduce__(self):
        # Exception.args only holds the message, so the default reduce would
        # rebuild the exception with a single argument and fail with TypeError.
        # Rebuild from the two constructor arguments instead, which keeps the
        # exception picklable and copyable.
        return self.__class__, (self.absent_identifiers, self.hits)
26 | 42 |
|
27 | 43 |
|
def similarities(queries, similarity_matrix_filename_or_url, cutoff, limit=1000):
    """Find the hits that are similar to each of the query fragments.

    When `similarity_matrix_filename_or_url` starts with 'http' the hits are
    fetched from a webservice, otherwise it is opened as a local similarity matrix.

    Args:
        queries: Iterable of query fragment identifiers
        similarity_matrix_filename_or_url (str): Filename of the similarity matrix
            or url of the webservice
        cutoff: Passed on to the similarity lookup
            (presumably the minimum score of a hit, confirm against the lookup)
        limit (int): Passed on to the similarity lookup
            (presumably the maximum number of hits per query, confirm against the lookup)

    Returns:
        pandas.DataFrame: Data frame with query_frag_id, hit_frag_id and score columns

    Raises:
        IncompleteHits: When one or more of the identifiers could not be found.
        requests.HTTPError: When the webservice fails with anything other than a 404.
    """
    columns = ['query_frag_id', 'hit_frag_id', 'score']
    hits = []
    absent_identifiers = []
    if similarity_matrix_filename_or_url.startswith('http'):
        client = WebserviceClient(similarity_matrix_filename_or_url)
        for query in queries:
            try:
                qhits = client.similar_fragments(query, cutoff, limit)
            except HTTPError as e:
                # Compare against None explicitly: a requests response with an
                # error status is falsy, so a plain truth test would misfire.
                response = getattr(e, 'response', None)
                if response is None or response.status_code != 404:
                    # Only a 404 means the identifier is unknown; any other
                    # failure must not be reported as a partial result.
                    raise
                absent_identifiers.append(query)
            else:
                hits.extend(qhits)
    else:
        similarity_matrix = open_similarity_matrix(similarity_matrix_filename_or_url)
        try:
            for query in queries:
                try:
                    for query_id, hit_id, score in similar(query, similarity_matrix, cutoff, limit):
                        hits.append({'query_frag_id': query_id,
                                     'hit_frag_id': hit_id,
                                     'score': score,
                                     })
                except KeyError:
                    absent_identifiers.append(query)
        finally:
            # Close the matrix even when the lookup fails unexpectedly
            similarity_matrix.close()

    if absent_identifiers:
        if hits:
            df = pd.DataFrame(hits, columns=columns)
        else:
            # empty hits array will give dataframe without typed columns
            df = pd.DataFrame({'query_frag_id': pd.Series(dtype=str),
                               'hit_frag_id': pd.Series(dtype=str),
                               'score': pd.Series(dtype=np.double)
                               }, columns=columns)
        raise IncompleteHits(absent_identifiers, df)

    return pd.DataFrame(hits, columns=columns)
77 | 115 |
|
78 | 116 |
|
79 | 117 | def fragments_by_pdb_codes(pdb_codes, fragments_db_filename_or_url, prefix=''): |
@@ -104,16 +142,32 @@ def fragments_by_pdb_codes(pdb_codes, fragments_db_filename_or_url, prefix=''): |
104 | 142 |
|
105 | 143 | Returns: |
106 | 144 | pandas.DataFrame: Data frame with fragment information |
| 145 | +
|
| 146 | + Raises: |
| 147 | + IncompleteFragments: When one or more of the identifiers could not be found. |
107 | 148 | """ |
108 | 149 | if fragments_db_filename_or_url.startswith('http'): |
109 | 150 | client = WebserviceClient(fragments_db_filename_or_url) |
110 | | - fragments = client.fragments_by_pdb_codes(pdb_codes) |
| 151 | + try: |
| 152 | + fragments = client.fragments_by_pdb_codes(pdb_codes) |
| 153 | + except IncompleteFragments as e: |
| 154 | + df = pd.DataFrame(e.fragments) |
| 155 | + df.rename(columns=lambda x: prefix + x, inplace=True) |
| 156 | + raise IncompleteFragments(e.absent_identifiers, df) |
111 | 157 | else: |
112 | 158 | fragmentsdb = FragmentsDb(fragments_db_filename_or_url) |
113 | 159 | fragments = [] |
| 160 | + absent_identifiers = [] |
114 | 161 | for pdb_code in pdb_codes: |
115 | | - for fragment in fragmentsdb.by_pdb_code(pdb_code): |
116 | | - fragments.append(fragment) |
| 162 | + try: |
| 163 | + for fragment in fragmentsdb.by_pdb_code(pdb_code): |
| 164 | + fragments.append(fragment) |
| 165 | + except LookupError as e: |
| 166 | + absent_identifiers.append(pdb_code) |
| 167 | + if absent_identifiers: |
| 168 | + df = pd.DataFrame(fragments) |
| 169 | + df.rename(columns=lambda x: prefix + x, inplace=True) |
| 170 | + raise IncompleteFragments(absent_identifiers, df) |
117 | 171 |
|
118 | 172 | df = pd.DataFrame(fragments) |
119 | 173 | df.rename(columns=lambda x: prefix + x, inplace=True) |
@@ -146,13 +200,31 @@ def fragments_by_id(fragment_ids, fragments_db_filename_or_url, prefix=''): |
146 | 200 |
|
147 | 201 | Returns: |
148 | 202 | pandas.DataFrame: Data frame with fragment information |
| 203 | +
|
| 204 | + Raises: |
| 205 | + IncompleteFragments: When one or more of the identifiers could not be found. |
149 | 206 | """ |
150 | 207 | if fragments_db_filename_or_url.startswith('http'): |
151 | 208 | client = WebserviceClient(fragments_db_filename_or_url) |
152 | | - fragments = client.fragments_by_id(fragment_ids) |
| 209 | + try: |
| 210 | + fragments = client.fragments_by_id(fragment_ids) |
| 211 | + except IncompleteFragments as e: |
| 212 | + df = pd.DataFrame(e.fragments) |
| 213 | + df.rename(columns=lambda x: prefix + x, inplace=True) |
| 214 | + raise IncompleteFragments(e.absent_identifiers, df) |
153 | 215 | else: |
154 | 216 | fragmentsdb = FragmentsDb(fragments_db_filename_or_url) |
155 | | - fragments = [fragmentsdb[frag_id] for frag_id in fragment_ids] |
| 217 | + fragments = [] |
| 218 | + absent_identifiers = [] |
| 219 | + for frag_id in fragment_ids: |
| 220 | + try: |
| 221 | + fragments.append(fragmentsdb[frag_id]) |
| 222 | + except KeyError: |
| 223 | + absent_identifiers.append(frag_id) |
| 224 | + if absent_identifiers: |
| 225 | + df = pd.DataFrame(fragments) |
| 226 | + df.rename(columns=lambda x: prefix + x, inplace=True) |
| 227 | + raise IncompleteFragments(absent_identifiers, df) |
156 | 228 |
|
157 | 229 | df = pd.DataFrame(fragments) |
158 | 230 | df.rename(columns=lambda x: prefix + x, inplace=True) |
|
0 commit comments