
Commit 9a9bc89

add hashing to check if update is needed
1 parent bcfa8f9 commit 9a9bc89

4 files changed: 112 additions and 57 deletions

pyhdx/support.py

Lines changed: 20 additions & 0 deletions
@@ -13,6 +13,26 @@
 from dask.distributed import Client
 
 
+def make_tuple(item):
+    if isinstance(item, list):
+        return tuple(make_tuple(i) for i in item)
+    elif isinstance(item, dict):
+        return tuple((key, make_tuple(value)) for key, value in item.items())
+    else:
+        return item
+
+
+def hash_dataframe(df):
+    try:
+        tup = (*pd.util.hash_pandas_object(df, index=True).values, *df.columns, *df.columns.names, df.index.name)
+
+    except TypeError:
+        print(df)
+        print('hoi')
+
+    return hash(tup)
+
+
 def multiindex_apply_function(
     index: pd.MultiIndex,
     level: int,
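
A brief usage sketch (not part of the commit; it only assumes the hash_dataframe helper added above is importable from pyhdx.support): identical frames hash to the same value, so a caller can compare hashes to decide whether downstream updates are actually needed.

    import pandas as pd
    from pyhdx.support import hash_dataframe  # helper added in this commit

    df = pd.DataFrame({'deltaG': [10.2, 11.5, 9.8]}, index=['r1', 'r2', 'r3'])

    h1 = hash_dataframe(df)
    h2 = hash_dataframe(df.copy())   # equal content -> equal hash
    assert h1 == h2

    df.loc['r1', 'deltaG'] = 12.0    # changing any value changes the hash
    assert hash_dataframe(df) != h1

Note that the except TypeError branch above only prints debug output and still falls through to return hash(tup), so an unhashable frame would end in an UnboundLocalError there.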

pyhdx/web/controllers.py

Lines changed: 6 additions & 3 deletions
@@ -772,14 +772,17 @@ def _action_add_comparison(self):
 
         combined = pd.concat([ddG, cov], axis=1)
 
+        #todo use _add_table method on source
         if current_df is not None:
             new_df = pd.concat([current_df, combined], axis=1)
         else:
             new_df = combined
 
-        self.parent.sources['main'].tables['ddG_comparison'] = new_df
-        self.parent.sources['main'].param.trigger('tables') #todo check/remove tables trigger
-        self.parent.sources['main'].updated = True
+        #self.parent.sources['main'].tables['ddG_comparison'] = new_df
+        self.src.add_table('ddG_comparison', new_df)
+
+        #self.parent.sources['main'].param.trigger('tables') #todo check/remove tables trigger
+        self.src.updated = True
 
 
 class ColorTransformControl(PyHDXControlPanel):
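
The controller now routes the combined frame through add_table on the source instead of writing into the tables dict and triggering updates by hand. A minimal stand-in sketch of what that call records (MiniTableSource below is hypothetical, for illustration only; only hash_dataframe comes from this commit):

    import pandas as pd
    from pyhdx.support import hash_dataframe

    class MiniTableSource:
        """Hypothetical stand-in mirroring Source.add_table from sources.py below."""
        def __init__(self):
            self.tables = {}
            self.hashes = {}

        def add_table(self, table, df):
            self.hashes[table] = hash_dataframe(df)  # content hash stored next to the table
            self.tables[table] = df

    src = MiniTableSource()
    src.add_table('ddG_comparison', pd.DataFrame({'ddG': [0.1, 0.2], 'covariance': [0.01, 0.02]}))

    seen = src.hashes['ddG_comparison']              # a consumer remembers the last hash
    src.add_table('ddG_comparison', pd.DataFrame({'ddG': [0.1, 0.2], 'covariance': [0.01, 0.02]}))
    needs_update = src.hashes['ddG_comparison'] != seen   # False: content did not change

Because the hash is stored alongside the table, downstream consumers can skip recomputation when a table is re-added with unchanged content.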

pyhdx/web/sources.py

Lines changed: 16 additions & 3 deletions
@@ -7,7 +7,7 @@
 from pyhdx import TorchFitResult
 from pyhdx.fitting import RatesFitResult
 from pyhdx.models import HDXMeasurement, HDXMeasurementSet
-from pyhdx.support import multiindex_astype, multiindex_set_categories
+from pyhdx.support import multiindex_astype, multiindex_set_categories, hash_dataframe
 
 
 class Source(param.Parameterized):
@@ -39,6 +39,13 @@ def get(self):
         else:
             raise ValueError("TableSource has multiple tables, use `get_table`")
 
+    def add_table(self, table: str, df: pd.DataFrame):
+        table_hash = hash_dataframe(df)
+        self.hashes[table] = table_hash
+        self.tables[table] = df
+
+        #todo self.updated = True?
+
     def get_table(self, table):
         df = self.tables.get(table, None)
 
@@ -199,7 +206,8 @@ def _add_table(self, df, table, categorical=True):
         if categorical:
             new.columns = multiindex_astype(new.columns, 0, 'category')
             new.columns = multiindex_set_categories(new.columns, 0, categories, ordered=True)
-        self.tables[table] = new
+
+        self.add_table(table, new)
 
 
 class PDBSource(Source):
@@ -208,6 +216,8 @@ class PDBSource(Source):
 
     pdb_files = param.Dict({}, doc='Dictionary with id: pdb_string pdb file entries')
 
+    hashes = param.Dict({})
+
     max_entries = param.Number(
         1,
         doc='set maximum size for pdb files. set to none for infinite size. set to one for single pdb mode')
@@ -219,11 +229,13 @@ def add_from_pdb(self, pdb_id):
         pdb_string = response.read().decode()
 
         self.pdb_files[pdb_id] = pdb_string
+        self.hashes[pdb_id] = hash(pdb_string)
         self.updated = True
 
     def add_from_string(self, pdb_string, pdb_id):
         self._make_room()
         self.pdb_files[pdb_id] = pdb_string
+        self.hashes[pdb_id] = hash(pdb_string)
         self.updated = True
 
     def _make_room(self):
@@ -233,9 +245,10 @@ def _make_room(self):
         elif len(self.pdb_files) == self.max_entries:
             key = next(iter(self.pdb_files))
             del self.pdb_files[key]
+            del self.hashes[key]
 
     def get(self):
-        """returns the first entry in the """
+        """returns the first entry in the pdb source"""
         return next(iter(self.pdb_files.values()))
 
     def get_pdb(self, pdb_id):
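
PDBSource applies the same bookkeeping to raw PDB strings using the built-in hash. A rough sketch of that pattern with plain dictionaries (hypothetical names, loosely mirroring add_from_string and _make_room above):

    pdb_files, hashes = {}, {}

    def add_from_string(pdb_string, pdb_id):
        pdb_files[pdb_id] = pdb_string
        hashes[pdb_id] = hash(pdb_string)   # content hash kept alongside the entry

    def evict(pdb_id):
        del pdb_files[pdb_id]
        del hashes[pdb_id]                  # hashes are removed together with their entries

    add_from_string('HEADER    EXAMPLE PDB\nEND\n', '1abc')
    assert hashes['1abc'] == hash(pdb_files['1abc'])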

pyhdx/web/transforms.py

Lines changed: 70 additions & 51 deletions
@@ -1,27 +1,33 @@
 import itertools
+import warnings
 
 import numpy as np
 import pandas as pd
 import panel as pn
 import param
 from param.parameterized import default_label_formatter
 
-from pyhdx.support import autowrap
+from pyhdx.support import autowrap, make_tuple
 from pyhdx.web.sources import Source
+from pyhdx.web.cache import Cache
 
 
 class Transform(param.Parameterized):
-    """these transforms get the data from source"""
+    """Gets data and applies transform"""
 
     _type = 'base'
 
     widgets = param.Dict(default={})
 
+    source = param.ClassSelector(class_=Source)
+
     updated = param.Event()
 
     redrawn = param.Event(doc="event gets triggered when widgets are changed and the controller needs to redraw them")
 
-    cache = param.ClassSelector(class_=Cache)
+    _hash = param.Integer(doc='Hash of current transform state')
+
+    _cache = param.ClassSelector(default=None, class_=Cache)
 
     def __init__(self, **params):
         super().__init__(**params)
@@ -30,14 +36,40 @@ def get(self):
         """method called to get the dataframe"""
         return None
 
+    # perhaps htey should all be private to prevent namespace collision with filter options
+    @property
+    def source_hash(self):
+        return self.source.hash
+
+    @property
+    def hash_key(self):
+        """hashable key describing the transform"""
+        return tuple((item, make_tuple(val)) for item, val in self.param.get_param_values() if not item.startswith('_'))
+
+    @property
+    def hash(self):
+        tup = (*self.hash_key, self.source_hash)
+
+        return hash(tup)
+
+    def update_hash(self):
+        if self.hash == self._hash:
+            return False
+        else:
+            self._hash = self.hash
+            return True
+
+    def update(self):
+        if self.update_hash():
+            self._update_options()
+            self.updated = True
+
 
 class TableSourceTransform(Transform):
     """transform which picks the correct table from the source"""
 
     _type = 'table_source'
 
-    source = param.ClassSelector(class_=Source)
-
     table = param.Selector(default=None, doc="""
         The table being transformed. """)
 
@@ -56,14 +88,8 @@ def get(self):
         return df
 
     @property
-    def hash(self):
-        # or sources can have multiple hashes?
-        # / objects can have multiple hashes?
-        return tuple([self._type, self.table, self.source.hashes[self.table]])
-
-    @param.depends('table', watch=True)
-    def _table_updated(self):
-        self.updated = True
+    def source_hash(self):
+        return self.source.hashes.get(self.table, hash(None))
 
     def _update_options(self):
         options = self.source.get_tables()
@@ -73,10 +99,11 @@ def _update_options(self):
         if not self.table and options:
             self.table = options[0]
 
-    @param.depends('source.updated', watch=True)
+    @param.depends('source.updated', 'table', watch=True)
     def update(self):
         self._update_options()
-        self.updated = True
+        if self.update_hash():
+            self.updated = True
 
 
 class AppTransform(Transform):
@@ -116,43 +143,37 @@ class CrossSectionTransform(AppTransform):
     empty_select = param.Boolean(default=False, doc="""
         Add an option to Select widgets to indicate select all on this level.""")
 
-    # stepwise = param.Boolean(
-    #     default=False,
-    #     doc='Apply xs stepwise (one call per level)'
-    # )
-
     def __init__(self, **params):
         super().__init__(**params)
         self.index = None # index is the df index which determines the selector's options
         self.update()
 
     @param.depends('source.updated', watch=True)
     def update(self):
-        #todo only redraw if only options are changed or always?
-        #todo remove watchers when new transforms are created?
-
-
-        old_index = self.index
-        df = self.source.get()
-
-        if df is None:
-            return
-        self.index = df.columns if self.axis else df.index
-        self._names = self.names or self.index.names
-
-        if old_index is not None and self.index.nlevels == old_index.nlevels:
-            # no redraw needed, only update selectors options
-            options = list(self.index.unique(level=0))
-            self.selectors[0].options = options
-            self.selectors[0].param.trigger('value')
-            for name, selector in zip(self._names, self.selectors):
-                selector.name = name # todo requires testing if the names are really updated or not (they arent)
-                selector.label = name # todo requires testing if the names are really updated or not
-            self.redrawn = True
-        else:
-            self.redraw()
+        if self.update_hash():
+            #todo remove watchers when new transforms are created?
+
+            old_index = self.index
+            df = self.source.get()
+
+            if df is None:
+                return
+            self.index = df.columns if self.axis else df.index
+            self._names = self.names or self.index.names
+
+            if old_index is not None and self.index.nlevels == old_index.nlevels:
+                # no redraw needed, only update selectors options
                options = list(self.index.unique(level=0))
+                self.selectors[0].options = options
+                self.selectors[0].param.trigger('value')
+                for name, selector in zip(self._names, self.selectors):
+                    selector.name = name # todo requires testing if the names are really updated or not (they arent)
+                    selector.label = name # todo requires testing if the names are really updated or not
+                self.redrawn = True
+            else:
+                self.redraw()
 
-        self.updated = True
+            self.updated = True
 
     def redraw(self):
         # create new widgets
@@ -163,6 +184,7 @@ def redraw(self):
 
         self.widgets = {name: pn.widgets.Select(name=default_label_formatter(name)) for name in self._names[:n_levels]}
 
+        #todo perhaps do self.param.add_parameter?
         self.selectors = list(self.widgets.values())
         for selector in self.selectors:
             selector.param.watch(self._selector_changed, ['value'], onlychanged=True)
@@ -237,6 +259,7 @@ class ApplyCmapOptTransform(AppTransform):
     #def check_args(... ) #todo method for constructor to see if the supplied kwargs are correct for this object
 
     def __init__(self, opts, **params): #opts: list of opts objects
+        warnings.warn('ApplyCmapOptTransform does not implement hashing', NotImplementedError)
         self._opts_dict = {o.name: o for o in opts}
         opts = list(self._opts_dict.keys())
         params['opts'] = opts
@@ -349,6 +372,8 @@ class GenericTransform(AppTransform):
 
     pd_function = param.String()
 
+    kwargs = param.Dict(doc='dict of additional kwargs')
+
     def __init__(self, **params):
         self.kwargs = {k: v for k, v in params.items() if k not in self.param}
         super().__init__(**{k: v for k, v in params.items() if k in self.param})
@@ -467,7 +492,7 @@ def __init__(self, **params):
 
 
 class SampleTransform(AppTransform):
-    """subsamples dataframe along """
+    """subsamples dataframe along specified axis"""
 
     _type = 'sample'
 
@@ -513,12 +538,6 @@ def get(self):
         return df
 
 
-class TransformTransform(AppTransform):
-    pd_function = param.String('transform')
-    def __init__(self, **params):
-        raise NotImplementedError()
-
-
 class PipeTransform(AppTransform):
     """applies a list of pandas functions