Skip to content

Commit b3a502d

Browse files
committed
added caching
1 parent 9a9bc89 commit b3a502d

File tree

5 files changed

+135
-35
lines changed

5 files changed

+135
-35
lines changed

pyhdx/web/apps.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
from pathlib import Path
32

43
import panel as pn
@@ -7,14 +6,18 @@
76
from pyhdx import VERSION_STRING
87
from pyhdx.web.constructor import AppConstructor
98
from pyhdx.web.log import logger
9+
from pyhdx.web.cache import MemoryCache, HybridHDFCache
10+
11+
# Module-level in-memory cache (at most 2000 items); main_app() passes it to
# AppConstructor via the ``cache`` keyword.
cache = MemoryCache(max_items=2000)
1012

13+
#cache = HybridHDFCache(file_path ='test123.h5')
1114

1215
@logger('pyhdx')
1316
def main_app():
1417
cwd = Path(__file__).parent.resolve()
1518
yaml_dict = yaml.safe_load((cwd / 'pyhdx_app.yaml').read_text(encoding='utf-8'))
1619

17-
ctr = AppConstructor(loggers={'pyhdx': main_app.logger})
20+
ctr = AppConstructor(loggers={'pyhdx': main_app.logger}, cache=cache)
1821

1922
ctrl = ctr.parse(yaml_dict)
2023

pyhdx/web/cache.py

Lines changed: 82 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,95 @@
11
import param
2+
import pandas as pd
23

34

45
class Cache(param.Parameterized):
    """No-op cache which defines the cache interface without storing anything.

    Supports ``cache[key]``, ``cache[key] = value`` and ``key in cache``:
    lookups always give ``None``, assignments are discarded and membership
    tests are always ``False``.
    """

    def __contains__(self, item):
        """Report every item as absent, since nothing is ever stored."""
        return False

    def __getitem__(self, item):
        """Return ``None`` for any requested item."""
        return None

    def __setitem__(self, key, value):
        """Discard ``value``; this cache does not retain anything."""
        return None
715

8-
class MemoryCache(param.Parameterized):
16+
17+
class MemoryCache(Cache):
    """In-memory cache backed by a dict, optionally bounded by ``max_items``.

    When the cache is full and a *new* key is added, the oldest inserted
    entry is evicted first (dicts preserve insertion order). Overwriting an
    existing key never evicts anything.
    """

    _cache = param.Dict(default={})

    max_items = param.Integer(
        None,
        doc='Maximum number of items allowed in the cache'
    )

    def __getitem__(self, item):
        """Return the value stored under ``item``.

        :raises KeyError: if ``item`` is not in the cache
        """
        return self._cache[item]

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``, evicting the oldest entries if full.

        If ``max_items`` is zero or negative nothing is retained.
        """
        # Replacing an existing key does not grow the cache, so only new keys
        # can trigger eviction.
        if self.max_items is not None and key not in self._cache:
            if self.max_items <= 0:
                return
            # dict.popitem() removes the *newest* entry (LIFO); remove the
            # first key in iteration order instead so the oldest one goes.
            while len(self._cache) >= self.max_items:
                del self._cache[next(iter(self._cache))]

        self._cache[key] = value

    def __contains__(self, item):
        """Return True if ``item`` is a key in the cache."""
        return item in self._cache
37+
38+
39+
class HybridHDFCache(Cache):
    """
    Hybrid HDFStore / Memory cache

    DataFrames and Series whose memory usage exceeds ``bytes_threshold`` are
    written to the HDF store at ``file_path``; everything else is kept in an
    in-memory dict. Keys are always converted to ``str``.

    Sometimes there are errors depending on the dtypes of dataframes stored;
    objects which cannot be written to (or read back from) the store are kept
    in memory instead.
    """

    file_path = param.String()

    _store = param.ClassSelector(class_=pd.HDFStore)

    _cache = param.Dict(default={})

    bytes_threshold = param.Integer(default=int(1e8))

    def __init__(self, **params):
        super().__init__(**params)
        # param.String defaults to '' (not None), so test for a non-empty path.
        # Without a path the cache works purely in memory.
        if self.file_path:
            self._store = pd.HDFStore(self.file_path)

    def __getitem__(self, item):
        """Return the object stored under ``item``, checking memory first.

        :raises KeyError: if the key is in neither the memory cache nor the store
        """
        key = str(item)
        try:
            return self._cache[key]
        except KeyError:
            if self._store is None:
                raise
            return self._store[key]

    def _exceeds_threshold(self, value):
        """Return True if ``value`` is a DataFrame/Series larger than ``bytes_threshold``."""
        if isinstance(value, pd.DataFrame):
            return value.memory_usage().sum() > self.bytes_threshold
        if isinstance(value, pd.Series):
            return value.memory_usage() > self.bytes_threshold
        return False

    def _store_put(self, key, value):
        """Write ``value`` to the HDF store, falling back to memory on failure."""
        if self._store is None:
            self._cache[key] = value
            return

        try:
            self._store[key] = value
        except (NotImplementedError, TypeError):  # pytables does not support categorical dtypes
            self._cache[key] = value
            return

        # Check if reading back the dataframe works; if not, remove the
        # unreadable entry from the store and keep the object in memory.
        try:
            self._store[key]
        except (AttributeError, NotImplementedError, TypeError):
            del self._store[key]
            self._cache[key] = value

    def __setitem__(self, key, value):
        """Store ``value`` under ``str(key)`` in the store or in memory, depending on size."""
        key = str(key)
        if self._exceeds_threshold(value):
            # __getitem__ prefers the memory cache, so drop any older in-memory
            # value for this key to prevent it from shadowing the new one.
            self._cache.pop(key, None)
            self._store_put(key, value)
        else:
            self._cache[key] = value

    def __contains__(self, item):
        """Return True if ``str(item)`` is present in the memory cache or the store."""
        key = str(item)
        if key in self._cache:
            return True
        # HDFStore.keys() returns absolute paths with a leading '/', which never
        # equal the bare key; HDFStore's own membership test accepts both forms.
        return self._store is not None and key in self._store
92+
93+
# todo with statement for creating caches?
94+
# def __exit__(self):
95+
# pass

pyhdx/web/constructor.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,11 @@
1111
from pyhdx.web.tools import supported_tools
1212
from pyhdx.web.transforms import *
1313
from pyhdx.web.views import View
14+
from pyhdx.web.cache import Cache
1415

1516
element_count = 0
1617

18+
1719
class AppConstructor(param.Parameterized):
1820

1921
sources = param.Dict(default={})
@@ -32,6 +34,8 @@ class AppConstructor(param.Parameterized):
3234

3335
client = param.ClassSelector(default=None, class_=Client)
3436

37+
cache = param.ClassSelector(default=Cache(), class_=Cache)
38+
3539
def __init__(self, **params):
3640
super().__init__(**params)
3741
self.classes = self.find_classes()
@@ -105,12 +109,21 @@ def _parse_sections(self, yaml_dict):
105109
obj = self.create_element(name, element, **spec)
106110
element_dict[name] = obj
107111

108-
def create_element(self, name, element, **spec):
112+
def create_element(self, name: str, element: str, **spec):
113+
"""
114+
115+
:param name:
116+
:param element: either source, filter, opt, view, tool
117+
:param spec:
118+
:return:
119+
"""
109120
global element_count
110121

111122
_type = spec.pop('type')
112123
kwargs = self._resolve_kwargs(**spec)
113124
class_ = self._resolve_class(_type, element)
125+
if element == 'transform':
126+
kwargs['_cache'] = self.cache
114127
obj = class_(name=name, **kwargs)
115128
element_count += 1
116129

pyhdx/web/controllers.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -76,12 +76,17 @@ def _action_debug(self):
7676
print('break')
7777

7878
def _action_test(self):
79-
trs = self.transforms['table_1_select']
80-
print(trs.widgets)
79+
trs = self.transforms['peptide_select']
80+
cache = trs._cache
81+
print(cache._cache.keys())
82+
print(cache)
83+
print(cache._store.keys())
84+
85+
for item in cache._store.keys():
86+
print(item)
87+
print(cache[item])
88+
8189

82-
view = self.views['graph_1']
83-
df = view.get_data()
84-
print(df)
8590

8691
@property
8792
def _layout(self):

pyhdx/web/transforms.py

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -27,15 +27,11 @@ class Transform(param.Parameterized):
2727

2828
_hash = param.Integer(doc='Hash of current transform state')
2929

30-
_cache = param.ClassSelector(default=None, class_=Cache)
30+
_cache = param.ClassSelector(default=Cache(), class_=Cache)
3131

3232
def __init__(self, **params):
3333
super().__init__(**params)
3434

35-
def get(self):
36-
"""method called to get the dataframe"""
37-
return None
38-
3935
# perhaps they should all be private to prevent namespace collision with filter options
4036
@property
4137
def source_hash(self):
@@ -113,9 +109,18 @@ class AppTransform(Transform):
113109

114110
source = param.ClassSelector(class_=Transform)
115111

112+
def transform(self):
    """Fetch the data from ``self.source`` and return it.

    This implementation applies no modification to the source data.
    """
    source_data = self.source.get()
    return source_data
115+
116116
def get(self):
    """Return the transformed data, computing it only on a cache miss.

    The result of ``self.transform()`` is stored in ``self._cache`` under the
    current ``self.hash`` and returned directly on later calls with the same
    hash.
    """
    # Read the hash once so the membership test, the lookup and the store all
    # use the same key, and the hash is not recomputed for each of them.
    key = self.hash
    if key in self._cache:
        return self._cache[key]

    data = self.transform()
    self._cache[key] = data
    return data
119124

120125
@param.depends('source.updated', watch=True)
121126
def update(self):
@@ -197,20 +202,15 @@ def redraw(self):
197202

198203
self.redrawn = True
199204

200-
#todo cache df?
201-
def get(self):
205+
def transform(self):
    """Return a cross-section of the source data.

    Fetches the data from ``self.source`` and returns
    ``df.xs(**self.pd_kwargs)``. When the source yields ``None``, ``None``
    is returned unchanged.
    """
    source_df = self.source.get()
    if source_df is not None:
        xs_kwargs = self.pd_kwargs
        # drop level bugged? https://github.com/pandas-dev/pandas/issues/6507
        return source_df.xs(**xs_kwargs)
    return source_df
214214

215215
def _selector_changed(self, *events):
216216
#this sends multiple updated events as it triggers changes in other selectors
@@ -332,7 +332,7 @@ class RectangleLayoutTransform(AppTransform):
332332
step = param.Integer(5, bounds=(1, None), doc="Step size used for finding 'wrap' when its not specified")
333333
margin = param.Integer(4, doc="Margin space to keep between peptides when finding 'wrap'")
334334

335-
def get(self):
335+
def transform(self):
336336
df = self.source.get()
337337
if df is None:
338338
return None
@@ -378,7 +378,7 @@ def __init__(self, **params):
378378
self.kwargs = {k: v for k, v in params.items() if k not in self.param}
379379
super().__init__(**{k: v for k, v in params.items() if k in self.param})
380380

381-
def get(self):
381+
def transform(self):
382382
df = self.source.get()
383383
if df is None:
384384
return df
@@ -408,7 +408,7 @@ def __init__(self, **params):
408408
self.kwargs = {k: v for k, v in params.items() if k not in self.param}
409409
super().__init__(**params)
410410

411-
def get(self):
411+
def transform(self):
412412
df = self.source.get()
413413
if df is None:
414414
return None
@@ -440,7 +440,7 @@ class RescaleTransform(GenericTransform):
440440

441441
scale_factor = param.Number(1.)
442442

443-
def get(self): # todo perhaps some kind of decorator that returns nonealwasy?
443+
def transform(self): # todo perhaps some kind of decorator that returns nonealwasy?
444444
df = self.source.get()
445445
if df is None:
446446
return None
@@ -506,7 +506,7 @@ class SampleTransform(AppTransform):
506506

507507
axis = param.Number(0, inclusive_bounds=(0, 1))
508508

509-
def get(self):
509+
def transform(self):
510510
df = self.source.get()
511511
if df is None:
512512
return None
@@ -549,7 +549,7 @@ class PipeTransform(AppTransform):
549549

550550
pipe = param.List() # list of dicts
551551

552-
def get(self):
552+
def transform(self):
553553
df = self.source.get()
554554
if df is None:
555555
return None

0 commit comments

Comments
 (0)