Skip to content

Commit 43a493d

Browse files
committed
Prompt user to delete old table files (issue #194)
1 parent 5962dd0 commit 43a493d

File tree

4 files changed

+68
-5
lines changed

4 files changed

+68
-5
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ Also adds a new ALFPath class to replace alf path functions and now returns UUID
2727
- One.save_cache now updates any tables on disk unless clobber is True
2828
- one.util.patch_cache -> one.alf.cache.patch_tables
2929
- One object will save modified tables to disk upon delete
30+
- bugfix: ONE_HTTP_DL_THREADS environment variable now works as intended
3031

3132
### Added
3233

one/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
"""The Open Neurophysiology Environment (ONE) API."""
2-
__version__ = '3.0b4'
2+
__version__ = '3.0b5'

one/api.py

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,25 @@ def load_cache(self, tables_dir=None, **kwargs):
165165
if self.offline: # In online mode, the cache tables should be downloaded later
166166
warnings.warn(f'No cache tables found in {self._tables_dir}')
167167

168+
# If in remote mode and loading old tables generated on Alyx,
169+
# prompt the user to delete them to improve load times
170+
raw_meta = self._cache['_meta'].get('raw', {}).values() or [{}]
171+
tagged = any(filter(None, flatten(x.get('database_tags') for x in raw_meta)))
172+
origin = set(x['origin'] for x in raw_meta if 'origin' in x)
173+
older = (self._cache['_meta']['created_time'] or datetime.now()) < datetime(2025, 2, 13)
174+
remote = not self.offline and self.mode == 'remote'
175+
if remote and origin == {'alyx'} and older and not self._web_client.silent and not tagged:
176+
message = ('Old Alyx cache tables detected on disk. '
177+
'It\'s recomended to remove these tables as they '
178+
'negatively affect performance.\nDelete these tables? [Y/n]: ')
179+
if (input(message).casefold().strip() or 'y')[0] == 'y':
180+
self._remove_table_files()
181+
self._reset_cache()
182+
elif len(self._cache.datasets) > 1e6:
183+
warnings.warn(
184+
'Large cache tables affect performance. '
185+
'Consider removing them by calling the `_remove_table_files` method.')
186+
168187
return self._cache['_meta']['loaded_time']
169188

170189
def save_cache(self, save_dir=None, clobber=False):
@@ -1646,6 +1665,11 @@ def load_cache(self, tables_dir=None, clobber=False, tag=None):
16461665
tag : str
16471666
An optional Alyx dataset tag for loading cache tables containing a subset of datasets.
16481667
1668+
Returns
1669+
-------
1670+
datetime.datetime
1671+
A timestamp of when the cache was loaded.
1672+
16491673
Examples
16501674
--------
16511675
To load the cache tables for a given release tag
@@ -1669,6 +1693,8 @@ def load_cache(self, tables_dir=None, clobber=False, tag=None):
16691693
different_tag = any(x != tag for x in current_tags)
16701694
if not (clobber or different_tag):
16711695
super(OneAlyx, self).load_cache(tables_dir) # Load any present cache
1696+
cache_meta = self._cache.get('_meta', {})
1697+
raw_meta = cache_meta.get('raw', {}).values() or [{}]
16721698

16731699
try:
16741700
# Determine whether a newer cache is available
@@ -1679,15 +1705,15 @@ def load_cache(self, tables_dir=None, clobber=False, tag=None):
16791705
min_version = packaging.version.parse(cache_info.get('min_api_version', '0.0.0'))
16801706
if packaging.version.parse(one.__version__) < min_version:
16811707
warnings.warn(f'Newer cache tables require ONE version {min_version} or greater')
1682-
return
1708+
return cache_meta['loaded_time']
16831709

16841710
# Check whether the remote cache is more recent
16851711
remote_created = datetime.fromisoformat(cache_info['date_created'])
16861712
local_created = cache_meta.get('created_time', None)
16871713
fresh = local_created and (remote_created - local_created) < timedelta(minutes=1)
16881714
if fresh and not different_tag:
16891715
_logger.info('No newer cache available')
1690-
return
1716+
return cache_meta['loaded_time']
16911717

16921718
# Set the cache table directory location
16931719
if tables_dir: # If tables directory specified, use that
@@ -1711,7 +1737,7 @@ def load_cache(self, tables_dir=None, clobber=False, tag=None):
17111737
_logger.info('Downloading remote caches...')
17121738
files = self.alyx.download_cache_tables(cache_info.get('location'), self._tables_dir)
17131739
assert any(files)
1714-
super(OneAlyx, self).load_cache(self._tables_dir) # Reload cache after download
1740+
return super(OneAlyx, self).load_cache(self._tables_dir) # Reload cache after download
17151741
except (requests.exceptions.HTTPError, wc.HTTPError, requests.exceptions.SSLError) as ex:
17161742
_logger.debug(ex)
17171743
_logger.error(f'{type(ex).__name__}: Failed to load the remote cache file')
@@ -1728,6 +1754,7 @@ def load_cache(self, tables_dir=None, clobber=False, tag=None):
17281754
'Please provide valid tables_dir / cache_dir kwargs '
17291755
'or run ONE.setup to update the default directory.'
17301756
)
1757+
return cache_meta['loaded_time']
17311758

17321759
@property
17331760
def alyx(self):

one/tests/test_one.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -955,6 +955,40 @@ def test_load_cache(self):
955955
with self.assertRaises(KeyError):
956956
self.one.load_cache(tdir)
957957

958+
# Test loading large Alyx tables
959+
raw = {'origin': 'alyx'}
960+
cache = Bunch({
961+
'datasets': EMPTY_DATASETS_FRAME.copy(),
962+
'sessions': EMPTY_SESSIONS_FRAME.copy(),
963+
'_meta': {
964+
'created_time': datetime.datetime(2025, 2, 1, 12, 0),
965+
'loaded_time': datetime.datetime.now(),
966+
'raw': {'datasets': raw, 'sessions': raw}}
967+
})
968+
with mock.patch('one.api.load_tables', return_value=cache), \
969+
mock.patch('builtins.input', return_value='yes'), \
970+
mock.patch.object(self.one, '_remove_table_files') as m:
971+
self.one.load_cache()
972+
m.assert_not_called()
973+
# Remote mode
974+
self.one.mode = 'remote'
975+
self.one._web_client = mock.MagicMock()
976+
self.one._web_client.silent = False
977+
with mock.patch('one.api.load_tables', return_value=cache), \
978+
mock.patch('builtins.input', return_value='yes'), \
979+
mock.patch.object(self.one, '_remove_table_files') as m:
980+
self.one.load_cache()
981+
m.assert_called_once()
982+
# Test large table warning
983+
cache.datasets = mock.MagicMock()
984+
cache.datasets.__len__.return_value = int(1.5e6)
985+
self.one._web_client = None
986+
self.one.mode = 'local'
987+
with mock.patch('one.api.load_tables', return_value=cache), \
988+
mock.patch('builtins.input', return_value='n'), \
989+
mock.patch.object(self.one, '_remove_table_files') as m:
990+
self.assertWarns(UserWarning, self.one.load_cache)
991+
958992
def test_save_cache(self):
959993
"""Test One.save_cache method."""
960994
self.one._cache['_meta'].pop('modified_time', None)
@@ -1311,7 +1345,8 @@ def test_load_cache(self):
13111345
mock.patch.object(self.one.alyx, 'get', return_value=cache_info), \
13121346
mock.patch('one.api.One.load_cache', side_effect=now), \
13131347
self.assertLogs('one.api', 'INFO') as lg:
1314-
self.assertIsNone(self.one.load_cache(tag='Q3-2020-TAG'))
1348+
expected = self.one._cache['_meta']['loaded_time']
1349+
self.assertEqual(expected, self.one.load_cache(tag='Q3-2020-TAG'))
13151350
cm.assert_not_called()
13161351
self.assertRegex(lg.output[-1], 'No newer cache available')
13171352

0 commit comments

Comments
 (0)