Skip to content

Commit 96cd33c

Browse files
authored
Add new settings to control the package payload caching disk usage (#2023)
Signed-off-by: Ibrahim Sani <[email protected]>
1 parent e56d14d commit 96cd33c

File tree

6 files changed

+246
-1
lines changed

6 files changed

+246
-1
lines changed

docs/source/caching.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,17 @@ you should **not** enable caching on package repositories where packages may get
180180
overwritten. It is for this reason that caching is disabled for local packages by
181181
default (see :data:`package_cache_local`).
182182

183+
Control Disk Usage
184+
------------------
185+
186+
You can control the disk usage of the package cache by using the
187+
:data:`package_cache_space_buffer` and :data:`package_cache_used_threshold` settings.
188+
Both settings ship with safe default values, which you are encouraged to tune
189+
for your environment.
190+
191+
Another way to control the disk usage is to run the :option:`rez-pkg-cache --clean` command
192+
either manually or as a cron job. See :ref:`caching-cleaning-the-cache`.
193+
183194
Commandline Tool
184195
----------------
185196

@@ -232,6 +243,8 @@ Caching operations are stored into logfiles within the cache directory. To view:
232243
rez-pkg-cache 2020-05-23 16:17:45,404 PID-29827 INFO Started caching of variant /home/ajohns/packages/python/3.7.4/package.py[0]...
233244
rez-pkg-cache 2020-05-23 16:17:46,006 PID-29827 INFO Cached variant to /home/ajohns/package_cache/python/3.7.4/ce1c/a in 0.602037 seconds
234245
246+
.. _caching-cleaning-the-cache:
247+
235248
Cleaning The Cache
236249
++++++++++++++++++
237250

src/rez/cli/pkg-cache.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,8 @@ def setup_parser(parser, completions=False):
6767

6868

6969
def add_variant(pkgcache, uri, opts):
70+
import shutil
71+
7072
from rez.config import config
7173
from rez.packages import get_variant_from_uri
7274
from rez.utils.logging_ import print_info, print_warning
@@ -95,6 +97,14 @@ def add_variant(pkgcache, uri, opts):
9597
print_info("Already exists: %s", destpath)
9698
elif status == PackageCache.VARIANT_COPYING:
9799
print_warning("Another process is currently copying to: %s", destpath)
100+
elif status == PackageCache.VARIANT_SKIPPED:
101+
free = shutil.disk_usage(config.cache_packages_path).free / 1024**2
102+
buffer = config.package_cache_space_buffer / 1024**2
103+
print_warning(
104+
"Cache no longer accepting new variant due to size limit.\n"
105+
f"Remaining cache free space: {free:.2f}MB is near "
106+
f"configured buffer {buffer:.2f}MB (config.package_cache_space_buffer)."
107+
)
98108
else:
99109
print_info("Successfully cached to: %s", destpath)
100110

src/rez/config.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -449,6 +449,8 @@ def _parse_env_var(self, value):
449449
"shell_error_truncate_cap": Int,
450450
"package_cache_log_days": Int,
451451
"package_cache_max_variant_days": Int,
452+
"package_cache_space_buffer": Int,
453+
"package_cache_used_threshold": Int,
452454
"package_cache_clean_limit": Float,
453455
"allow_unversioned_packages": Bool,
454456
"package_cache_during_build": Bool,

src/rez/package_cache.py

Lines changed: 97 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ class PackageCache(object):
7070
VARIANT_COPY_STALLED = 4 #: Variant payload copy has stalled
7171
VARIANT_PENDING = 5 #: Variant is pending caching
7272
VARIANT_REMOVED = 6 #: Variant was deleted
73+
VARIANT_SKIPPED = 7 # Variant was not cached due to cache size limit
7374

7475
STATUS_DESCRIPTIONS = {
7576
VARIANT_NOT_FOUND: "was not found",
@@ -81,6 +82,7 @@ class PackageCache(object):
8182
"for more information.",
8283
VARIANT_PENDING: "is pending caching",
8384
VARIANT_REMOVED: "was deleted",
85+
VARIANT_SKIPPED: "is not being cached due to cache size limit"
8486
}
8587

8688
_FILELOCK_TIMEOUT = 10
@@ -130,6 +132,80 @@ def get_cached_root(self, variant):
130132

131133
return rootpath
132134

135+
def get_variant_size(self, free, variant_root):
    """Get the on-disk size of a variant's payload.

    Walks ``variant_root`` iteratively, following symlinks, and sums the
    size of every regular file found. Files and directories are tracked by
    ``(st_dev, st_ino)`` so that hardlinks and symlinks pointing at an
    already visited target are only counted once.

    Args:
        free: The available free cache space, in bytes.
        variant_root: The rez resolved variant root.

    Returns:
        int: The size in bytes of the regular files counted under the
        variant root. The walk stops as soon as the running total would
        leave less than ``config.package_cache_space_buffer`` bytes free,
        so in that case the value is a partial total. Zero is returned if
        ``variant_root`` is empty/None or cannot be read.
    """
    # os.scandir(None) silently scans the current working directory, which
    # would report the size of an unrelated tree. Treat a missing root as
    # an empty payload instead.
    if not variant_root:
        return 0

    space_buffer = config.package_cache_space_buffer
    variant_size = 0
    seen_inodes = set()
    stack = [variant_root]

    while stack:
        current = stack.pop()
        try:
            with os.scandir(current) as entries:
                for entry in entries:
                    try:
                        st = entry.stat(follow_symlinks=True)

                        # On Windows, os.DirEntry.stat() always reports
                        # st_ino/st_dev as zero. Without a real identity
                        # every entry after the first would look like a
                        # duplicate and be skipped, so ask os.stat().
                        if not st.st_ino:
                            st = os.stat(entry.path)

                        if st.st_ino:
                            # Since we are following symlinks, track visited
                            # inodes to make sure we are not double counting.
                            inode = (st.st_dev, st.st_ino)
                            if inode in seen_inodes:
                                continue
                            seen_inodes.add(inode)
                        elif entry.is_symlink():
                            # No usable identity: duplicates and cycles
                            # reached through this link cannot be detected,
                            # so do not follow it.
                            continue

                        if stat.S_ISREG(st.st_mode):
                            variant_size += st.st_size
                            # Bail out early if variant size will overtake
                            # the buffer set by
                            # config.package_cache_space_buffer.
                            if (free - variant_size) < space_buffer:
                                return variant_size
                        elif stat.S_ISDIR(st.st_mode):
                            stack.append(entry.path)
                    except OSError:
                        # Best effort: unreadable entries are not counted.
                        continue
        except OSError:
            # Best effort: unreadable directories are not counted.
            continue

    return variant_size
177+
178+
def cache_near_full(self):
    """Check whether the cache has run out of spare disk space.

    Returns:
        bool: True if the free space reported for the cache path is below
        ``config.package_cache_space_buffer``, otherwise False.
    """
    # disk_usage() yields (total, used, free); only the last is needed.
    free_bytes = shutil.disk_usage(self.path)[2]
    return free_bytes < config.package_cache_space_buffer
186+
187+
def variant_meets_space_requirements(self, rez_variant_root):
    """Decide whether a variant may be cached given the current disk usage.

    While the used percentage of the cache disk is at or below
    ``config.package_cache_used_threshold``, every variant is accepted
    without being measured. Above the threshold the cache is throttled: the
    variant is measured and only accepted if caching it would still leave
    more than ``config.package_cache_space_buffer`` bytes free.

    Args:
        rez_variant_root: The rez resolved variant root.

    Returns:
        bool:
            - True if the used percentage is at or below
              config.package_cache_used_threshold.
            - Otherwise, True if (free - variant_size) is greater than
              config.package_cache_space_buffer, else False.
    """
    total, used, free = shutil.disk_usage(self.path)
    percent_used = (used / total) * 100 if total else 0.0

    # Not throttling yet: accept without walking the variant payload.
    if percent_used <= config.package_cache_used_threshold:
        return True

    remaining = free - self.get_variant_size(free, rez_variant_root)
    return remaining > config.package_cache_space_buffer
208+
133209
def add_variant(self, variant, force=False, wait_for_copying=False, logger=None):
134210
"""Copy a variant's payload into the cache.
135211
@@ -168,7 +244,7 @@ def add_variant(self, variant, force=False, wait_for_copying=False, logger=None)
168244
Returns:
169245
tuple: 2-tuple:
170246
- str: Path to cached payload
171-
- int: One of VARIANT_FOUND, VARIANT_CREATED, VARIANT_COPYING, VARIANT_COPY_STALLED
247+
- int: One of VARIANT_FOUND, VARIANT_CREATED, VARIANT_COPYING, VARIANT_COPY_STALLED, VARIANT_SKIPPED
172248
"""
173249
from rez.utils.base26 import get_next_base26
174250

@@ -259,6 +335,11 @@ def add_variant(self, variant, force=False, wait_for_copying=False, logger=None)
259335
)
260336
return (rootpath, status)
261337

338+
# Block adding new variant to cache from rez-pkg-cache --add-variants
339+
# if the cache size is almost full.
340+
if self.cache_near_full() or not self.variant_meets_space_requirements(variant_root):
341+
return (rootpath, self.VARIANT_SKIPPED)
342+
262343
# 1.
263344
path = self._get_hash_path(variant)
264345
os.makedirs(path, exist_ok=True)
@@ -767,6 +848,14 @@ def _lock(self):
767848
def _run_caching_step(self, state, wait_for_copying=False):
768849
logger = state["logger"]
769850

851+
# Keep the cache daemon alive until the cache size reaches its min threshold.
852+
if self.cache_near_full():
853+
logger.info(
854+
"Cache storage has reached the configured threshold of "
855+
f"{config.package_cache_space_buffer / 1024**2:.2f}MB, caching will now stop."
856+
)
857+
return False
858+
770859
# pick a random pending variant to copy
771860
pending_filenames = set(os.listdir(self._pending_dir))
772861
if not wait_for_copying:
@@ -787,6 +876,13 @@ def _run_caching_step(self, state, wait_for_copying=False):
787876
raise
788877

789878
variant = get_variant(variant_handle_dict)
879+
variant_root = getattr(variant, "root")
880+
881+
if not self.variant_meets_space_requirements(variant_root):
882+
# variant cannot be cached due to its size, so remove as a pending variant.
883+
logger.info(f"Variant {variant_root} is too big to be cached due to remaining cache space.")
884+
safe_remove(filepath)
885+
return True
790886

791887
# copy the variant and log activity
792888
logger.info("Started caching of variant %s...", variant.uri)

src/rez/rezconfig.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -302,6 +302,29 @@
302302
# Logs are written to :file:`{pkg-cache-root}/.sys/log/{filename}.log`
303303
package_cache_log_days = 7
304304

305+
# Minimum amount of free disk space, in bytes, that must remain available on the
306+
# package cache's filesystem. Defaults to 100MB (100 * 1024 * 1024 = 104857600).
307+
# This is required to avoid writing to a full cache and for cleaning the cache
308+
# when running :option:`rez-pkg-cache --clean`.
311+
#
312+
# .. note::
313+
# Reported disk usage may vary across different file systems due to differences
314+
# in block size, allocation strategies and metadata overhead.
315+
package_cache_space_buffer = 104857600
316+
317+
# The last variant being cached can take the cache size below the minimum buffer threshold we set.
318+
# To guard against this, we define a maximum cache usage threshold of 80%. We start throttling the cache
319+
# at this point by checking the size of each variant against the :data:`package_cache_space_buffer`.
320+
# If the pending variant about to be cached will take the cache size below the :data:`package_cache_space_buffer`,
321+
# don't cache it. When setting this value, subtract from your total disk space the fraction of disk space that
322+
# will be consumed by the largest variant you support and add the :data:`package_cache_space_buffer`.
323+
#
324+
# .. note::
325+
# Reported disk usage may vary across different file systems due to differences
326+
# in block size, allocation strategies and metadata overhead.
327+
package_cache_used_threshold = 80
305328

306329
###############################################################################
307330
# Package Resolution

src/rez/tests/test_package_cache.py

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
import os.path
1111
import time
1212
import subprocess
13+
import tempfile
14+
from unittest.mock import patch
1315

1416
from rez.tests.util import TestBase, TempdirMixin, restore_os_environ, \
1517
install_dependent
@@ -19,6 +21,9 @@
1921
from rez.exceptions import PackageCacheError
2022
from rez.utils.filesystem import canonical_path
2123

24+
# Simulated total disk size (1 GiB) for disk space tests.
25+
VIRTUAL_GIGABYTE = 1024 * 1024 * 1024
26+
2227

2328
class TestPackageCache(TestBase, TempdirMixin):
2429
@classmethod
@@ -239,3 +244,99 @@ def test_caching_on_resolve_synchronous(self):
239244
"Reference %r should resolve to %s, but resolves to %s"
240245
% (ref, cached_root, root)
241246
)
247+
248+
@patch('rez.package_cache.shutil.disk_usage')
def test_cache_near_full_true(self, mock_du):
    """cache_near_full is True when free space is under the buffer."""
    free_bytes = 10_000_000  # 10MB free
    buffer_bytes = 50_000_000  # 50MB buffer
    mock_du.return_value = (VIRTUAL_GIGABYTE, 500_000_000, free_bytes)

    cache = self._pkgcache()
    self.update_settings({'package_cache_space_buffer': buffer_bytes})

    near_full = cache.cache_near_full()
    self.assertTrue(near_full)
255+
256+
@patch('rez.package_cache.shutil.disk_usage')
def test_cache_near_full_false(self, mock_du):
    """cache_near_full is False when free space is at or above the buffer."""
    free_bytes = 300_000_000  # 300MB free
    buffer_bytes = 50_000_000  # 50MB buffer
    mock_du.return_value = (VIRTUAL_GIGABYTE, 400_000_000, free_bytes)

    cache = self._pkgcache()
    self.update_settings({'package_cache_space_buffer': buffer_bytes})

    near_full = cache.cache_near_full()
    self.assertFalse(near_full)
263+
264+
@patch('rez.package_cache.shutil.disk_usage')
def test_variant_meets_space_requirements_below_threshold(self, mock_du):
    """Any variant is accepted while disk usage is below the threshold."""
    # 300MB used out of 1GiB is roughly 30% used.
    mock_du.return_value = (VIRTUAL_GIGABYTE, 300_000_000, 700_000_000)

    cache = self._pkgcache()
    self.update_settings({'package_cache_used_threshold': 80})

    with tempfile.TemporaryDirectory() as variant_dir:
        accepted = cache.variant_meets_space_requirements(variant_dir)
        self.assertTrue(accepted)
272+
273+
@patch('rez.package_cache.shutil.disk_usage')
def test_variant_meets_space_requirements_above_threshold_sufficient(self, mock_du):
    """Above the threshold, a variant that leaves the buffer intact is accepted."""
    # ~83.8% used (>80%), 150MB free
    mock_du.return_value = (VIRTUAL_GIGABYTE, 900_000_000, 150_000_000)
    cache = self._pkgcache()

    with tempfile.TemporaryDirectory() as variant_dir:
        # Payload is a single small (1MB) file.
        payload_path = os.path.join(variant_dir, 'f.bin')
        with open(payload_path, 'wb') as payload:
            payload.write(b'0' * 1_000_000)

        settings = {
            'package_cache_used_threshold': 80,
            'package_cache_space_buffer': 100_000_000,
        }
        self.update_settings(settings)

        # 150MB - 1MB > 100MB
        accepted = cache.variant_meets_space_requirements(variant_dir)
        self.assertTrue(accepted)
289+
290+
@patch('rez.package_cache.shutil.disk_usage')
def test_variant_meets_space_requirements_above_threshold_insufficient(self, mock_du):
    """
    Above the threshold, a variant that would breach the buffer is rejected.

    get_variant_size is mocked so that the measured size does not depend on
    the filesystem the test runs on. The logic being exercised is:
    used% > threshold AND (free - variant_size) <= buffer -> False
    """
    # ~83.8% used (>80%), 150MB free
    mock_du.return_value = (VIRTUAL_GIGABYTE, 900_000_000, 150_000_000)
    cache = self._pkgcache()

    with tempfile.TemporaryDirectory() as variant_dir:
        settings = {
            'package_cache_used_threshold': 80,
            'package_cache_space_buffer': 100_000_000,
        }
        self.update_settings(settings)

        fake_size = patch.object(cache, 'get_variant_size', return_value=70_000_000)  # 70MB
        with fake_size:
            # free (150MB) - variant_size (70MB) = 80MB <= buffer (100MB)
            accepted = cache.variant_meets_space_requirements(variant_dir)
            self.assertFalse(accepted)
312+
313+
def test_variant_meets_space_requirements_invalid_path(self):
    """Invalid paths are accepted while usage is below the threshold.

    Below config.package_cache_used_threshold the variant root is never
    inspected, so None, an empty string and a non-existent path must all
    be accepted. Disk usage is mocked so the outcome does not depend on how
    full the disk of the machine running the tests happens to be.
    """
    invalid_roots = (
        None,
        "",
        os.path.join(self.root, "path", "does", "not", "exist"),
    )

    with patch('rez.package_cache.shutil.disk_usage') as mock_du:
        # 300MB used out of 1GiB is roughly 30% used, well below 80%.
        mock_du.return_value = (VIRTUAL_GIGABYTE, 300_000_000, 700_000_000)
        pkgcache = self._pkgcache()
        self.update_settings({'package_cache_used_threshold': 80})

        for root in invalid_roots:
            with self.subTest(root=root):
                self.assertTrue(pkgcache.variant_meets_space_requirements(root))
323+
324+
def test_add_variant_skipped_cache_near_full(self):
    """add_variant reports VARIANT_SKIPPED when the cache is near full."""
    cache = self._pkgcache()
    first_variant = next(get_package("versioned", "3.0").iter_variants())

    with patch.object(cache, 'cache_near_full', return_value=True):
        with patch.object(cache, 'variant_meets_space_requirements', return_value=True):
            status = cache.add_variant(first_variant)[1]

    self.assertEqual(status, PackageCache.VARIANT_SKIPPED)
333+
334+
def test_add_variant_skipped_variant_too_large(self):
    """add_variant reports VARIANT_SKIPPED when the variant does not fit."""
    cache = self._pkgcache()
    first_variant = next(get_package("versioned", "3.0").iter_variants())

    with patch.object(cache, 'cache_near_full', return_value=False):
        with patch.object(cache, 'variant_meets_space_requirements', return_value=False):
            status = cache.add_variant(first_variant)[1]

    self.assertEqual(status, PackageCache.VARIANT_SKIPPED)

0 commit comments

Comments
 (0)