99#include " CUDACommon.h"
1010#include " FFMPEGCommon.h"
1111#include " NVDECCache.h"
12+ #include " NVDECCacheConfig.h"
1213
1314#include < cuda_runtime.h> // For cudaGetDevice
1415
@@ -19,9 +20,13 @@ extern "C" {
1920
2021namespace facebook ::torchcodec {
2122
22- NVDECCache& NVDECCache::getCache ( const StableDevice& device ) {
23+ NVDECCache* NVDECCache::getCacheInstances ( ) {
2324 static NVDECCache cacheInstances[MAX_CUDA_GPUS];
24- return cacheInstances[getDeviceIndex (device)];
25+ return cacheInstances;
26+ }
27+
28+ NVDECCache& NVDECCache::getCache (const StableDevice& device) {
29+ return getCacheInstances ()[getDeviceIndex (device)];
2530}
2631
2732UniqueCUvideodecoder NVDECCache::getDecoder (CUVIDEOFORMAT* videoFormat) {
@@ -39,6 +44,21 @@ UniqueCUvideodecoder NVDECCache::getDecoder(CUVIDEOFORMAT* videoFormat) {
3944 return nullptr ;
4045}
4146
47+ // Evicts the least-recently-used entry from cache_.
48+ // Caller must hold cacheLock_!!!
49+ void NVDECCache::evictLRUEntry () {
50+ if (cache_.empty ()) {
51+ return ;
52+ }
53+ auto victim = cache_.begin ();
54+ for (auto it = cache_.begin (); it != cache_.end (); ++it) {
55+ if (it->second .lastUsed < victim->second .lastUsed ) {
56+ victim = it;
57+ }
58+ }
59+ cache_.erase (victim);
60+ }
61+
4262void NVDECCache::returnDecoder (
4363 CUVIDEOFORMAT* videoFormat,
4464 UniqueCUvideodecoder decoder) {
@@ -47,25 +67,40 @@ void NVDECCache::returnDecoder(
4767 CacheKey key (videoFormat);
4868 std::lock_guard<std::mutex> lock (cacheLock_);
4969
50- // Evict least recently used entry if at capacity.
51- // This search is O(MAX_CACHE_SIZE) but MAX_CACHE_SIZE is always small, so
52- // this isn't significant.
53- if (cache_.size () >= MAX_CACHE_SIZE) {
54- auto victim = cache_.begin ();
55- for (auto it = cache_.begin (); it != cache_.end (); ++it) {
56- if (it->second .lastUsed < victim->second .lastUsed ) {
57- victim = it;
58- }
59- }
60- cache_.erase (victim);
70+ int capacity = getNVDECCacheCapacity ();
71+ if (capacity <= 0 ) {
72+ return ;
73+ }
74+
75+ // Evict least recently used entries until under capacity.
76+ // This search is O(capacity), which is supposed to be small,
77+ // so linear vs constant search overhead is expected to be negligible.
78+ while (cache_.size () >= static_cast <size_t >(capacity)) {
79+ evictLRUEntry ();
6180 }
6281
6382 // Add the decoder back to cache
6483 cache_.emplace (key, CacheEntry (std::move (decoder), lastUsedCounter_++));
6584
6685 STD_TORCH_CHECK (
67- cache_.size () <= MAX_CACHE_SIZE,
68- " Cache size exceeded maximum limit, please report a bug" );
86+ cache_.size () <= static_cast <size_t >(capacity),
87+ " Cache size exceeded capacity, please report a bug" );
88+ }
89+
90+ void NVDECCache::evictExcessEntriesAcrossDevices (int capacity) {
91+ NVDECCache* instances = getCacheInstances ();
92+ for (int i = 0 ; i < MAX_CUDA_GPUS; ++i) {
93+ std::lock_guard<std::mutex> lock (instances[i].cacheLock_ );
94+ while (instances[i].cache_ .size () > static_cast <size_t >(capacity)) {
95+ instances[i].evictLRUEntry ();
96+ }
97+ }
98+ }
99+
100+ int NVDECCache::getCacheSizeForDevice (int device_index) {
101+ NVDECCache* instances = getCacheInstances ();
102+ std::lock_guard<std::mutex> lock (instances[device_index].cacheLock_ );
103+ return static_cast <int >(instances[device_index].cache_ .size ());
69104}
70105
71106} // namespace facebook::torchcodec
0 commit comments