diff --git a/include/zim/archive.h b/include/zim/archive.h index a5985d1d9..d19b99838 100644 --- a/include/zim/archive.h +++ b/include/zim/archive.h @@ -42,6 +42,27 @@ namespace zim efficientOrder }; + /** Get the maximum size of the cluster cache. + * + * @return The maximum memory size used the cluster cache. + */ + size_t LIBZIM_API getClusterCacheMaxSize(); + + /** Get the current size of the cluster cache. + * + * @return The current memory size used by the cluster cache. + */ + size_t LIBZIM_API getClusterCacheCurrentSize(); + + /** Set the size of the cluster cache. + * + * If the new size is lower than the number of currently stored clusters + * some clusters will be dropped from cache to respect the new size. + * + * @param sizeInB The memory limit (in bytes) for the cluster cache. + */ + void LIBZIM_API setClusterCacheMaxSize(size_t sizeInB); + /** * The Archive class to access content in a zim file. * @@ -534,27 +555,6 @@ namespace zim */ std::shared_ptr getImpl() const { return m_impl; } - /** Get the maximum size of the cluster cache. - * - * @return The maximum number of clusters stored in the cache. - */ - size_t getClusterCacheMaxSize() const; - - /** Get the current size of the cluster cache. - * - * @return The number of clusters currently stored in the cache. - */ - size_t getClusterCacheCurrentSize() const; - - /** Set the size of the cluster cache. - * - * If the new size is lower than the number of currently stored clusters - * some clusters will be dropped from cache to respect the new size. - * - * @param nbClusters The maximum number of clusters stored in the cache. - */ - void setClusterCacheMaxSize(size_t nbClusters); - /** Get the size of the dirent cache. * * @return The maximum number of dirents stored in the cache. 
diff --git a/meson_options.txt b/meson_options.txt index e23118f6e..936fdf8c5 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -1,5 +1,5 @@ -option('CLUSTER_CACHE_SIZE', type : 'string', value : '16', - description : 'set cluster cache size to number (default:16)') +option('CLUSTER_CACHE_SIZE', type : 'string', value : '536870912', + description : 'set cluster cache size to number (default:512MB)') option('DIRENT_CACHE_SIZE', type : 'string', value : '512', description : 'set dirent cache size to number (default:512)') option('DIRENT_LOOKUP_CACHE_SIZE', type : 'string', value : '1024', diff --git a/src/archive.cpp b/src/archive.cpp index e370a9bf2..5cf1836b7 100644 --- a/src/archive.cpp +++ b/src/archive.cpp @@ -504,19 +504,19 @@ namespace zim return m_impl->hasNewNamespaceScheme(); } - size_t Archive::getClusterCacheMaxSize() const + size_t getClusterCacheMaxSize() { - return m_impl->getClusterCacheMaxSize(); + return getClusterCache().getMaxCost(); } - size_t Archive::getClusterCacheCurrentSize() const + size_t getClusterCacheCurrentSize() { - return m_impl->getClusterCacheCurrentSize(); + return getClusterCache().getCurrentCost(); } - void Archive::setClusterCacheMaxSize(size_t nbClusters) + void setClusterCacheMaxSize(size_t sizeInB) { - m_impl->setClusterCacheMaxSize(nbClusters); + getClusterCache().setMaxCost(sizeInB); } size_t Archive::getDirentCacheMaxSize() const @@ -534,7 +534,6 @@ namespace zim m_impl->setDirentCacheMaxSize(nbDirents); } - size_t Archive::getDirentLookupCacheMaxSize() const { return m_impl->getDirentLookupCacheMaxSize(); diff --git a/src/buffer_reader.cpp b/src/buffer_reader.cpp index 3b64ca137..0d08518a6 100644 --- a/src/buffer_reader.cpp +++ b/src/buffer_reader.cpp @@ -44,6 +44,11 @@ zsize_t BufferReader::size() const return source.size(); } +size_t BufferReader::getMemorySize() const +{ + return source.size().v; +} + offset_t BufferReader::offset() const { return offset_t((offset_type)(static_cast(source.data(offset_t(0))))); 
diff --git a/src/buffer_reader.h b/src/buffer_reader.h index f0972c375..27a128fcd 100644 --- a/src/buffer_reader.h +++ b/src/buffer_reader.h @@ -31,6 +31,7 @@ class LIBZIM_PRIVATE_API BufferReader : public Reader { virtual ~BufferReader() {}; zsize_t size() const override; + size_t getMemorySize() const override; offset_t offset() const override; const Buffer get_buffer(offset_t offset, zsize_t size) const override; diff --git a/src/cluster.cpp b/src/cluster.cpp index 214af0e38..b587b8a19 100644 --- a/src/cluster.cpp +++ b/src/cluster.cpp @@ -31,8 +31,6 @@ #include "log.h" -#include "config.h" - log_define("zim.cluster") #define log_debug1(e) @@ -86,7 +84,8 @@ getClusterReader(const Reader& zimReader, offset_t offset, Cluster::Compression* Cluster::Cluster(std::unique_ptr reader_, Compression comp, bool isExtended) : compression(comp), isExtended(isExtended), - m_reader(std::move(reader_)) + m_reader(std::move(reader_)), + m_memorySize(0) { if (isExtended) { read_header(); @@ -179,4 +178,44 @@ getClusterReader(const Reader& zimReader, offset_t offset, Cluster::Compression* } } + // This function must return a constant size for a given cluster. + // This is important as we want to remove the same size that what we add when we remove + // the cluster from the cache. + // However, because of partial decompression, this size can change: + // - As we advance in the compression, we can create new blob readers in `m_blobReaders` + // - The stream itself may allocate memory. + // To solve this, we take the average and say a cluster's blob readers will half be created and + // so we assume a readers size of half the full uncompressed cluster data size. + // If cluster is not compressed, we never store its content (mmap is created on demand and not cached), + // so we use a size of 0 for the readers. + // It also appears that when we get the size of the stream, we reach a state where no + // futher allocation will be done by it. 
Probably because: + // - We already started to decompress the stream to read the offsets + // - Cluster data size is smaller than window size associated to compression level (?) + // We anyway check that and print a warning if this is not the case, hoping that user will create + // an issue allowing us for further analysis. + // Note: + // - No need to protect this method from concurrent access as it will be called by the concurrent_cache which will + // have a lock (on lru cache) to ensure only one thread accesses it at the same time. + size_t Cluster::getMemorySize() const { + if (!m_memorySize) { + auto offsets_size = sizeof(offset_t) * m_blobOffsets.size(); + auto readers_size = 0; + if (isCompressed()) { + readers_size = m_blobOffsets.back().v / 2; + } + m_streamSize = m_reader->getMemorySize(); + // Compression level defines a huge window and makes the decompression stream allocate a huge memory to store it. + // However, the used memory will not be greater than the content itself, even if window is bigger. + // On linux (at least), the real used memory will be the actual memory used, not the one allocated. + // So, let's clamp the stream size to the size of the content itself.
+ m_memorySize = offsets_size + readers_size + std::min(m_streamSize, m_blobOffsets.back().v); + } + auto streamSize = m_reader->getMemorySize(); + if (streamSize != m_streamSize) { + std::cerr << "WARNING: stream size have changed from " << m_streamSize << " to " << streamSize << std::endl; + std::cerr << "Please open an issue on https://github.com/openzim/libzim/issues with this message and the zim file you use" << std::endl; + } + return m_memorySize; + } } diff --git a/src/cluster.h b/src/cluster.h index db1be37c1..0c1a6b553 100644 --- a/src/cluster.h +++ b/src/cluster.h @@ -70,6 +70,8 @@ namespace zim mutable std::mutex m_readerAccessMutex; mutable BlobReaders m_blobReaders; + mutable size_t m_memorySize; + mutable size_t m_streamSize; template @@ -90,9 +92,17 @@ namespace zim Blob getBlob(blob_index_t n) const; Blob getBlob(blob_index_t n, offset_t offset, zsize_t size) const; + size_t getMemorySize() const; + static std::shared_ptr read(const Reader& zimReader, offset_t clusterOffset); }; + struct ClusterMemorySize { + static size_t cost(const std::shared_ptr& cluster) { + return cluster->getMemorySize(); + } + }; + } #endif // ZIM_CLUSTER_H diff --git a/src/compression.cpp b/src/compression.cpp index f5d3352c8..c3359b9ba 100644 --- a/src/compression.cpp +++ b/src/compression.cpp @@ -60,6 +60,11 @@ void LZMA_INFO::stream_end_decode(stream_t* stream) lzma_end(stream); } +size_t LZMA_INFO::state_size(const stream_t& stream) +{ + return lzma_memusage(&stream); +} + const std::string ZSTD_INFO::name = "zstd"; @@ -170,3 +175,11 @@ void ZSTD_INFO::stream_end_decode(stream_t* stream) void ZSTD_INFO::stream_end_encode(stream_t* stream) { } + +size_t ZSTD_INFO::state_size(const stream_t& stream) { + if (stream.decoder_stream) { + return ZSTD_sizeof_DStream(stream.decoder_stream); + } else { + return ZSTD_sizeof_CStream(stream.encoder_stream); + } +} diff --git a/src/compression.h b/src/compression.h index 4daba33b5..b27a86531 100644 --- a/src/compression.h +++ 
b/src/compression.h @@ -65,6 +65,7 @@ struct LZMA_INFO { static CompStatus stream_run_decode(stream_t* stream, CompStep step); static CompStatus stream_run(stream_t* stream, CompStep step); static void stream_end_decode(stream_t* stream); + static size_t state_size(const stream_t& stream); }; @@ -94,6 +95,7 @@ struct LIBZIM_PRIVATE_API ZSTD_INFO { static CompStatus stream_run_decode(stream_t* stream, CompStep step); static void stream_end_encode(stream_t* stream); static void stream_end_decode(stream_t* stream); + static size_t state_size(const stream_t& stream); }; diff --git a/src/concurrent_cache.h b/src/concurrent_cache.h index a2dd0bd2f..d7b4a57c7 100644 --- a/src/concurrent_cache.h +++ b/src/concurrent_cache.h @@ -23,6 +23,7 @@ #include "lrucache.h" +#include #include #include #include @@ -30,6 +31,30 @@ namespace zim { +template +struct FutureToValueCostEstimation { + template + static size_t cost(const std::shared_future& future) { + // The future is the value in the cache. + // When calling getOrPut, if the key is not in the cache, + // we add a future and then we compute the value and set the future. + // But lrucache call us when we add the future, meaning before we have + // computed the value. If we wait here (or use future.get), we will dead lock + // as we need to exit before setting the value. + // So in this case, we return 0. `ConcurrentCache::getOrPut` will correctly increase + // the current cache size when it have an actual value. + // We still need to compute the size of the value if the future has a value as it + // is also use to decrease the cache size when the value is drop. 
+ std::future_status status = future.wait_for(std::chrono::nanoseconds::zero()); + if (status == std::future_status::ready) { + return CostEstimation::cost(future.get()); + } else { + return 0; + } + } + +}; + /** ConcurrentCache implements a concurrent thread-safe cache @@ -39,16 +64,16 @@ namespace zim safe, and, in case of a cache miss, will block until that element becomes available. */ -template -class ConcurrentCache +template +class ConcurrentCache: private lru_cache, FutureToValueCostEstimation> { private: // types typedef std::shared_future ValuePlaceholder; - typedef lru_cache Impl; + typedef lru_cache> Impl; public: // types - explicit ConcurrentCache(size_t maxEntries) - : impl_(maxEntries) + explicit ConcurrentCache(size_t maxCost) + : Impl(maxCost) {} // Gets the entry corresponding to the given key. If the entry is not in the @@ -65,11 +90,33 @@ class ConcurrentCache { std::promise valuePromise; std::unique_lock l(lock_); - const auto x = impl_.getOrPut(key, valuePromise.get_future().share()); + auto shared_future = valuePromise.get_future().share(); + const auto x = Impl::getOrPut(key, shared_future); l.unlock(); if ( x.miss() ) { try { valuePromise.set_value(f()); + auto cost = CostEstimation::cost(x.value().get()); + // There is a small window when the valuePromise may be drop from lru cache after + // we set the value but before we increase the size of the cache. + // In this case we decrease the size of `cost` before increasing it. + // First of all it should be pretty rare as we have just put the future in the cache so it + // should not be the least used item. + // If it happens, this should not be a problem if current_size is bigger than `cost` (most of the time) + // For the really rare specific case of current cach size being lower than `cost` (if possible), + // `decreaseCost` will clamp the new size to 0. + { + std::unique_lock l(lock_); + // There is a window when the shared_future is drop from the cache while we are computing the value. 
+ // If this is the case, we readd the shared_future in the cache. + if (!Impl::exists(key)) { + // We don't have have to increase the cache as the future is already set, so the cost will be valid. + Impl::put(key, shared_future); + } else { + // We just have to increase the cost as we used 0 for unset future. + Impl::increaseCost(cost); + } + } } catch (std::exception& e) { drop(key); throw; @@ -82,26 +129,31 @@ class ConcurrentCache bool drop(const Key& key) { std::unique_lock l(lock_); - return impl_.drop(key); + return Impl::drop(key); + } + + template + void dropAll(F f) { + std::unique_lock l(lock_); + Impl::dropAll(f); } - size_t getMaxSize() const { + size_t getMaxCost() const { std::unique_lock l(lock_); - return impl_.getMaxSize(); + return Impl::getMaxCost(); } - size_t getCurrentSize() const { + size_t getCurrentCost() const { std::unique_lock l(lock_); - return impl_.size(); + return Impl::cost(); } - void setMaxSize(size_t newSize) { + void setMaxCost(size_t newSize) { std::unique_lock l(lock_); - return impl_.setMaxSize(newSize); + return Impl::setMaxCost(newSize); } private: // data - Impl impl_; mutable std::mutex lock_; }; diff --git a/src/decoderstreamreader.h b/src/decoderstreamreader.h index d48582b7e..43f2b6256 100644 --- a/src/decoderstreamreader.h +++ b/src/decoderstreamreader.h @@ -23,6 +23,7 @@ #include "compression.h" #include "istreamreader.h" +#include namespace zim { @@ -49,6 +50,10 @@ class DecoderStreamReader : public IStreamReader Decoder::stream_end_decode(&m_decoderState); } + size_t getMemorySize() const override { + return m_encodedDataReader->getMemorySize() + m_encodedDataChunk.size().v + Decoder::state_size(m_decoderState); + } + private: // functions void readNextChunk() { diff --git a/src/dirent_accessor.h b/src/dirent_accessor.h index ef3953128..2156e1033 100644 --- a/src/dirent_accessor.h +++ b/src/dirent_accessor.h @@ -55,9 +55,9 @@ class LIBZIM_PRIVATE_API DirectDirentAccessor std::shared_ptr getDirent(entry_index_t 
idx) const; entry_index_t getDirentCount() const { return m_direntCount; } - size_t getMaxCacheSize() const { return m_direntCache.getMaxSize(); } - size_t getCurrentCacheSize() const { return m_direntCache.size(); } - void setMaxCacheSize(size_t nbDirents) const { m_direntCache.setMaxSize(nbDirents); } + size_t getMaxCacheSize() const { return m_direntCache.getMaxCost(); } + size_t getCurrentCacheSize() const { return m_direntCache.cost(); } + void setMaxCacheSize(size_t nbDirents) const { m_direntCache.setMaxCost(nbDirents); } private: // functions std::shared_ptr readDirent(offset_t) const; @@ -67,7 +67,7 @@ class LIBZIM_PRIVATE_API DirectDirentAccessor std::unique_ptr mp_pathPtrReader; entry_index_t m_direntCount; - mutable lru_cache> m_direntCache; + mutable lru_cache, UnitCostEstimation> m_direntCache; mutable std::mutex m_direntCacheLock; mutable std::vector m_bufferDirentZone; diff --git a/src/file_reader.h b/src/file_reader.h index 817e24c31..b247f21cb 100644 --- a/src/file_reader.h +++ b/src/file_reader.h @@ -34,6 +34,7 @@ class LIBZIM_PRIVATE_API BaseFileReader : public Reader { : _offset(offset), _size(size) {} ~BaseFileReader() = default; zsize_t size() const override { return _size; }; + size_t getMemorySize() const override { return 0; }; offset_t offset() const override { return _offset; }; virtual const Buffer get_mmap_buffer(offset_t offset, diff --git a/src/fileimpl.cpp b/src/fileimpl.cpp index 3d63f3e8f..46751a488 100644 --- a/src/fileimpl.cpp +++ b/src/fileimpl.cpp @@ -20,6 +20,7 @@ */ #include "dirent_lookup.h" +#include "zim/zim.h" #include "zim_types.h" #include #define CHUNK_SIZE 1024 @@ -31,7 +32,6 @@ #include "buffer_reader.h" #include #include -#include #include #include #include @@ -162,6 +162,11 @@ class Grouping } //unnamed namespace + ClusterCache& getClusterCache() { + static ClusterCache clusterCache(CLUSTER_CACHE_SIZE); + return clusterCache; + } + ////////////////////////////////////////////////////////////////////// // FileImpl 
// @@ -187,7 +192,6 @@ class Grouping : zimFile(_zimFile), zimReader(makeFileReader(zimFile)), direntReader(new DirentReader(zimReader)), - clusterCache(CLUSTER_CACHE_SIZE), m_hasFrontArticlesIndex(true), m_startUserEntry(0), m_endUserEntry(0), @@ -244,25 +248,41 @@ class Grouping const_cast(m_endUserEntry) = getCountArticles(); } - auto result = tmpDirentLookup.find('X', "listing/titleOrdered/v1"); - if (result.first) { - mp_titleDirentAccessor = getTitleAccessorV1(result.second); - } + // Following code will may create cluster and we want to remove them from cache + // if something goes wrong. + try { + auto result = tmpDirentLookup.find('X', "listing/titleOrdered/v1"); + if (result.first) { + mp_titleDirentAccessor = getTitleAccessorV1(result.second); + } - if (!mp_titleDirentAccessor) { - if (!header.hasTitleListingV0()) { - throw ZimFileFormatError("Zim file doesn't contain a title ordered index"); + if (!mp_titleDirentAccessor) { + if (!header.hasTitleListingV0()) { + throw ZimFileFormatError("Zim file doesn't contain a title ordered index"); + } + offset_t titleOffset(header.getTitleIdxPos()); + zsize_t titleSize(sizeof(entry_index_type)*header.getArticleCount()); + mp_titleDirentAccessor = getTitleAccessor(titleOffset, titleSize, "Title index table"); + const_cast(m_hasFrontArticlesIndex) = false; } - offset_t titleOffset(header.getTitleIdxPos()); - zsize_t titleSize(sizeof(entry_index_type)*header.getArticleCount()); - mp_titleDirentAccessor = getTitleAccessor(titleOffset, titleSize, "Title index table"); - const_cast(m_hasFrontArticlesIndex) = false; + m_byTitleDirentLookup.reset(new ByTitleDirentLookup(mp_titleDirentAccessor.get())); + + readMimeTypes(); + } catch (...) 
{ + dropCachedClusters(); + throw; } - m_byTitleDirentLookup.reset(new ByTitleDirentLookup(mp_titleDirentAccessor.get())); + } - readMimeTypes(); + FileImpl::~FileImpl() { + dropCachedClusters(); } + void FileImpl::dropCachedClusters() const { + getClusterCache().dropAll([=](const std::tuple& key) {return std::get<0>(key) == this;}); + } + + std::unique_ptr FileImpl::getTitleAccessorV1(const entry_index_t idx) { auto dirent = mp_pathDirentAccessor->getDirent(idx); @@ -465,19 +485,21 @@ class Grouping return entry_index_t(m_articleListByCluster[idx.v]); } - FileImpl::ClusterHandle FileImpl::readCluster(cluster_index_t idx) + ClusterHandle FileImpl::readCluster(cluster_index_t idx) { offset_t clusterOffset(getClusterOffset(idx)); log_debug("read cluster " << idx << " from offset " << clusterOffset); return Cluster::read(*zimReader, clusterOffset); } - std::shared_ptr FileImpl::getCluster(cluster_index_t idx) + ClusterHandle FileImpl::getCluster(cluster_index_t idx) { if (idx >= getCountClusters()) throw ZimFileFormatError("cluster index out of range"); - auto cluster = clusterCache.getOrPut(idx.v, [=](){ return readCluster(idx); }); + auto cluster_index_type = idx.v; + auto key = std::make_tuple(this, cluster_index_type); + auto cluster = getClusterCache().getOrPut(key, [=](){ return readCluster(idx); }); #if ENV32BIT // There was a bug in the way we create the zim files using ZSTD compression. // We were using a too hight compression level and so a window of 128Mb. @@ -492,7 +514,7 @@ class Grouping // 5.0 is not a perfect way to detect faulty zim file (it will generate false // positives) but it should be enough. 
if (header.getMajorVersion() == 5 && header.getMinorVersion() == 0) { - clusterCache.drop(idx.v); + getClusterCache().drop(key); } } #endif @@ -566,11 +588,11 @@ class Grouping struct zim_MD5_CTX md5ctx; zim_MD5Init(&md5ctx); - + unsigned char ch[CHUNK_SIZE]; offset_type checksumPos = header.getChecksumPos(); offset_type toRead = checksumPos; - + for(auto part = zimFile->begin(); part != zimFile->end(); part++) { @@ -580,7 +602,7 @@ class Grouping zim_MD5Update(&md5ctx, ch, CHUNK_SIZE); toRead-=CHUNK_SIZE; } - + // Previous read was good, so we have exited the previous `while` because // `toRead(ch),toRead); } - + // It updates the checksum with the remaining amount of data when we // reach the end of the file or part zim_MD5Update(&md5ctx, ch, stream.gcount()); toRead-=stream.gcount(); - + if (stream.bad()) { perror("error while reading file"); return false; @@ -790,17 +812,6 @@ bool checkTitleListing(const IndirectDirentAccessor& accessor, entry_index_type return true; } - - size_t FileImpl::getClusterCacheMaxSize() const { - return clusterCache.getMaxSize(); - } - size_t FileImpl::getClusterCacheCurrentSize() const { - return clusterCache.getCurrentSize(); - } - void FileImpl::setClusterCacheMaxSize(size_t nbClusters) { - clusterCache.setMaxSize(nbClusters); - } - size_t FileImpl::getDirentCacheMaxSize() const { return mp_pathDirentAccessor->getMaxCacheSize(); } diff --git a/src/fileimpl.h b/src/fileimpl.h index 45452fe37..17cae5aec 100644 --- a/src/fileimpl.h +++ b/src/fileimpl.h @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -42,6 +43,11 @@ namespace zim { + class FileImpl; + typedef std::shared_ptr ClusterHandle; + typedef ConcurrentCache, ClusterHandle, ClusterMemorySize> ClusterCache; + ClusterCache& getClusterCache(); + class FileImpl { std::shared_ptr zimFile; @@ -54,9 +60,6 @@ namespace zim std::shared_ptr mp_pathDirentAccessor; std::unique_ptr mp_titleDirentAccessor; - typedef std::shared_ptr ClusterHandle; - ConcurrentCache 
clusterCache; - const bool m_hasFrontArticlesIndex; const entry_index_t m_startUserEntry; const entry_index_t m_endUserEntry; @@ -106,6 +109,7 @@ namespace zim explicit FileImpl(FdInput fd); explicit FileImpl(const std::vector& fds); #endif + ~FileImpl(); time_t getMTime() const; @@ -153,9 +157,6 @@ namespace zim bool checkIntegrity(IntegrityCheck checkType); - size_t getClusterCacheMaxSize() const; - size_t getClusterCacheCurrentSize() const; - void setClusterCacheMaxSize(size_t nbClusters); size_t getDirentCacheMaxSize() const; size_t getDirentCacheCurrentSize() const; void setDirentCacheMaxSize(size_t nbDirents); @@ -165,6 +166,8 @@ namespace zim explicit FileImpl(std::shared_ptr zimFile); FileImpl(std::shared_ptr zimFile, offset_t offset, zsize_t size); + void dropCachedClusters() const; + std::unique_ptr getTitleAccessorV1(const entry_index_t idx); std::unique_ptr getTitleAccessor(const offset_t offset, const zsize_t size, const std::string& name); diff --git a/src/istreamreader.h b/src/istreamreader.h index a0a5349b2..d2c937974 100644 --- a/src/istreamreader.h +++ b/src/istreamreader.h @@ -59,6 +59,9 @@ class LIBZIM_PRIVATE_API IStreamReader // Reads a blob of the specified size from the stream virtual std::unique_ptr sub_reader(zsize_t size); + // Get the total size occuped by the reader + virtual size_t getMemorySize() const = 0; + private: // virtual methods // Reads exactly 'nbytes' bytes into the provided buffer 'buf' // (which must be at least that big). Throws an exception if diff --git a/src/lrucache.h b/src/lrucache.h index 03a3644e6..7b768cf32 100644 --- a/src/lrucache.h +++ b/src/lrucache.h @@ -43,10 +43,39 @@ #include #include #include +#include +#include namespace zim { -template +struct UnitCostEstimation { + template + static size_t cost(const value_t& value) { + return 1; + } +}; + +/** + * A lru cache where the cost of each item can be different than 1. + * + * Most lru cache is limited by the number of items stored. 
+ * This implementation may have a different "size" per item, so the current size of + * this lru is not the number of item but the sum of all items' size. + * + * This implementation used is pretty simple (dumb) and have few limitations: + * - We consider than size of a item do not change over time. Especially the size of a + * item when we put it MUST be equal to the size of the same item when we drop it. + * - Cache eviction is still a Least Recently Used (LRU), so we drop the least used item(s) util + * we have enough space. No other consideration is used to select which item to drop. + * + * This lru is parametrized by a CostEstimation type. The type must have a static method `cost` + * taking a reference to a `value_t` and returing its "cost". As already said, this method must + * always return the same cost for the same value. + * + * While cost could be any kind of value, this implemention is intended to be used only with + * `UnitCostEstimation` (classic lru) and `FutureToValueCostEstimation`. 
+ */ +template class lru_cache { public: // types typedef typename std::pair key_value_pair_t; @@ -81,9 +110,10 @@ class lru_cache { }; public: // functions - explicit lru_cache(size_t max_size) : - _max_size(max_size) { - } + explicit lru_cache(size_t max_cost) : + _max_cost(max_cost), + _current_cost(0) + {} // If 'key' is present in the cache, returns the associated value, // otherwise puts the given value into the cache (and returns it with @@ -103,6 +133,8 @@ class lru_cache { auto it = _cache_items_map.find(key); if (it != _cache_items_map.end()) { _cache_items_list.splice(_cache_items_list.begin(), _cache_items_list, it->second); + decreaseCost(CostEstimation::cost(it->second->second)); + increaseCost(CostEstimation::cost(value)); it->second->second = value; } else { putMissing(key, value); @@ -120,37 +152,87 @@ class lru_cache { } bool drop(const key_t& key) { + list_iterator_t list_it; try { - auto list_it = _cache_items_map.at(key); - _cache_items_list.erase(list_it); - _cache_items_map.erase(key); - return true; + list_it = _cache_items_map.at(key); } catch (std::out_of_range& e) { return false; } + decreaseCost(CostEstimation::cost(list_it->second)); + _cache_items_list.erase(list_it); + _cache_items_map.erase(key); + return true; + } + + template + void dropAll(F f) { + std::vector keys_to_drop; + for (auto key_iter:_cache_items_map) { + key_t key = key_iter.first; + if (f(key)) { + keys_to_drop.push_back(key); + } + } + + for(auto key:keys_to_drop) { + drop(key); + } } bool exists(const key_t& key) const { return _cache_items_map.find(key) != _cache_items_map.end(); } - size_t size() const { - return _cache_items_map.size(); + size_t cost() const { + return _current_cost; } - size_t getMaxSize() const { - return _max_size; + size_t getMaxCost() const { + return _max_cost; } - void setMaxSize(size_t newSize) { - while (newSize < this->size()) { + void setMaxCost(size_t newMaxCost) { + while (newMaxCost < this->cost()) { dropLast(); } - _max_size = 
newSize; + _max_cost = newMaxCost; + } + + protected: + + void increaseCost(size_t extra_cost) { + // increaseSize is called after we have added a value to the cache to update + // the size of the current cache. + // We must ensure that we don't drop the value we just added. + // While it is technically ok to keep no value if max cache size is 0 (or memory size < of the size of one cluster) + // it will make recreate the value all the time. + // Let's be nice with our user and be tolerent to misconfiguration. + if (!extra_cost) { + // Don't try to remove an item if we have new size == 0. + // This is the case when concurent cache add a future without value. + // We will handle the real increase size when concurent cache will directly call us. + return; + } + _current_cost += extra_cost; + while (_current_cost > _max_cost && size() > 1) { + dropLast(); + } + } + + void decreaseCost(size_t costToRemove) { + if (costToRemove > _current_cost) { + std::cerr << "WARNING: We have detected inconsistant cache management, trying to remove " << costToRemove << " from a cache with size " << _current_cost << std::endl; + std::cerr << "Please open an issue on https://github.com/openzim/libzim/issues with this message and the zim file you use" << std::endl; + _current_cost = 0; + } else { + _current_cost -= costToRemove; + } } private: // functions void dropLast() { + auto list_it = _cache_items_list.back(); + decreaseCost(CostEstimation::cost(list_it.second)); _cache_items_map.erase(_cache_items_list.back().first); _cache_items_list.pop_back(); } @@ -159,15 +241,19 @@ class lru_cache { assert(_cache_items_map.find(key) == _cache_items_map.end()); _cache_items_list.push_front(key_value_pair_t(key, value)); _cache_items_map[key] = _cache_items_list.begin(); - if (_cache_items_map.size() > _max_size) { - dropLast(); - } + increaseCost(CostEstimation::cost(value)); + } + + size_t size() const { + return _cache_items_map.size(); } + private: // data std::list _cache_items_list; 
std::map _cache_items_map; - size_t _max_size; + size_t _max_cost; + size_t _current_cost; }; } // namespace zim diff --git a/src/rawstreamreader.h b/src/rawstreamreader.h index 504f67e63..36c30f38c 100644 --- a/src/rawstreamreader.h +++ b/src/rawstreamreader.h @@ -35,6 +35,10 @@ class RawStreamReader : public IStreamReader m_readerPos(0) {} + size_t getMemorySize() const override { + return m_reader->getMemorySize(); + } + void readImpl(char* buf, zsize_t nbytes) override { m_reader->read(buf, m_readerPos, zsize_t(nbytes)); diff --git a/src/reader.h b/src/reader.h index 8a4dc655c..5fdf7de6b 100644 --- a/src/reader.h +++ b/src/reader.h @@ -36,6 +36,7 @@ class LIBZIM_PRIVATE_API Reader { public: Reader() {}; virtual zsize_t size() const = 0; + virtual size_t getMemorySize() const = 0; virtual ~Reader() {}; void read(char* dest, offset_t offset, zsize_t size) const { diff --git a/test/archive.cpp b/test/archive.cpp index 4d7c63ff1..571190c53 100644 --- a/test/archive.cpp +++ b/test/archive.cpp @@ -43,6 +43,18 @@ using zim::unittests::TempFile; using zim::unittests::TestItem; using zim::unittests::IsFrontArticle; +class ZimArchive: public testing::Test { + protected: + void SetUp() override { + zim::setClusterCacheMaxSize(0); + zim::setClusterCacheMaxSize(CLUSTER_CACHE_SIZE); + ASSERT_EQ(zim::getClusterCacheCurrentSize(), 0); + } + void TearDown() override { + ASSERT_EQ(zim::getClusterCacheCurrentSize(), 0); + } +}; + using TestContextImpl = std::vector >; struct TestContext : TestContextImpl { TestContext(const std::initializer_list& il) @@ -80,7 +92,7 @@ emptyZimArchiveContent() return content; } -TEST(ZimArchive, openingAnInvalidZimArchiveFails) +TEST_F(ZimArchive, openingAnInvalidZimArchiveFails) { const char* const prefixes[] = { "ZIM\x04", "" }; const unsigned char bytes[] = {0x00, 0x01, 0x11, 0x30, 0xFF}; @@ -101,7 +113,7 @@ TEST(ZimArchive, openingAnInvalidZimArchiveFails) } } -TEST(ZimArchive, openingAnEmptyZimArchiveSucceeds) +TEST_F(ZimArchive, 
openingAnEmptyZimArchiveSucceeds) { const auto tmpfile = makeTempFile("empty_zim_file", emptyZimArchiveContent()); @@ -122,7 +134,7 @@ bool isNastyOffset(int offset) { return true; } -TEST(ZimArchive, nastyEmptyZimArchive) +TEST_F(ZimArchive, nastyEmptyZimArchive) { const std::string correctContent = emptyZimArchiveContent(); for ( int offset = 0; offset < 80; ++offset ) { @@ -136,7 +148,7 @@ TEST(ZimArchive, nastyEmptyZimArchive) } } -TEST(ZimArchive, wrongChecksumInEmptyZimArchive) +TEST_F(ZimArchive, wrongChecksumInEmptyZimArchive) { std::string zimfileContent = emptyZimArchiveContent(); zimfileContent[85] = '\xff'; @@ -147,7 +159,7 @@ TEST(ZimArchive, wrongChecksumInEmptyZimArchive) } -TEST(ZimArchive, openCreatedArchive) +TEST_F(ZimArchive, openCreatedArchive) { TempFile temp("zimfile"); auto tempPath = temp.path(); @@ -245,7 +257,7 @@ TEST(ZimArchive, openCreatedArchive) } #if WITH_TEST_DATA -TEST(ZimArchive, openRealZimArchive) +TEST_F(ZimArchive, openRealZimArchive) { const char* const zimfiles[] = { "small.zim", @@ -266,7 +278,7 @@ TEST(ZimArchive, openRealZimArchive) } } -TEST(ZimArchive, openSplitZimArchive) +TEST_F(ZimArchive, openSplitZimArchive) { const char* fname = "wikibooks_be_all_nopic_2017-02_splitted.zim"; @@ -286,119 +298,237 @@ struct TestCacheConfig { size_t direntLookupCacheSize; }; +struct RefEntry { + void test_is_equal(const zim::Entry& entry) { + ASSERT_EQ(path, entry.getPath()); + ASSERT_EQ(title, entry.getTitle()); + ASSERT_EQ(isRedirect, entry.isRedirect()); + if (isRedirect) { + zim::entry_index_type redirectId = redirect_or_hash; + ASSERT_EQ(redirectId, entry.getRedirectEntryIndex()); + } else { + auto hash = std::hash{}(std::string(entry.getItem().getData())); + ASSERT_EQ(redirect_or_hash, hash); + } + } -#define ASSERT_ARCHIVE_EQUIVALENT(REF_ARCHIVE, TEST_ARCHIVE) \ - ASSERT_ARCHIVE_EQUIVALENT_LIMIT(REF_ARCHIVE, TEST_ARCHIVE, REF_ARCHIVE.getEntryCount()) + std::string path; + std::string title; + bool isRedirect; + // size_t is 
either 32 or 64 bits and entry_index_type (redirect id) is always 32 bits. + size_t redirect_or_hash; +}; -#define ASSERT_ARCHIVE_EQUIVALENT_LIMIT(REF_ARCHIVE, TEST_ARCHIVE, LIMIT) \ - { \ - auto range = REF_ARCHIVE.iterEfficient(); \ - auto ref_it = range.begin(); \ - ASSERT_ARCHIVE_EQUIVALENT_IT_LIMIT(ref_it, range.end(), TEST_ARCHIVE, LIMIT) \ +struct RefArchiveContent { + RefArchiveContent(const zim::Archive& archive) { + for (auto entry:archive.iterEfficient()) { + RefEntry ref_entry = { + entry.getPath(), + entry.getTitle(), + entry.isRedirect(), + entry.isRedirect() ? entry.getRedirectEntryIndex() : std::hash{}(std::string(entry.getItem().getData())), + }; + ref_entries.push_back(ref_entry); + } } - -#define ASSERT_ARCHIVE_EQUIVALENT_IT_LIMIT(REF_IT, REF_END, TEST_ARCHIVE, LIMIT) \ - for (auto i = 0U; igetPath()); \ - ASSERT_EQ(REF_IT->getPath(), test_entry.getPath()); \ - ASSERT_EQ(REF_IT->getTitle(), test_entry.getTitle()); \ - ASSERT_EQ(REF_IT->isRedirect(), test_entry.isRedirect()); \ - if (REF_IT->isRedirect()) { \ - ASSERT_EQ(REF_IT->getRedirectEntryIndex(), test_entry.getRedirectEntryIndex()); \ - } \ - auto ref_item = REF_IT->getItem(true); \ - auto test_item = test_entry.getItem(true); \ - ASSERT_EQ(ref_item.getClusterIndex(), test_item.getClusterIndex()); \ - ASSERT_EQ(ref_item.getBlobIndex(), test_item.getBlobIndex()); \ - ASSERT_EQ(ref_item.getData(), test_item.getData()); \ + void test_is_equal(const zim::Archive& archive) { + for (auto ref_entry:ref_entries) { + auto entry = archive.getEntryByPath(ref_entry.path); + ref_entry.test_is_equal(entry); + } } + std::vector ref_entries; +}; -TEST(ZimArchive, cacheDontImpactReading) + +TEST_F(ZimArchive, cacheDontImpactReading) { const TestCacheConfig cacheConfigs[] = { {0, 0, 0}, - {1, 1, 1}, - {2, 2, 2}, - {10, 10, 10}, - {1000, 2000, 1000}, - {0, 2000, 1000}, - {1000, 0, 1000}, - {1000, 2000, 0}, - {1, 2000, 1000}, - {1000, 1, 1000}, - {1000, 2000, 1}, + {1, 1<<20, 1}, + {2, 2<<20, 2}, + {10, 
10<<20, 10}, + {1000, 2000<<20, 1000}, + {0, 2000<<20, 1000}, + {1000, 0<<20, 1000}, + {1000, 2000<<20, 0}, + {1, 2000<<20, 1000}, + {1000, 1<<20, 1000}, + {1000, 2000<<20, 1}, }; for (auto& testfile: getDataFilePath("small.zim")) { - auto ref_archive = zim::Archive(testfile.path); + RefArchiveContent ref_archive(zim::Archive(testfile.path)); for (auto cacheConfig: cacheConfigs) { auto test_archive = zim::Archive(testfile.path); test_archive.setDirentCacheMaxSize(cacheConfig.direntCacheSize); test_archive.setDirentLookupCacheMaxSize(cacheConfig.direntLookupCacheSize); - test_archive.setClusterCacheMaxSize(cacheConfig.clusterCacheSize); + zim::setClusterCacheMaxSize(cacheConfig.clusterCacheSize); EXPECT_EQ(test_archive.getDirentCacheMaxSize(), cacheConfig.direntCacheSize); EXPECT_EQ(test_archive.getDirentLookupCacheMaxSize(), cacheConfig.direntLookupCacheSize); - EXPECT_EQ(test_archive.getClusterCacheMaxSize(), cacheConfig.clusterCacheSize); + EXPECT_EQ(zim::getClusterCacheMaxSize(), cacheConfig.clusterCacheSize); - ASSERT_ARCHIVE_EQUIVALENT(ref_archive, test_archive) + ref_archive.test_is_equal(test_archive); } } } +TEST_F(ZimArchive, cacheClean) { + for (auto& testfile: getDataFilePath("wikibooks_be_all_nopic_2017-02.zim")) { + EXPECT_EQ(zim::getClusterCacheCurrentSize(), 0); // No clusters in cache + { + auto archive = zim::Archive(testfile.path); + auto range = archive.iterEfficient(); + auto it = range.begin(); + for (auto i = 0; i<50 && it != range.end(); i++, it++) { + // Be sure to search by path to populate the dirent cache + auto entry = archive.getEntryByPath(it->getPath()); + auto item = entry.getItem(true); + auto data = item.getData(); + } + EXPECT_GT(zim::getClusterCacheCurrentSize(), 0); // Some clusters are now in cache + } + EXPECT_EQ(zim::getClusterCacheCurrentSize(), 0); // No clusters in cache + } +} -TEST(ZimArchive, cacheChange) +TEST_F(ZimArchive, cacheChange) { - for (auto& testfile: getDataFilePath("wikibooks_be_all_nopic_2017-02.zim")) { - auto 
ref_archive = zim::Archive(testfile.path); + // We test only one variant here. + // Each variant has clusters of different sizes (especially the old "withns" which + // have a cluster compressed with an algorithm/compression level making the input stream + // have a size of 64MB), + // this makes all the following reasoning about cluster size a bit too complex. + // As the test here doesn't test that we can read all variants, we don't have to. + for (auto& testfile: getDataFilePath("wikibooks_be_all_nopic_2017-02.zim", {"noTitleListingV0"})) { + // wikibooks has only 2 clusters. One of 492121 bytes and one of 823716 bytes. + // For a total of 1315837 bytes. + // As we try to keep one cluster in the cache, any size under the size of one + // cluster will not be respected. + // So we will define 2 limits: + // 850<<10 : size higher than one cluster size but under the size of two + // 2 << 20 : size higher than two clusters + const size_t L1_SIZE = 850 << 10; + const size_t L2_SIZE = 2 << 20; + + EXPECT_EQ(zim::getClusterCacheCurrentSize(), 0); + RefArchiveContent ref_archive(zim::Archive(testfile.path)); auto archive = zim::Archive(testfile.path); archive.setDirentCacheMaxSize(30); - archive.setClusterCacheMaxSize(5); + zim::setClusterCacheMaxSize(L2_SIZE); - auto range = ref_archive.iterEfficient(); - auto ref_it = range.begin(); - ASSERT_ARCHIVE_EQUIVALENT_IT_LIMIT(ref_it, range.end(), archive, 50) + auto ref_it = ref_archive.ref_entries.begin(); + for (auto i = 0; i<50 && ref_it != ref_archive.ref_entries.end(); i++, ref_it++) { + auto entry = archive.getEntryByPath(ref_it->path); + ref_it->test_is_equal(entry); + } EXPECT_EQ(archive.getDirentCacheCurrentSize(), 30); - EXPECT_EQ(archive.getClusterCacheCurrentSize(), 2); // Only 2 clusters in the file + EXPECT_LE(zim::getClusterCacheCurrentSize(), L2_SIZE); // Only 2 clusters in the file // Reduce cache size archive.setDirentCacheMaxSize(10); - archive.setClusterCacheMaxSize(1); + zim::setClusterCacheMaxSize(L1_SIZE); 
EXPECT_EQ(archive.getDirentCacheCurrentSize(), 10); - EXPECT_EQ(archive.getClusterCacheCurrentSize(), 1); + EXPECT_LE(zim::getClusterCacheCurrentSize(), L1_SIZE); // We want to test change of cache while we are iterating on the archive. // So we don't reset the ref_it to `range.begin()`. - ASSERT_ARCHIVE_EQUIVALENT_IT_LIMIT(ref_it, range.end(), archive, 50) + for (auto i = 0; i<50 && ref_it != ref_archive.ref_entries.end(); i++, ref_it++) { + auto entry = archive.getEntryByPath(ref_it->path); + ref_it->test_is_equal(entry); + } EXPECT_EQ(archive.getDirentCacheCurrentSize(), 10); - EXPECT_EQ(archive.getClusterCacheCurrentSize(), 1); + EXPECT_LE(zim::getClusterCacheCurrentSize(), L1_SIZE); // Clean cache // (More than testing the value, this is needed as we want to be sure the cache is actually populated later) archive.setDirentCacheMaxSize(0); - archive.setClusterCacheMaxSize(0); + zim::setClusterCacheMaxSize(0); EXPECT_EQ(archive.getDirentCacheCurrentSize(), 0); - EXPECT_EQ(archive.getClusterCacheCurrentSize(), 0); + EXPECT_EQ(zim::getClusterCacheCurrentSize(), 0); // Increase the cache archive.setDirentCacheMaxSize(20); - archive.setClusterCacheMaxSize(1); + zim::setClusterCacheMaxSize(L1_SIZE); EXPECT_EQ(archive.getDirentCacheCurrentSize(), 0); - EXPECT_EQ(archive.getClusterCacheCurrentSize(), 0); + EXPECT_EQ(zim::getClusterCacheCurrentSize(), 0); - ASSERT_ARCHIVE_EQUIVALENT(ref_archive, archive) + ref_archive.test_is_equal(archive); EXPECT_EQ(archive.getDirentCacheCurrentSize(), 20); - EXPECT_EQ(archive.getClusterCacheCurrentSize(), 1); + EXPECT_LE(zim::getClusterCacheCurrentSize(), L1_SIZE); + } +} + + +TEST_F(ZimArchive, MultiZimCache) +{ + // Get a list of several zim files to open (whatever the variant) + std::vector zimPaths; + const char* const zimfiles[] = { + "wikibooks_be_all_nopic_2017-02.zim", + "wikibooks_be_all_nopic_2017-02_splitted.zim", + "wikipedia_en_climate_change_mini_2024-06.zim" + }; + + for ( const std::string fname : zimfiles ) { + for 
(auto& testfile: getDataFilePath(fname)) { + zimPaths.push_back(testfile.path); + } + } + + + const size_t SMALL_LIMIT = 5 << 20; + const size_t BIG_LIMIT = 200 << 20; + zim::setClusterCacheMaxSize(BIG_LIMIT); + + std::vector archives; + for (auto path:zimPaths) { + auto archive = zim::Archive(path); + for (auto entry:archive.iterEfficient()) { + auto item = entry.getItem(true); + auto data = item.getData(); + } + archives.push_back(archive); + } + + EXPECT_LE(zim::getClusterCacheCurrentSize(), BIG_LIMIT); + zim::setClusterCacheMaxSize(SMALL_LIMIT); + EXPECT_LE(zim::getClusterCacheCurrentSize(), SMALL_LIMIT); + + // Opening an archive should increase the cluster cache + zim::setClusterCacheMaxSize(BIG_LIMIT); + auto current_limit = zim::getClusterCacheCurrentSize(); + { + auto archive = zim::Archive(zimPaths[0]); + for (auto entry:archive.iterEfficient()) { + auto item = entry.getItem(true); + auto data = item.getData(); + } + EXPECT_GT(zim::getClusterCacheCurrentSize(), current_limit); + current_limit = zim::getClusterCacheCurrentSize(); + } + // Destroying an archive should decrease the cluster cache + EXPECT_LT(zim::getClusterCacheCurrentSize(), current_limit); + + // Be sure that decreasing the number of archives open also decreases the + // current cache size, until we reach 0. 
+ current_limit = zim::getClusterCacheCurrentSize(); + while (!archives.empty()) { + archives.pop_back(); + EXPECT_LE(zim::getClusterCacheCurrentSize(), current_limit); + current_limit = zim::getClusterCacheCurrentSize(); } + EXPECT_EQ(zim::getClusterCacheCurrentSize(), 0); } -TEST(ZimArchive, openDontFallbackOnNonSplitZimArchive) +TEST_F(ZimArchive, openDontFallbackOnNonSplitZimArchive) { const char* fname = "wikibooks_be_all_nopic_2017-02.zim"; @@ -414,7 +544,7 @@ TEST(ZimArchive, openDontFallbackOnNonSplitZimArchive) } } -TEST(ZimArchive, openNonExistantZimArchive) +TEST_F(ZimArchive, openNonExistantZimArchive) { const std::string fname = "non_existant.zim"; @@ -427,7 +557,7 @@ TEST(ZimArchive, openNonExistantZimArchive) } } -TEST(ZimArchive, openNonExistantZimSplitArchive) +TEST_F(ZimArchive, openNonExistantZimSplitArchive) { const std::string fname = "non_existant.zimaa"; @@ -440,7 +570,7 @@ TEST(ZimArchive, openNonExistantZimSplitArchive) } } -TEST(ZimArchive, randomEntry) +TEST_F(ZimArchive, randomEntry) { const char* const zimfiles[] = { "wikibooks_be_all_nopic_2017-02.zim", @@ -465,7 +595,7 @@ TEST(ZimArchive, randomEntry) } } -TEST(ZimArchive, illustration) +TEST_F(ZimArchive, illustration) { const char* const zimfiles[] = { "small.zim", @@ -510,7 +640,7 @@ struct TestDataInfo { } }; -TEST(ZimArchive, articleNumber) +TEST_F(ZimArchive, articleNumber) { TestDataInfo zimfiles[] = { // Name mediaCount, withns nons noTitleListingV0 @@ -564,7 +694,8 @@ class CapturedStderr #define EXPECT_BROKEN_ZIMFILE(ZIMPATH, EXPECTED_STDERROR_TEXT) \ CapturedStderr stderror; \ EXPECT_FALSE(zim::validate(ZIMPATH, checksToRun)); \ - EXPECT_EQ(EXPECTED_STDERROR_TEXT, std::string(stderror)) << ZIMPATH; + EXPECT_EQ(EXPECTED_STDERROR_TEXT, std::string(stderror)) << ZIMPATH; \ + ASSERT_EQ(zim::getClusterCacheCurrentSize(), 0); #define TEST_BROKEN_ZIM_NAME(ZIMNAME, EXPECTED) \ for(auto& testfile: getDataFilePath(ZIMNAME)) {EXPECT_BROKEN_ZIMFILE(testfile.path, EXPECTED)} @@ -576,7 
+707,7 @@ for(auto& testfile: getDataFilePath(ZIMNAME, CAT)) {EXPECT_BROKEN_ZIMFILE(testfi #define WITH_TITLE_IDX_CAT {"withns", "nons"} #if WITH_TEST_DATA -TEST(ZimArchive, validate) +TEST_F(ZimArchive, validate) { zim::IntegrityCheckList all; all.set(); @@ -774,7 +905,7 @@ void checkEquivalence(const zim::Archive& archive1, const zim::Archive& archive2 #endif } -TEST(ZimArchive, multipart) +TEST_F(ZimArchive, multipart) { auto nonSplittedZims = getDataFilePath("wikibooks_be_all_nopic_2017-02.zim"); auto splittedZims = getDataFilePath("wikibooks_be_all_nopic_2017-02_splitted.zim"); @@ -803,7 +934,7 @@ TEST(ZimArchive, multipart) #endif #ifndef _WIN32 -TEST(ZimArchive, openByFD) +TEST_F(ZimArchive, openByFD) { for(auto& testfile: getDataFilePath("small.zim")) { const zim::Archive archive1(testfile.path); @@ -814,7 +945,7 @@ TEST(ZimArchive, openByFD) } } -TEST(ZimArchive, openZIMFileEmbeddedInAnotherFile) +TEST_F(ZimArchive, openZIMFileEmbeddedInAnotherFile) { auto normalZims = getDataFilePath("small.zim"); auto embeddedZims = getDataFilePath("small.zim.embedded"); @@ -830,7 +961,7 @@ TEST(ZimArchive, openZIMFileEmbeddedInAnotherFile) } } -TEST(ZimArchive, openZIMFileMultiPartEmbeddedInAnotherFile) +TEST_F(ZimArchive, openZIMFileMultiPartEmbeddedInAnotherFile) { auto normalZims = getDataFilePath("small.zim"); auto embeddedZims = getDataFilePath("small.zim.embedded.multi"); @@ -873,7 +1004,7 @@ zim::Blob readItemData(const zim::Item::DirectAccessInfo& dai, zim::size_type si return zim::Blob(data, size); } -TEST(ZimArchive, getDirectAccessInformation) +TEST_F(ZimArchive, getDirectAccessInformation) { for(auto& testfile:getDataFilePath("small.zim")) { const zim::Archive archive(testfile.path); @@ -894,7 +1025,7 @@ TEST(ZimArchive, getDirectAccessInformation) } #ifndef _WIN32 -TEST(ZimArchive, getDirectAccessInformationInAnArchiveOpenedByFD) +TEST_F(ZimArchive, getDirectAccessInformationInAnArchiveOpenedByFD) { for(auto& testfile:getDataFilePath("small.zim")) { const 
int fd = OPEN_READ_ONLY(testfile.path); @@ -915,7 +1046,7 @@ TEST(ZimArchive, getDirectAccessInformationInAnArchiveOpenedByFD) } } -TEST(ZimArchive, getDirectAccessInformationFromEmbeddedArchive) +TEST_F(ZimArchive, getDirectAccessInformationFromEmbeddedArchive) { auto normalZims = getDataFilePath("small.zim"); auto embeddedZims = getDataFilePath("small.zim.embedded"); diff --git a/test/istreamreader.cpp b/test/istreamreader.cpp index a96b59156..78b2f1135 100644 --- a/test/istreamreader.cpp +++ b/test/istreamreader.cpp @@ -34,18 +34,25 @@ using namespace zim; // Implement the IStreamReader interface in the simplest way class InfiniteZeroStream : public IStreamReader { - void readImpl(char* buf, zim::zsize_t nbytes) { memset(buf, 0, nbytes.v); } + void readImpl(char* buf, zim::zsize_t nbytes) override { memset(buf, 0, nbytes.v); } + size_t getMemorySize() const override { + return 0; + } }; class InfiniteIncreasingStream: public IStreamReader { zim::offset_type current_offset = 0; - void readImpl(char* buf, zim::zsize_t nbytes) { + void readImpl(char* buf, zim::zsize_t nbytes) override { for (size_type i=0; iget_buffer(zim::offset_t(0), zim::zsize_t(N)); EXPECT_EQ(buffer.size().v, N); EXPECT_EQ(0, memcmp(buffer.data(), refbuf, N)); - + buffer = subReader->get_buffer(zim::offset_t(5), zim::zsize_t(N-5)); EXPECT_EQ(buffer.size().v, N-5); EXPECT_EQ(0, memcmp(buffer.data(), refbuf+5, N-5)); diff --git a/test/lrucache.cpp b/test/lrucache.cpp index 67dba943a..e43b914c5 100644 --- a/test/lrucache.cpp +++ b/test/lrucache.cpp @@ -38,47 +38,110 @@ const unsigned int TEST2_CACHE_CAPACITY = 50u; const unsigned int TEST2_CACHE_CAPACITY_SMALL = 10u; TEST(CacheTest, SimplePut) { - zim::lru_cache cache_lru(1); + zim::lru_cache cache_lru(1); cache_lru.put(7, 777); EXPECT_TRUE(cache_lru.exists(7)); EXPECT_EQ(777, cache_lru.get(7)); - EXPECT_EQ(1u, cache_lru.size()); + EXPECT_EQ(1u, cache_lru.cost()); } TEST(CacheTest, OverwritingPut) { - zim::lru_cache cache_lru(1); + zim::lru_cache 
cache_lru(1); cache_lru.put(7, 777); cache_lru.put(7, 222); EXPECT_TRUE(cache_lru.exists(7)); EXPECT_EQ(222, cache_lru.get(7)); - EXPECT_EQ(1u, cache_lru.size()); + EXPECT_EQ(1u, cache_lru.cost()); } TEST(CacheTest, MissingValue) { - zim::lru_cache cache_lru(1); + zim::lru_cache cache_lru(1); EXPECT_TRUE(cache_lru.get(7).miss()); EXPECT_FALSE(cache_lru.get(7).hit()); EXPECT_THROW(cache_lru.get(7).value(), std::range_error); } TEST(CacheTest, DropValue) { - zim::lru_cache cache_lru(3); + zim::lru_cache cache_lru(3); cache_lru.put(7, 777); cache_lru.put(8, 888); cache_lru.put(9, 999); - EXPECT_EQ(3u, cache_lru.size()); + EXPECT_EQ(3u, cache_lru.cost()); EXPECT_TRUE(cache_lru.exists(7)); EXPECT_EQ(777, cache_lru.get(7)); EXPECT_TRUE(cache_lru.drop(7)); - EXPECT_EQ(2u, cache_lru.size()); + EXPECT_EQ(2u, cache_lru.cost()); EXPECT_FALSE(cache_lru.exists(7)); EXPECT_THROW(cache_lru.get(7).value(), std::range_error); EXPECT_FALSE(cache_lru.drop(7)); } +struct IdCost { + static size_t cost(size_t value ) { + return value; + } +}; + +TEST(CacheTest, VariableCost) { + zim::lru_cache cache_lru(100); + + cache_lru.put(1, 11); + cache_lru.put(2, 22); + cache_lru.put(3, 33); + EXPECT_EQ(66u, cache_lru.cost()); + + cache_lru.put(4, 44); + EXPECT_EQ(99u, cache_lru.cost()); + EXPECT_FALSE(cache_lru.exists(1)); + EXPECT_TRUE(cache_lru.exists(2)); + EXPECT_TRUE(cache_lru.exists(3)); + EXPECT_TRUE(cache_lru.exists(4)); + + cache_lru.put(5, 55); + EXPECT_EQ(99u, cache_lru.cost()); + EXPECT_FALSE(cache_lru.exists(1)); + EXPECT_FALSE(cache_lru.exists(2)); + EXPECT_FALSE(cache_lru.exists(3)); + EXPECT_TRUE(cache_lru.exists(4)); + EXPECT_TRUE(cache_lru.exists(5)); + + cache_lru.put(1, 11); + EXPECT_EQ(66u, cache_lru.cost()); + EXPECT_TRUE(cache_lru.exists(1)); + EXPECT_FALSE(cache_lru.exists(2)); + EXPECT_FALSE(cache_lru.exists(3)); + EXPECT_FALSE(cache_lru.exists(4)); + EXPECT_TRUE(cache_lru.exists(5)); +} + +TEST(CacheTest, TooBigValue) { + zim::lru_cache cache_lru(10); + + 
cache_lru.put(1, 11); + EXPECT_EQ(11u, cache_lru.cost()); + EXPECT_TRUE(cache_lru.exists(1)); + + cache_lru.put(2, 22); + EXPECT_EQ(22u, cache_lru.cost()); + EXPECT_FALSE(cache_lru.exists(1)); + EXPECT_TRUE(cache_lru.exists(2)); + + cache_lru.put(3, 33); + EXPECT_EQ(33u, cache_lru.cost()); + EXPECT_FALSE(cache_lru.exists(1)); + EXPECT_FALSE(cache_lru.exists(2)); + EXPECT_TRUE(cache_lru.exists(3)); + + cache_lru.put(1, 11); + EXPECT_EQ(11u, cache_lru.cost()); + EXPECT_TRUE(cache_lru.exists(1)); + EXPECT_FALSE(cache_lru.exists(2)); + EXPECT_FALSE(cache_lru.exists(3)); +} + #define EXPECT_RANGE_MISSING_FROM_CACHE(CACHE, START, END) \ for (unsigned i = START; i < END; ++i) { \ EXPECT_FALSE(CACHE.exists(i)); \ @@ -93,7 +156,7 @@ for (unsigned i = START; i < END; ++i) { \ TEST(CacheTest1, KeepsAllValuesWithinCapacity) { - zim::lru_cache cache_lru(TEST2_CACHE_CAPACITY); + zim::lru_cache cache_lru(TEST2_CACHE_CAPACITY); for (int i = 0; i < NUM_OF_TEST2_RECORDS; ++i) { cache_lru.put(i, i); @@ -103,37 +166,37 @@ TEST(CacheTest1, KeepsAllValuesWithinCapacity) { EXPECT_RANGE_FULLY_IN_CACHE(cache_lru, (NUM_OF_TEST2_RECORDS - TEST2_CACHE_CAPACITY), NUM_OF_TEST2_RECORDS, 1) - size_t size = cache_lru.size(); + size_t size = cache_lru.cost(); EXPECT_EQ(TEST2_CACHE_CAPACITY, size); } TEST(CacheTest1, ChangeCacheCapacity) { - zim::lru_cache cache_lru(TEST2_CACHE_CAPACITY); + zim::lru_cache cache_lru(TEST2_CACHE_CAPACITY); for (int i = 0; i < NUM_OF_TEST2_RECORDS; ++i) { cache_lru.put(i, i); } - EXPECT_EQ(TEST2_CACHE_CAPACITY, cache_lru.size()); + EXPECT_EQ(TEST2_CACHE_CAPACITY, cache_lru.cost()); EXPECT_RANGE_MISSING_FROM_CACHE(cache_lru, 0, (NUM_OF_TEST2_RECORDS - TEST2_CACHE_CAPACITY)) EXPECT_RANGE_FULLY_IN_CACHE(cache_lru, (NUM_OF_TEST2_RECORDS - TEST2_CACHE_CAPACITY), NUM_OF_TEST2_RECORDS, 1) - cache_lru.setMaxSize(TEST2_CACHE_CAPACITY_SMALL); - EXPECT_EQ(TEST2_CACHE_CAPACITY_SMALL, cache_lru.size()); + cache_lru.setMaxCost(TEST2_CACHE_CAPACITY_SMALL); + 
EXPECT_EQ(TEST2_CACHE_CAPACITY_SMALL, cache_lru.cost()); EXPECT_RANGE_MISSING_FROM_CACHE(cache_lru, 0, (NUM_OF_TEST2_RECORDS - TEST2_CACHE_CAPACITY_SMALL)) EXPECT_RANGE_FULLY_IN_CACHE(cache_lru, (NUM_OF_TEST2_RECORDS - TEST2_CACHE_CAPACITY_SMALL), NUM_OF_TEST2_RECORDS, 1) - cache_lru.setMaxSize(TEST2_CACHE_CAPACITY); + cache_lru.setMaxCost(TEST2_CACHE_CAPACITY); for (int i = 0; i < NUM_OF_TEST2_RECORDS; ++i) { cache_lru.put(i, 1000*i); } - EXPECT_EQ(TEST2_CACHE_CAPACITY, cache_lru.size()); + EXPECT_EQ(TEST2_CACHE_CAPACITY, cache_lru.cost()); EXPECT_RANGE_MISSING_FROM_CACHE(cache_lru, 0, (NUM_OF_TEST2_RECORDS - TEST2_CACHE_CAPACITY)) EXPECT_RANGE_FULLY_IN_CACHE(cache_lru, (NUM_OF_TEST2_RECORDS - TEST2_CACHE_CAPACITY), NUM_OF_TEST2_RECORDS, 1000) } TEST(ConcurrentCacheTest, handleException) { - zim::ConcurrentCache cache(1); + zim::ConcurrentCache cache(1); auto val = cache.getOrPut(7, []() { return 777; }); EXPECT_EQ(val, 777); EXPECT_THROW(cache.getOrPut(8, []() { throw std::runtime_error("oups"); return 0; }), std::runtime_error);