diff --git a/include/zim/archive.h b/include/zim/archive.h index 36242ccd1..a5985d1d9 100644 --- a/include/zim/archive.h +++ b/include/zim/archive.h @@ -534,6 +534,70 @@ namespace zim */ std::shared_ptr getImpl() const { return m_impl; } + /** Get the maximum size of the cluster cache. + * + * @return The maximum number of clusters stored in the cache. + */ + size_t getClusterCacheMaxSize() const; + + /** Get the current size of the cluster cache. + * + * @return The number of clusters currently stored in the cache. + */ + size_t getClusterCacheCurrentSize() const; + + /** Set the maximum size of the cluster cache. + * + * If the new size is lower than the number of currently stored clusters, + * some clusters will be dropped from the cache to respect the new size. + * + * @param nbClusters The maximum number of clusters stored in the cache. + */ + void setClusterCacheMaxSize(size_t nbClusters); + + /** Get the maximum size of the dirent cache. + * + * @return The maximum number of dirents stored in the cache. + */ + size_t getDirentCacheMaxSize() const; + + /** Get the current size of the dirent cache. + * + * @return The number of dirents currently stored in the cache. + */ + size_t getDirentCacheCurrentSize() const; + + /** Set the maximum size of the dirent cache. + * + * If the new size is lower than the number of currently stored dirents, + * some dirents will be dropped from the cache to respect the new size. + * + * @param nbDirents The maximum number of dirents stored in the cache. + */ + void setDirentCacheMaxSize(size_t nbDirents); + + /** Get the maximum size of the dirent lookup cache. + * + * The returned value is the default size or the last size that was set. + * This may not correspond to the actual size of the dirent lookup cache. + * See `setDirentLookupCacheMaxSize` for more information. + * + * @return The maximum number of sub ranges created in the lookup cache. + */ + size_t getDirentLookupCacheMaxSize() const; + + /** Set the maximum size of the dirent lookup cache. 
+ * + * Contrary to the other `set*CacheMaxSize` methods, this method has no effect once + * the lookup cache has been created. + * The lookup cache is created on the first access to an entry in the archive. + * So this method must be called before any access to content (including metadata). + * It is best to call this method first, just after the archive is created. + * + * @param nbRanges The maximum number of sub ranges created in the lookup cache. + */ + void setDirentLookupCacheMaxSize(size_t nbRanges); + #ifdef ZIM_PRIVATE cluster_index_type getClusterCount() const; offset_type getClusterOffset(cluster_index_type idx) const; diff --git a/src/archive.cpp b/src/archive.cpp index fcb43ed97..e370a9bf2 100644 --- a/src/archive.cpp +++ b/src/archive.cpp @@ -504,6 +504,47 @@ namespace zim return m_impl->hasNewNamespaceScheme(); } + size_t Archive::getClusterCacheMaxSize() const + { + return m_impl->getClusterCacheMaxSize(); + } + + size_t Archive::getClusterCacheCurrentSize() const + { + return m_impl->getClusterCacheCurrentSize(); + } + + void Archive::setClusterCacheMaxSize(size_t nbClusters) + { + m_impl->setClusterCacheMaxSize(nbClusters); + } + + size_t Archive::getDirentCacheMaxSize() const + { + return m_impl->getDirentCacheMaxSize(); + } + + size_t Archive::getDirentCacheCurrentSize() const + { + return m_impl->getDirentCacheCurrentSize(); + } + + void Archive::setDirentCacheMaxSize(size_t nbDirents) + { + m_impl->setDirentCacheMaxSize(nbDirents); + } + + + size_t Archive::getDirentLookupCacheMaxSize() const + { + return m_impl->getDirentLookupCacheMaxSize(); + } + + void Archive::setDirentLookupCacheMaxSize(size_t nbRanges) + { + m_impl->setDirentLookupCacheMaxSize(nbRanges); + } + cluster_index_type Archive::getClusterCount() const { return cluster_index_type(m_impl->getCountClusters()); diff --git a/src/compression.cpp b/src/compression.cpp index a931ee8ab..f5d3352c8 100644 --- a/src/compression.cpp +++ b/src/compression.cpp @@ -21,8 +21,6 @@ #include "compression.h" -#include 
"envvalue.h" - #include #include @@ -30,8 +28,7 @@ const std::string LZMA_INFO::name = "lzma"; void LZMA_INFO::init_stream_decoder(stream_t* stream, char* raw_data) { *stream = LZMA_STREAM_INIT; - unsigned memsize = zim::envMemSize("ZIM_LZMA_MEMORY_SIZE", LZMA_MEMORY_SIZE * 1024 * 1024); - auto errcode = lzma_stream_decoder(stream, memsize, 0); + auto errcode = lzma_stream_decoder(stream, LZMA_MEMORY_SIZE * 1024 * 1024, 0); if (errcode != LZMA_OK) { throw std::runtime_error("Impossible to allocated needed memory to uncompress lzma stream"); } diff --git a/src/concurrent_cache.h b/src/concurrent_cache.h index 0533d656f..a2dd0bd2f 100644 --- a/src/concurrent_cache.h +++ b/src/concurrent_cache.h @@ -23,6 +23,7 @@ #include "lrucache.h" +#include #include #include @@ -84,9 +85,24 @@ class ConcurrentCache return impl_.drop(key); } + size_t getMaxSize() const { + std::unique_lock l(lock_); + return impl_.getMaxSize(); + } + + size_t getCurrentSize() const { + std::unique_lock l(lock_); + return impl_.size(); + } + + void setMaxSize(size_t newSize) { + std::unique_lock l(lock_); + return impl_.setMaxSize(newSize); + } + private: // data Impl impl_; - std::mutex lock_; + mutable std::mutex lock_; }; } // namespace zim diff --git a/src/dirent_accessor.cpp b/src/dirent_accessor.cpp index c19c41ed0..54cbf4801 100644 --- a/src/dirent_accessor.cpp +++ b/src/dirent_accessor.cpp @@ -21,7 +21,6 @@ #include "direntreader.h" #include "_dirent.h" -#include "envvalue.h" #include @@ -36,7 +35,7 @@ DirectDirentAccessor::DirectDirentAccessor( : mp_direntReader(direntReader), mp_pathPtrReader(std::move(pathPtrReader)), m_direntCount(direntCount), - m_direntCache(envValue("ZIM_DIRENTCACHE", DIRENT_CACHE_SIZE)), + m_direntCache(DIRENT_CACHE_SIZE), m_bufferDirentZone(256) {} diff --git a/src/dirent_accessor.h b/src/dirent_accessor.h index fb1bc3ec5..ef3953128 100644 --- a/src/dirent_accessor.h +++ b/src/dirent_accessor.h @@ -24,6 +24,7 @@ #include "lrucache.h" #include "config.h" +#include 
#include #include #include @@ -54,6 +55,10 @@ class LIBZIM_PRIVATE_API DirectDirentAccessor std::shared_ptr getDirent(entry_index_t idx) const; entry_index_t getDirentCount() const { return m_direntCount; } + size_t getMaxCacheSize() const { return m_direntCache.getMaxSize(); } + size_t getCurrentCacheSize() const { return m_direntCache.size(); } + void setMaxCacheSize(size_t nbDirents) const { m_direntCache.setMaxSize(nbDirents); } + private: // functions std::shared_ptr readDirent(offset_t) const; diff --git a/src/dirent_lookup.h b/src/dirent_lookup.h index 1a5215615..4776c83e5 100644 --- a/src/dirent_lookup.h +++ b/src/dirent_lookup.h @@ -42,11 +42,13 @@ class DirentLookup public: // functions explicit DirentLookup(const DirentAccessor* _direntAccessor); + virtual ~DirentLookup() = default; index_t getNamespaceRangeBegin(char ns) const; index_t getNamespaceRangeEnd(char ns) const; + virtual size_t getSize() const { return 0; }; - Result find(char ns, const std::string& key) const; + virtual Result find(char ns, const std::string& key) const; protected: // functions int compareWithDirentAt(char ns, const std::string& key, entry_index_type i) const; @@ -83,7 +85,8 @@ class FastDirentLookup : public DirentLookup public: // functions FastDirentLookup(const DirentAccessor* _direntAccessor, entry_index_type cacheEntryCount); - typename BaseType::Result find(char ns, const std::string& key) const; + virtual size_t getSize() const; + virtual typename BaseType::Result find(char ns, const std::string& key) const; private: // functions std::string getDirentKey(entry_index_type i) const; @@ -204,6 +207,12 @@ DirentLookup::find(char ns, const std::string& key) const return findInRange(0, direntCount, ns, key); } +template +size_t FastDirentLookup::getSize() const { + return lookupGrid.getSize(); +} + + template typename DirentLookup::Result DirentLookup::findInRange(entry_index_type l, entry_index_type u, char ns, const std::string& key) const diff --git a/src/envvalue.cpp 
b/src/envvalue.cpp deleted file mode 100644 index 1d5c64f64..000000000 --- a/src/envvalue.cpp +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (C) 2009 Tommi Maekitalo - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and - * NON-INFRINGEMENT. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#include -#include - -namespace zim -{ - unsigned envValue(const char* env, unsigned def) - { - const char* v = ::getenv(env); - if (v) - { - std::istringstream s(v); - s >> def; - } - return def; - } - - unsigned envMemSize(const char* env, unsigned def) - { - const char* v = ::getenv(env); - if (v) - { - char unit = '\0'; - std::istringstream s(v); - s >> def >> unit; - - switch (unit) - { - case 'k': - case 'K': def *= 1024; break; - case 'm': - case 'M': def *= 1024 * 1024; break; - case 'g': - case 'G': def *= 1024 * 1024 * 1024; break; - } - } - return def; - } -} - diff --git a/src/envvalue.h b/src/envvalue.h deleted file mode 100644 index d6dffd481..000000000 --- a/src/envvalue.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (C) 2009 Tommi Maekitalo - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of the - * License, or (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, but - * is provided AS IS, WITHOUT ANY WARRANTY; without even the implied - * warranty of MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, and - * NON-INFRINGEMENT. See the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - * - */ - -#ifndef ZIM_ENVVALUE_H -#define ZIM_ENVVALUE_H - -namespace zim -{ - unsigned envValue(const char* env, unsigned def); - unsigned envMemSize(const char* env, unsigned def); -} - -#endif // ZIM_ENVVALUE_H diff --git a/src/fileimpl.cpp b/src/fileimpl.cpp index 0a115ba28..3d63f3e8f 100644 --- a/src/fileimpl.cpp +++ b/src/fileimpl.cpp @@ -19,6 +19,9 @@ * */ +#include "dirent_lookup.h" +#include "zim_types.h" +#include #define CHUNK_SIZE 1024 #include "fileimpl.h" #include @@ -34,7 +37,6 @@ #include #include "config.h" #include "log.h" -#include "envvalue.h" #include "md5.h" #include "tools.h" #include "fileheader.h" @@ -185,10 +187,12 @@ class Grouping : zimFile(_zimFile), zimReader(makeFileReader(zimFile)), direntReader(new DirentReader(zimReader)), - clusterCache(envValue("ZIM_CLUSTERCACHE", CLUSTER_CACHE_SIZE)), + clusterCache(CLUSTER_CACHE_SIZE), m_hasFrontArticlesIndex(true), m_startUserEntry(0), - m_endUserEntry(0) + m_endUserEntry(0), + m_direntLookupCreated(false), + m_direntLookupSize(DIRENT_LOOKUP_CACHE_SIZE) { log_trace("read file \"" << zimFile->filename() << '"'); @@ -231,7 +235,19 @@ class Grouping quickCheckForCorruptFile(); - mp_titleDirentAccessor = getTitleAccessor("listing/titleOrdered/v1"); + DirentLookup tmpDirentLookup(mp_pathDirentAccessor.get()); + + if (header.useNewNamespaceScheme()) { + const_cast(m_startUserEntry) = tmpDirentLookup.getNamespaceRangeBegin('C'); + const_cast(m_endUserEntry) = tmpDirentLookup.getNamespaceRangeEnd('C'); 
+ } else { + const_cast(m_endUserEntry) = getCountArticles(); + } + + auto result = tmpDirentLookup.find('X', "listing/titleOrdered/v1"); + if (result.first) { + mp_titleDirentAccessor = getTitleAccessorV1(result.second); + } if (!mp_titleDirentAccessor) { if (!header.hasTitleListingV0()) { @@ -247,14 +263,9 @@ class Grouping readMimeTypes(); } - std::unique_ptr FileImpl::getTitleAccessor(const std::string& path) + std::unique_ptr FileImpl::getTitleAccessorV1(const entry_index_t idx) { - auto result = direntLookup().find('X', path); - if (!result.first) { - return nullptr; - } - - auto dirent = mp_pathDirentAccessor->getDirent(result.second); + auto dirent = mp_pathDirentAccessor->getDirent(idx); auto cluster = getCluster(dirent->getClusterNumber()); if (cluster->isCompressed()) { // This is a ZimFileFormatError. @@ -263,7 +274,7 @@ class Grouping } auto titleOffset = getClusterOffset(dirent->getClusterNumber()) + cluster->getBlobOffset(dirent->getBlobNumber()); auto titleSize = cluster->getBlobSize(dirent->getBlobNumber()); - return getTitleAccessor(titleOffset, titleSize, "Title index table" + path); + return getTitleAccessor(titleOffset, titleSize, "Title index v1"); } std::unique_ptr FileImpl::getTitleAccessor(const offset_t offset, const zsize_t size, const std::string& name) @@ -285,11 +296,15 @@ class Grouping // in the call stack. // 2. With `glibc` an exceptional execution of `std::call_once` doesn't // unlock the mutex associated with the `std::once_flag` object. 
- if ( !m_direntLookup ) { + if (!m_direntLookupCreated.load(std::memory_order_acquire)) { std::lock_guard lock(m_direntLookupCreationMutex); if ( !m_direntLookup ) { - const auto cacheSize = envValue("ZIM_DIRENTLOOKUPCACHE", DIRENT_LOOKUP_CACHE_SIZE); - m_direntLookup.reset(new DirentLookup(mp_pathDirentAccessor.get(), cacheSize)); + if (m_direntLookupSize == 0) { + m_direntLookup = std::make_unique(mp_pathDirentAccessor.get()); + } else { + m_direntLookup = std::make_unique(mp_pathDirentAccessor.get(), m_direntLookupSize); + } + m_direntLookupCreated.store(true, std::memory_order_release); } } return *m_direntLookup; @@ -364,13 +379,6 @@ class Grouping p = zp+1; } - - if (header.useNewNamespaceScheme()) { - const_cast(m_startUserEntry) = getNamespaceBeginOffset('C'); - const_cast(m_endUserEntry) = getNamespaceEndOffset('C'); - } else { - const_cast(m_endUserEntry) = getCountArticles(); - } } FileImpl::FindxResult FileImpl::findx(char ns, const std::string& path) @@ -756,7 +764,11 @@ bool checkTitleListing(const IndirectDirentAccessor& accessor, entry_index_type ret = checkTitleListing(*titleDirentAccessor, articleCount); } - auto titleDirentAccessor = getTitleAccessor("listing/titleOrdered/v1"); + auto titleDirentAccessor = std::unique_ptr(); + auto result = direntLookup().find('X', "listing/titleOrdered/v1"); + if (result.first) { + titleDirentAccessor = getTitleAccessorV1(result.second); + } if (titleDirentAccessor) { ret &= checkTitleListing(*titleDirentAccessor, articleCount); } @@ -778,4 +790,32 @@ bool checkTitleListing(const IndirectDirentAccessor& accessor, entry_index_type return true; } + + size_t FileImpl::getClusterCacheMaxSize() const { + return clusterCache.getMaxSize(); + } + size_t FileImpl::getClusterCacheCurrentSize() const { + return clusterCache.getCurrentSize(); + } + void FileImpl::setClusterCacheMaxSize(size_t nbClusters) { + clusterCache.setMaxSize(nbClusters); + } + + size_t FileImpl::getDirentCacheMaxSize() const { + return 
mp_pathDirentAccessor->getMaxCacheSize(); + } + size_t FileImpl::getDirentCacheCurrentSize() const { + return mp_pathDirentAccessor->getCurrentCacheSize(); + } + void FileImpl::setDirentCacheMaxSize(size_t nbDirents) { + mp_pathDirentAccessor->setMaxCacheSize(nbDirents); + } + + size_t FileImpl::getDirentLookupCacheMaxSize() const { + if (!m_direntLookupCreated.load(std::memory_order_acquire)) { + return m_direntLookupSize; + } else { + return m_direntLookup->getSize(); + } + } } diff --git a/src/fileimpl.h b/src/fileimpl.h index a604e88b3..45452fe37 100644 --- a/src/fileimpl.h +++ b/src/fileimpl.h @@ -22,6 +22,7 @@ #ifndef ZIM_FILEIMPL_H #define ZIM_FILEIMPL_H +#include #include #include #include @@ -75,9 +76,12 @@ namespace zim } }; - using DirentLookup = zim::FastDirentLookup; + using DirentLookup = zim::DirentLookup; + using FastDirentLookup = zim::FastDirentLookup; mutable std::unique_ptr m_direntLookup; mutable std::mutex m_direntLookupCreationMutex; + mutable std::atomic_bool m_direntLookupCreated; + size_t m_direntLookupSize; struct ByTitleDirentLookupConfig @@ -148,11 +152,20 @@ namespace zim bool is_multiPart() const; bool checkIntegrity(IntegrityCheck checkType); + + size_t getClusterCacheMaxSize() const; + size_t getClusterCacheCurrentSize() const; + void setClusterCacheMaxSize(size_t nbClusters); + size_t getDirentCacheMaxSize() const; + size_t getDirentCacheCurrentSize() const; + void setDirentCacheMaxSize(size_t nbDirents); + size_t getDirentLookupCacheMaxSize() const; + void setDirentLookupCacheMaxSize(size_t nbRanges) { m_direntLookupSize = nbRanges; }; private: explicit FileImpl(std::shared_ptr zimFile); FileImpl(std::shared_ptr zimFile, offset_t offset, zsize_t size); - std::unique_ptr getTitleAccessor(const std::string& path); + std::unique_ptr getTitleAccessorV1(const entry_index_t idx); std::unique_ptr getTitleAccessor(const offset_t offset, const zsize_t size, const std::string& name); void prepareArticleListByCluster() const; diff --git 
a/src/lrucache.h b/src/lrucache.h index 3389446cb..03a3644e6 100644 --- a/src/lrucache.h +++ b/src/lrucache.h @@ -138,14 +138,29 @@ class lru_cache { return _cache_items_map.size(); } + size_t getMaxSize() const { + return _max_size; + } + + void setMaxSize(size_t newSize) { + while (newSize < this->size()) { + dropLast(); + } + _max_size = newSize; + } + private: // functions + void dropLast() { + _cache_items_map.erase(_cache_items_list.back().first); + _cache_items_list.pop_back(); + } + void putMissing(const key_t& key, const value_t& value) { assert(_cache_items_map.find(key) == _cache_items_map.end()); _cache_items_list.push_front(key_value_pair_t(key, value)); _cache_items_map[key] = _cache_items_list.begin(); if (_cache_items_map.size() > _max_size) { - _cache_items_map.erase(_cache_items_list.back().first); - _cache_items_list.pop_back(); + dropLast(); } } diff --git a/src/meson.build b/src/meson.build index 5fa1f7f3d..bba76b17e 100644 --- a/src/meson.build +++ b/src/meson.build @@ -13,7 +13,6 @@ common_sources = [ 'dirent.cpp', 'dirent_accessor.cpp', 'entry.cpp', - 'envvalue.cpp', 'fileheader.cpp', 'fileimpl.cpp', 'file_compound.cpp', diff --git a/src/narrowdown.h b/src/narrowdown.h index 3be55ac0f..807cfd9e9 100644 --- a/src/narrowdown.h +++ b/src/narrowdown.h @@ -98,6 +98,10 @@ class NarrowDown : pred(&keyContentArea) {} + size_t getSize() const { + return entries.size(); + } + // Add another entry to the search index. The key of the next item is used // to derive and store a shorter pseudo-key as explained in the long comment // above the class. 
diff --git a/test/archive.cpp b/test/archive.cpp index 4c59f836c..4d7c63ff1 100644 --- a/test/archive.cpp +++ b/test/archive.cpp @@ -280,6 +280,124 @@ TEST(ZimArchive, openSplitZimArchive) } } +struct TestCacheConfig { + size_t direntCacheSize; + size_t clusterCacheSize; + size_t direntLookupCacheSize; +}; + + +#define ASSERT_ARCHIVE_EQUIVALENT(REF_ARCHIVE, TEST_ARCHIVE) \ + ASSERT_ARCHIVE_EQUIVALENT_LIMIT(REF_ARCHIVE, TEST_ARCHIVE, REF_ARCHIVE.getEntryCount()) + +#define ASSERT_ARCHIVE_EQUIVALENT_LIMIT(REF_ARCHIVE, TEST_ARCHIVE, LIMIT) \ + { \ + auto range = REF_ARCHIVE.iterEfficient(); \ + auto ref_it = range.begin(); \ + ASSERT_ARCHIVE_EQUIVALENT_IT_LIMIT(ref_it, range.end(), TEST_ARCHIVE, LIMIT) \ + } + + +#define ASSERT_ARCHIVE_EQUIVALENT_IT_LIMIT(REF_IT, REF_END, TEST_ARCHIVE, LIMIT) \ + for (auto i = 0U; igetPath()); \ + ASSERT_EQ(REF_IT->getPath(), test_entry.getPath()); \ + ASSERT_EQ(REF_IT->getTitle(), test_entry.getTitle()); \ + ASSERT_EQ(REF_IT->isRedirect(), test_entry.isRedirect()); \ + if (REF_IT->isRedirect()) { \ + ASSERT_EQ(REF_IT->getRedirectEntryIndex(), test_entry.getRedirectEntryIndex()); \ + } \ + auto ref_item = REF_IT->getItem(true); \ + auto test_item = test_entry.getItem(true); \ + ASSERT_EQ(ref_item.getClusterIndex(), test_item.getClusterIndex()); \ + ASSERT_EQ(ref_item.getBlobIndex(), test_item.getBlobIndex()); \ + ASSERT_EQ(ref_item.getData(), test_item.getData()); \ + } + +TEST(ZimArchive, cacheDontImpactReading) +{ + const TestCacheConfig cacheConfigs[] = { + {0, 0, 0}, + {1, 1, 1}, + {2, 2, 2}, + {10, 10, 10}, + {1000, 2000, 1000}, + {0, 2000, 1000}, + {1000, 0, 1000}, + {1000, 2000, 0}, + {1, 2000, 1000}, + {1000, 1, 1000}, + {1000, 2000, 1}, + }; + + for (auto& testfile: getDataFilePath("small.zim")) { + auto ref_archive = zim::Archive(testfile.path); + + for (auto cacheConfig: cacheConfigs) { + auto test_archive = zim::Archive(testfile.path); + test_archive.setDirentCacheMaxSize(cacheConfig.direntCacheSize); + 
test_archive.setDirentLookupCacheMaxSize(cacheConfig.direntLookupCacheSize); + test_archive.setClusterCacheMaxSize(cacheConfig.clusterCacheSize); + + EXPECT_EQ(test_archive.getDirentCacheMaxSize(), cacheConfig.direntCacheSize); + EXPECT_EQ(test_archive.getDirentLookupCacheMaxSize(), cacheConfig.direntLookupCacheSize); + EXPECT_EQ(test_archive.getClusterCacheMaxSize(), cacheConfig.clusterCacheSize); + + ASSERT_ARCHIVE_EQUIVALENT(ref_archive, test_archive) + } + } +} + + +TEST(ZimArchive, cacheChange) +{ + for (auto& testfile: getDataFilePath("wikibooks_be_all_nopic_2017-02.zim")) { + auto ref_archive = zim::Archive(testfile.path); + auto archive = zim::Archive(testfile.path); + + archive.setDirentCacheMaxSize(30); + archive.setClusterCacheMaxSize(5); + + auto range = ref_archive.iterEfficient(); + auto ref_it = range.begin(); + ASSERT_ARCHIVE_EQUIVALENT_IT_LIMIT(ref_it, range.end(), archive, 50) + EXPECT_EQ(archive.getDirentCacheCurrentSize(), 30); + EXPECT_EQ(archive.getClusterCacheCurrentSize(), 2); // Only 2 clusters in the file + + // Reduce cache size + archive.setDirentCacheMaxSize(10); + archive.setClusterCacheMaxSize(1); + + EXPECT_EQ(archive.getDirentCacheCurrentSize(), 10); + EXPECT_EQ(archive.getClusterCacheCurrentSize(), 1); + + // We want to test change of cache while we are iterating on the archive. + // So we don't reset the ref_it to `range.begin()`. 
+ ASSERT_ARCHIVE_EQUIVALENT_IT_LIMIT(ref_it, range.end(), archive, 50) + + EXPECT_EQ(archive.getDirentCacheCurrentSize(), 10); + EXPECT_EQ(archive.getClusterCacheCurrentSize(), 1); + + // Clean cache + // (More than testing the value, this is needed as we want to be sure the cache is actually populated later) + archive.setDirentCacheMaxSize(0); + archive.setClusterCacheMaxSize(0); + + EXPECT_EQ(archive.getDirentCacheCurrentSize(), 0); + EXPECT_EQ(archive.getClusterCacheCurrentSize(), 0); + + // Increase the cache + archive.setDirentCacheMaxSize(20); + archive.setClusterCacheMaxSize(1); + EXPECT_EQ(archive.getDirentCacheCurrentSize(), 0); + EXPECT_EQ(archive.getClusterCacheCurrentSize(), 0); + + ASSERT_ARCHIVE_EQUIVALENT(ref_archive, archive) + EXPECT_EQ(archive.getDirentCacheCurrentSize(), 20); + EXPECT_EQ(archive.getClusterCacheCurrentSize(), 1); + } +} + TEST(ZimArchive, openDontFallbackOnNonSplitZimArchive) { const char* fname = "wikibooks_be_all_nopic_2017-02.zim"; diff --git a/test/lrucache.cpp b/test/lrucache.cpp index fff97717e..67dba943a 100644 --- a/test/lrucache.cpp +++ b/test/lrucache.cpp @@ -35,6 +35,7 @@ const int NUM_OF_TEST2_RECORDS = 100; const unsigned int TEST2_CACHE_CAPACITY = 50u; +const unsigned int TEST2_CACHE_CAPACITY_SMALL = 10u; TEST(CacheTest, SimplePut) { zim::lru_cache cache_lru(1); @@ -78,6 +79,19 @@ TEST(CacheTest, DropValue) { EXPECT_FALSE(cache_lru.drop(7)); } +#define EXPECT_RANGE_MISSING_FROM_CACHE(CACHE, START, END) \ +for (unsigned i = START; i < END; ++i) { \ + EXPECT_FALSE(CACHE.exists(i)); \ +} + +#define EXPECT_RANGE_FULLY_IN_CACHE(CACHE, START, END, VALUE_KEY_RATIO) \ +for (unsigned i = START; i < END; ++i) { \ + EXPECT_TRUE(CACHE.exists(i)); \ + EXPECT_EQ(i*VALUE_KEY_RATIO, cache_lru.get(i)); \ +} + + + TEST(CacheTest1, KeepsAllValuesWithinCapacity) { zim::lru_cache cache_lru(TEST2_CACHE_CAPACITY); @@ -85,19 +99,39 @@ TEST(CacheTest1, KeepsAllValuesWithinCapacity) { cache_lru.put(i, i); } - for (unsigned i = 0; i < 
NUM_OF_TEST2_RECORDS - TEST2_CACHE_CAPACITY; ++i) { - EXPECT_FALSE(cache_lru.exists(i)); - } + EXPECT_RANGE_MISSING_FROM_CACHE(cache_lru, 0, (NUM_OF_TEST2_RECORDS - TEST2_CACHE_CAPACITY)) - for (int i = NUM_OF_TEST2_RECORDS - TEST2_CACHE_CAPACITY; i < NUM_OF_TEST2_RECORDS; ++i) { - EXPECT_TRUE(cache_lru.exists(i)); - EXPECT_EQ(i, cache_lru.get(i)); - } + EXPECT_RANGE_FULLY_IN_CACHE(cache_lru, (NUM_OF_TEST2_RECORDS - TEST2_CACHE_CAPACITY), NUM_OF_TEST2_RECORDS, 1) size_t size = cache_lru.size(); EXPECT_EQ(TEST2_CACHE_CAPACITY, size); } +TEST(CacheTest1, ChangeCacheCapacity) { + zim::lru_cache cache_lru(TEST2_CACHE_CAPACITY); + + for (int i = 0; i < NUM_OF_TEST2_RECORDS; ++i) { + cache_lru.put(i, i); + } + + EXPECT_EQ(TEST2_CACHE_CAPACITY, cache_lru.size()); + EXPECT_RANGE_MISSING_FROM_CACHE(cache_lru, 0, (NUM_OF_TEST2_RECORDS - TEST2_CACHE_CAPACITY)) + EXPECT_RANGE_FULLY_IN_CACHE(cache_lru, (NUM_OF_TEST2_RECORDS - TEST2_CACHE_CAPACITY), NUM_OF_TEST2_RECORDS, 1) + + cache_lru.setMaxSize(TEST2_CACHE_CAPACITY_SMALL); + EXPECT_EQ(TEST2_CACHE_CAPACITY_SMALL, cache_lru.size()); + EXPECT_RANGE_MISSING_FROM_CACHE(cache_lru, 0, (NUM_OF_TEST2_RECORDS - TEST2_CACHE_CAPACITY_SMALL)) + EXPECT_RANGE_FULLY_IN_CACHE(cache_lru, (NUM_OF_TEST2_RECORDS - TEST2_CACHE_CAPACITY_SMALL), NUM_OF_TEST2_RECORDS, 1) + + cache_lru.setMaxSize(TEST2_CACHE_CAPACITY); + for (int i = 0; i < NUM_OF_TEST2_RECORDS; ++i) { + cache_lru.put(i, 1000*i); + } + EXPECT_EQ(TEST2_CACHE_CAPACITY, cache_lru.size()); + EXPECT_RANGE_MISSING_FROM_CACHE(cache_lru, 0, (NUM_OF_TEST2_RECORDS - TEST2_CACHE_CAPACITY)) + EXPECT_RANGE_FULLY_IN_CACHE(cache_lru, (NUM_OF_TEST2_RECORDS - TEST2_CACHE_CAPACITY), NUM_OF_TEST2_RECORDS, 1000) +} + TEST(ConcurrentCacheTest, handleException) { zim::ConcurrentCache cache(1); auto val = cache.getOrPut(7, []() { return 777; });