Skip to content

Commit a66836d

Browse files
committed
fixup! Ensure cluster cache is properly clean in case of exception in constructor.
1 parent 5815e91 commit a66836d

File tree

2 files changed

+44
-35
lines changed

2 files changed

+44
-35
lines changed

src/cluster.cpp

Lines changed: 42 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -178,42 +178,49 @@ getClusterReader(const Reader& zimReader, offset_t offset, Cluster::Compression*
178178
}
179179
}
180180

181-
}
182-
183-
// This function must return a constant size for a given cluster.
184-
// This is important as we want to remove the same size as what we add when we remove
185-
// the cluster from the cache.
186-
// However, because of partial decompression, this size can change:
187-
// - As we advance in the compression, we can create new blob readers in `m_blobReaders`
188-
// - The stream itself may allocate memory.
189-
// To solve this, we take the average and say a cluster's blob readers will half be created and
190-
// so we assume a readers size of half the full uncompressed cluster data size.
191-
// If cluster is not compressed, we never store its content (mmap is created on demand and not cached),
192-
// so we use a size of 0 for the readers.
193-
// It also appears that when we get the size of the stream, we reach a state where no
194-
// further allocation will be done by it. Probably because:
195-
// - We already started to decompress the stream to read the offsets
196-
// - Cluster data size is smaller than window size associated to compression level (?)
197-
// We anyway check that and print a warning if this is not the case, hoping that the user will create
198-
// an issue allowing us for further analysis.
199-
size_t zim::ClusterMemorySize::get_cluster_size(const Cluster& cluster) {
200-
if (!cluster.m_memorySize) {
201-
auto offsets_size = sizeof(offset_t) * cluster.m_blobOffsets.size();
202-
auto readers_size = 0;
203-
if (cluster.isCompressed()) {
204-
readers_size = cluster.m_blobOffsets.back().v / 2;
181+
// This function must return a constant size for a given cluster.
182+
// This is important as we want to remove the same size as what we add when we remove
183+
// the cluster from the cache.
184+
// However, because of partial decompression, this size can change:
185+
// - As we advance in the compression, we can create new blob readers in `m_blobReaders`
186+
// - The stream itself may allocate memory.
187+
// To solve this, we take the average and say a cluster's blob readers will half be created and
188+
// so we assume a readers size of half the full uncompressed cluster data size.
189+
// If cluster is not compressed, we never store its content (mmap is created on demand and not cached),
190+
// so we use a size of 0 for the readers.
191+
// It also appears that when we get the size of the stream, we reach a state where no
192+
// further allocation will be done by it. Probably because:
193+
// - We already started to decompress the stream to read the offsets
194+
// - Cluster data size is smaller than window size associated to compression level (?)
195+
// We anyway check that and print a warning if this is not the case, hoping that the user will create
196+
// an issue allowing us for further analysis.
197+
// Note:
198+
// - No need to protect this method from concurrent access as it will be called by the concurent_cache which will
199+
// have a lock (on lru cache) to ensure only one thread accesses it at the same time.
200+
size_t Cluster::getMemorySize() const {
201+
if (!m_memorySize) {
202+
auto offsets_size = sizeof(offset_t) * m_blobOffsets.size();
203+
auto readers_size = 0;
204+
if (isCompressed()) {
205+
readers_size = m_blobOffsets.back().v / 2;
206+
}
207+
m_streamSize = m_reader->getMemorySize();
208+
// Compression level defines a huge window and makes the decompression stream allocate a huge memory to store it.
209+
// However, the used memory will not be greater than the content itself, even if window is bigger.
210+
// On linux (at least), the real used memory will be the actual memory used, not the one allocated.
211+
// So, let's clamp the stream size to the size of the content itself.
212+
m_memorySize = offsets_size + readers_size + std::min(m_streamSize, m_blobOffsets.back().v);
205213
}
206-
cluster.m_streamSize = cluster.m_reader->getMemorySize();
207-
// Compression level defines a huge window and makes the decompression stream allocate a huge memory to store it.
208-
// However, the used memory will not be greater than the content itself, even if window is bigger.
209-
// On linux (at least), the real used memory will be the actual memory used, not the one allocated.
210-
// So, let's clamp the stream size to the size of the content itself.
211-
cluster.m_memorySize = offsets_size + readers_size + std::min(cluster.m_streamSize, cluster.m_blobOffsets.back().v);
214+
auto streamSize = m_reader->getMemorySize();
215+
if (streamSize != m_streamSize) {
216+
std::cerr << "WARNING: stream size have changed from " << m_streamSize << " to " << streamSize << std::endl;
217+
std::cerr << "Please open an issue on https://github.com/openzim/libzim/issues with this message and the zim file you use" << std::endl;
218+
}
219+
return m_memorySize;
212220
}
213-
auto streamSize = cluster.m_reader->getMemorySize();
214-
if (streamSize != cluster.m_streamSize) {
215-
std::cerr << "WARNING: stream size have changed from " << cluster.m_streamSize << " to " << streamSize << std::endl;
216-
std::cerr << "Please open an issue on https://github.com/openzim/libzim/issues with this message and the zim file you use" << std::endl;
221+
222+
size_t ClusterMemorySize::get_cluster_size(const Cluster& cluster) {
223+
return cluster.getMemorySize();
217224
}
218-
return cluster.m_memorySize;
225+
219226
}

src/cluster.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,8 @@ namespace zim
9595
Blob getBlob(blob_index_t n, offset_t offset, zsize_t size) const;
9696

9797
static std::shared_ptr<Cluster> read(const Reader& zimReader, offset_t clusterOffset);
98+
99+
size_t getMemorySize() const;
98100
friend struct ClusterMemorySize;
99101
};
100102

0 commit comments

Comments
 (0)