|
31 | 31 |
|
32 | 32 | #include "log.h" |
33 | 33 |
|
34 | | -#include "config.h" |
35 | | - |
36 | 34 | log_define("zim.cluster") |
37 | 35 |
|
38 | 36 | #define log_debug1(e) |
@@ -190,20 +188,27 @@ getClusterReader(const Reader& zimReader, offset_t offset, Cluster::Compression* |
190 | 188 | // - The stream itself may allocate memory. |
191 | 189 | // To solve this, we take the average and say a cluster's blob readers will half be created and |
192 | 190 | // so we assume a readers size of half the full uncompressed cluster data size. |
| 191 | +// If cluster is not compressed, we never store its content (mmap is created on demand and not cached), |
| 192 | +// so we use a size of 0 for the readers. |
193 | 193 | // It also appears that when we get the size of the stream, we reach a state where no |
194 | 194 | // further allocation will be done by it. Probably because:
195 | | -// - We already started to decompresse the stream to read the offsets |
| 195 | +// - We already started to decompress the stream to read the offsets |
196 | 196 | // - Cluster data size is smaller than window size associated to compression level (?) |
197 | 197 | // We anyway check that and print a warning if this is not the case, hoping that users will create
198 | 198 | // an issue allowing us for further analysis. |
199 | 199 | size_t zim::ClusterMemorySize::get_cluster_size(const Cluster& cluster) { |
200 | 200 | if (!cluster.m_memorySize) { |
201 | | - auto base_struct = sizeof(Cluster); |
202 | 201 | auto offsets_size = sizeof(offset_t) * cluster.m_blobOffsets.size(); |
203 | | - auto readers_size = cluster.m_blobOffsets.back().v / 2; |
| 202 | + auto readers_size = 0; |
| 203 | + if (cluster.isCompressed()) { |
| 204 | + readers_size = cluster.m_blobOffsets.back().v / 2; |
| 205 | + } |
204 | 206 | cluster.m_streamSize = cluster.m_reader->getMemorySize(); |
205 | | - cluster.m_memorySize = base_struct + offsets_size + readers_size + cluster.m_streamSize; |
206 | | - std::cout << cluster.m_memorySize << " = base:" << base_struct <<" offsets:" << offsets_size <<" readers:" << readers_size <<" input:" << cluster.m_streamSize << std::endl; |
| 207 | +// Compression level defines a huge window and makes the decompression stream allocate a huge amount of memory to store it.
| 208 | + // However, the used memory will not be greater than the content itself, even if window is bigger. |
| 209 | + // On linux (at least), the real used memory will be the actual memory used, not the one allocated. |
| 210 | +// So, let's clamp the stream size to the size of the content itself.
| 211 | + cluster.m_memorySize = offsets_size + readers_size + std::min(cluster.m_streamSize, cluster.m_blobOffsets.back().v); |
207 | 212 | } |
208 | 213 | auto streamSize = cluster.m_reader->getMemorySize(); |
209 | 214 | if (streamSize != cluster.m_streamSize) { |
|
0 commit comments