31
31
#include " Logger/Logger.h"
32
32
#include " Shared/approx_quantile.h"
33
33
#include " Shared/quantile.h"
34
+ #include " Shared/thread_count.h"
34
35
#include " StringDictionary/StringDictionaryProxy.h"
35
36
#include " ThirdParty/robin_hood.h"
36
37
@@ -41,6 +42,19 @@ class ResultSet;
41
42
* managed allocator object
42
43
*/
43
44
class RowSetMemoryOwner final : public SimpleAllocator, boost::noncopyable {
45
+ private:
46
// Bookkeeping for one per-thread small memory pool: a cursor into the chunk
// currently being carved up and the number of bytes still available in it.
// A default-constructed pool is empty (null cursor, zero bytes), so the first
// non-empty request against it has to obtain a chunk.
//
// Follows the Rule of Zero: the members are trivially copyable, so the
// implicitly generated copy operations are exactly what is needed.
//
// NOTE(review): pools are stored back to back in a std::vector and are
// updated by different threads; if profiling shows false sharing, consider
// padding this struct to a cache line.
struct ThreadMemPool {
  // Next free byte of the current chunk; nullptr while no chunk is assigned.
  int8_t* data{nullptr};
  // Number of bytes still available starting at `data`.
  size_t size{0};
};
54
+
55
// Size of the chunk a per-thread small pool takes from allocate() when it is
// (re)filled; requests larger than this bypass the small pools entirely.
+ constexpr static size_t SMALL_MEM_POOL_SIZE = 10 << 20 ; // 10MB
56
// Largest pool remainder that may be abandoned when a pool is refilled. If more
// than this is still free, the pool is kept and the request goes to allocate().
+ constexpr static size_t MAX_IGNORED_FRAGMENT = 1 << 20 ; // 1MB
57
+
44
58
public:
45
59
RowSetMemoryOwner (DataProvider* data_provider,
46
60
const size_t arena_block_size,
@@ -52,6 +66,7 @@ class RowSetMemoryOwner final : public SimpleAllocator, boost::noncopyable {
52
66
// size up to 256 bytes to avoid such cache conflicts. This significantly
53
67
// reduces the amount of allocated virtual memory, which is important for ASAN runs.
54
68
allocator_ = std::make_unique<Arena>(arena_block_size);
69
+ small_mem_pools_.resize (cpu_threads ());
55
70
}
56
71
57
72
enum class StringTranslationType { SOURCE_INTERSECTION, SOURCE_UNION };
@@ -67,6 +82,35 @@ class RowSetMemoryOwner final : public SimpleAllocator, boost::noncopyable {
67
82
allocator_->allocate (std::max (num_bytes, (size_t )256 )));
68
83
}
69
84
85
+ int8_t * allocateSmallMtNoLock (size_t size, size_t thread_idx = 0 ) override {
86
+ if (size > SMALL_MEM_POOL_SIZE) {
87
+ return allocate (size);
88
+ }
89
+
90
+ // Round-up size to keep 8-byte alignment.
91
+ size = (size + 7 ) & (~7 );
92
+
93
+ // Normally, we use TBB thread index and don't expect it to be greater than
94
+ // cpu_threads() but we don't respect g_cpu_threads_override currently for TBB.
95
+ if (thread_idx >= small_mem_pools_.size ()) {
96
+ return allocate (size);
97
+ }
98
+
99
+ auto & pool = small_mem_pools_[thread_idx];
100
+ if (size > pool.size ) {
101
+ if (pool.size > MAX_IGNORED_FRAGMENT) {
102
+ return allocate (size);
103
+ }
104
+ pool.data = allocate (SMALL_MEM_POOL_SIZE);
105
+ pool.size = SMALL_MEM_POOL_SIZE;
106
+ }
107
+
108
+ auto res = pool.data ;
109
+ pool.data += size;
110
+ pool.size -= size;
111
+ return res;
112
+ }
113
+
70
114
int8_t * allocateCountDistinctBuffer (const size_t num_bytes,
71
115
const size_t thread_idx = 0 ) {
72
116
int8_t * buffer = allocate (num_bytes, thread_idx);
@@ -267,6 +311,10 @@ class RowSetMemoryOwner final : public SimpleAllocator, boost::noncopyable {
267
311
size_t arena_block_size_; // for cloning
268
312
std::unique_ptr<Arena> allocator_;
269
313
314
+ // Small memory pools that get memory from the base arena and are used
315
+ // for lock-free allocation of small memory batches in execution kernels.
316
+ std::vector<ThreadMemPool> small_mem_pools_;
317
+
270
318
mutable std::mutex state_mutex_;
271
319
272
320
friend class ResultSet ;
0 commit comments