Skip to content

Commit f9d0c3e

Browse files
author
Venkatesh Prasad
committed
PS-9647: MySQL Perf Improvements
https://perconadev.atlassian.net/browse/PS-9647

This patch introduces a new hybrid data structure for the MVCC ReadView, taken from Enhanced MySQL.

Typically, online transactions are short rather than long, and transaction IDs increase continuously. To leverage these characteristics, a hybrid data structure is used: a static array for consecutive short transaction IDs and a vector for long transactions. With a 2048-byte array, up to 16,384 consecutive active transaction IDs can be stored, each bit representing a transaction ID. The minimum short transaction ID is used to differentiate between short and long transactions: IDs smaller than this minimum go into the long transaction vector, while IDs equal to or greater than it are placed in the short transaction array.

For an ID checked in changes_visible, if it is below the minimum short transaction ID, a direct query is made to the vector, which is efficient because the number of long transactions is generally small. If the ID is equal to or above the minimum short transaction ID, a bitwise query is performed, with a time complexity of O(1), compared to the previous O(log n) complexity. This improvement enhances efficiency and reduces cache migration between NUMA nodes, as O(1) queries typically complete within a single CPU time slice.

- In the short_rw_trx_ids_bitmap structure, MAX_SHORT_ACTIVE_BYTES is set to 65536, theoretically accommodating up to 524,288 consecutive short transaction IDs.
- If the limit is exceeded, the oldest short transaction IDs are converted into long transactions and stored in long_rw_trx_ids.
- Global long and short transactions are distinguished by min_short_valid_id: IDs smaller than this value are treated as global long transactions, while IDs equal to or greater than it are considered global short transactions.
During the copying process from the global active transaction list, the short_rw_trx_ids_bitmap structure, which uses only one bit per transaction ID, allows for much higher copying efficiency compared to the native MySQL solution. For example, with 1000 active transactions, the native MySQL version would require copying at least 8000 bytes, whereas the optimized solution may only need a few hundred bytes. This results in a significant improvement in copying efficiency.
1 parent b715136 commit f9d0c3e

File tree

5 files changed

+494
-58
lines changed

5 files changed

+494
-58
lines changed

storage/innobase/include/read0types.h

Lines changed: 50 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ class MVCC;
4646
/** Read view lists the trx ids of those transactions for which a consistent
4747
read should not see the modifications to the database. */
4848

49+
/** Size in bytes of the per-view short-transaction bitmap
ReadView::top_active. ReadView::changes_visible() tests one bit per
transaction id in that array and reduces its byte index modulo this
value. */
#define MAX_TOP_ACTIVE_BYTES 8192
50+
/** Size in bytes of the global short-transaction bitmap
trx_sys_t::short_rw_trx_ids_bitmap. ReadView::changes_visible() also adds
this value as a bias before taking the modulo, so the byte-index
difference cannot go negative.
NOTE(review): the 0x7FFFF id mask used there equals 8 * 65536 - 1, i.e. it
presumably assumes this exact value -- confirm before changing it. */
#define MAX_SHORT_ACTIVE_BYTES 65536
51+
4952
class ReadView {
5053
/** This is similar to a std::vector but it is not a drop
5154
in replacement. It is specific to ReadView. */
@@ -173,14 +176,33 @@ class ReadView {
173176

174177
if (id >= m_low_limit_id) {
175178
return (false);
176-
177-
} else if (m_ids.empty()) {
179+
} else if (empty()) {
178180
return (true);
179181
}
180182

181-
const ids_t::value_type *p = m_ids.data();
183+
/* first search short bitmap */
184+
if (m_has_short_actives && id >= m_short_min_id) {
185+
if (id > m_short_max_id) {
186+
return false;
187+
}
188+
unsigned int trim_id = id & 0x7FFFF;
189+
unsigned int trim_min_id = m_short_min_id & 0x7FFFF;
190+
unsigned int array_index = (trim_id >> 3);
191+
unsigned int array_min_index = (trim_min_id >> 3);
192+
array_index = (MAX_SHORT_ACTIVE_BYTES + array_index - array_min_index) %
193+
MAX_TOP_ACTIVE_BYTES;
194+
unsigned int array_remainder = trim_id & (0x7);
195+
int is_value_set = top_active[array_index] & (1 << (7 - array_remainder));
196+
if (is_value_set) {
197+
return false;
198+
} else {
199+
return true;
200+
}
201+
}
202+
203+
const ids_t::value_type *p = m_long_ids.data();
182204

183-
return (!std::binary_search(p, p + m_ids.size(), id));
205+
return (!std::binary_search(p, p + m_long_ids.size(), id));
184206
}
185207

186208
/**
@@ -235,7 +257,18 @@ class ReadView {
235257

236258
/**
237259
@return true if there are no transaction ids in the snapshot */
238-
bool empty() const { return (m_ids.empty()); }
260+
bool empty() const {
261+
bool long_empty = m_long_ids.empty();
262+
if (long_empty) {
263+
if (!m_has_short_actives) {
264+
return true;
265+
} else {
266+
return false;
267+
}
268+
} else {
269+
return false;
270+
}
271+
}
239272

240273
/**
241274
Clones a read view object. The resulting read view has identical change
@@ -264,17 +297,19 @@ class ReadView {
264297
fprintf(file, "Read view low limit trx n:o " TRX_ID_FMT "\n",
265298
low_limit_no());
266299
print_limits(file);
267-
fprintf(file, "Read view individually stored trx ids:\n");
268-
for (ulint i = 0; i < m_ids.size(); i++)
269-
fprintf(file, "Read view trx id " TRX_ID_FMT "\n", m_ids.data()[i]);
300+
fprintf(file, "Read view individually stored long trx ids:\n");
301+
for (ulint i = 0; i < m_long_ids.size(); i++)
302+
fprintf(file, "Read view trx id " TRX_ID_FMT "\n", m_long_ids.data()[i]);
270303
}
271304

272305
bool is_cloned() const noexcept { return (m_cloned); }
273306

274307
private:
275308
/**
276309
Copy the transaction ids from the source vector */
277-
inline void copy_trx_ids(const trx_ids_t &trx_ids);
310+
inline void copy_long_trx_ids(const trx_ids_t &trx_ids,
311+
trx_id_t min_short_id);
312+
inline void copy_short_trx_ids();
278313

279314
/**
280315
Opens a read view where exactly the transactions serialized before this
@@ -307,6 +342,7 @@ class ReadView {
307342
ReadView &operator=(const ReadView &);
308343

309344
private:
345+
unsigned char top_active[MAX_TOP_ACTIVE_BYTES];
310346
/** The read should not see any transaction with trx id >= this
311347
value. In other words, this is the "high water mark". */
312348
trx_id_t m_low_limit_id;
@@ -322,7 +358,7 @@ class ReadView {
322358

323359
/** Set of RW transactions that was active when this snapshot
324360
was taken */
325-
ids_t m_ids;
361+
ids_t m_long_ids;
326362

327363
/** The view does not need to see the undo logs for transactions
328364
whose transaction number is strictly smaller (<) than this value:
@@ -337,6 +373,10 @@ class ReadView {
337373
trx_id_t m_view_low_limit_no;
338374
#endif /* UNIV_DEBUG */
339375

376+
trx_id_t m_short_min_id;
377+
trx_id_t m_short_max_id;
378+
bool m_has_short_actives;
379+
340380
/** AC-NL-RO transaction view that has been "closed". */
341381
bool m_closed;
342382

storage/innobase/include/trx0sys.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -553,7 +553,11 @@ struct trx_sys_t {
553553
take a snapshot of these transactions whose changes are not visible to it.
554554
We should remove transactions from the list before committing in memory and
555555
releasing locks to ensure right order of removal and consistent snapshot. */
556-
trx_ids_t rw_trx_ids;
556+
trx_ids_t long_rw_trx_ids;
557+
unsigned char short_rw_trx_ids_bitmap[MAX_SHORT_ACTIVE_BYTES];
558+
int short_rw_trx_valid_number;
559+
trx_id_t min_short_valid_id;
560+
trx_id_t max_short_valid_id;
557561

558562
char pad7[ut::INNODB_CACHE_LINE_SIZE];
559563

0 commit comments

Comments
 (0)