Skip to content

Commit bec2652

Browse files
committed
Optimize transaction command parsing to avoid unnecessary tokenization
Previously, the parser always tokenized the full command, even when we only needed to check whether it was a transaction command. Now, it first extracts the first word to determine relevance and performs full tokenization only when necessary.
1 parent 7c665b9 commit bec2652

File tree

3 files changed

+133
-33
lines changed

3 files changed

+133
-33
lines changed

include/PgSQL_ExplicitTxnStateMgr.h

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ struct TxnCmd {
5656
*/
5757
class PgSQL_TxnCmdParser {
5858
public:
59+
// Pre-reserves room for 16 entries in `tokens` at construction time.
// NOTE(review): presumably `tokens` is a std::vector; if so, reserve() can throw
// std::bad_alloc, and because this constructor is noexcept that would call
// std::terminate instead of propagating — confirm this is acceptable.
PgSQL_TxnCmdParser() noexcept { tokens.reserve(16); }
60+
~PgSQL_TxnCmdParser() noexcept = default;
61+
5962
TxnCmd parse(std::string_view input, bool in_transaction_mode) noexcept;
6063

6164
private:
@@ -67,14 +70,20 @@ class PgSQL_TxnCmdParser {
6770
TxnCmd parse_start(size_t& pos) noexcept;
6871

6972
// Helpers
70-
static std::string to_lower(std::string_view s) noexcept {
71-
std::string s_copy(s);
72-
std::transform(s_copy.begin(), s_copy.end(), s_copy.begin(), ::tolower);
73-
return s_copy;
73+
// Case-insensitive equality for ASCII text.
// Only the bytes 'A'..'Z' are folded to lower case; every other byte must match
// exactly. Views of different length never compare equal. Does not allocate.
inline static bool iequals(std::string_view a, std::string_view b) noexcept {
	// Map an upper-case ASCII letter to its lower-case form, leave the rest alone.
	const auto fold = [](char ch) noexcept -> char {
		return (ch >= 'A' && ch <= 'Z') ? static_cast<char>(ch + 32) : ch;
	};

	const size_t len = a.size();
	if (len != b.size()) return false;

	// Advance while the folded characters agree; reaching `len` means a full match.
	size_t idx = 0;
	while (idx < len && fold(a[idx]) == fold(b[idx])) ++idx;
	return idx == len;
}
7584

76-
inline static bool contains(std::vector<std::string_view>&& list, std::string_view value) noexcept {
77-
for (const auto& item : list) if (item == value) return true;
85+
inline static bool contains(std::vector<std::string_view>& list, std::string_view value) noexcept {
86+
for (const auto& item : list) if (iequals(item, value)) return true;
7887
return false;
7988
}
8089
};

lib/PgSQL_ExplicitTxnStateMgr.cpp

Lines changed: 117 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -327,23 +327,105 @@ bool PgSQL_ExplicitTxnStateMgr::handle_transaction(std::string_view input) {
327327
return true;
328328
}
329329

330-
331330
TxnCmd PgSQL_TxnCmdParser::parse(std::string_view input, bool in_transaction_mode) noexcept {
332-
tokens.clear();
333331
TxnCmd cmd;
334-
bool in_quote = false;
332+
333+
if (input.empty()) return cmd;
334+
335+
// Extract first word without full tokenization
335336
size_t start = 0;
337+
size_t end = 0;
338+
339+
while (start < input.size() && fast_isspace(input[start])) {
340+
start++;
341+
}
342+
343+
if (start >= input.size()) return cmd;
344+
345+
// Find end of first word
346+
end = start;
347+
bool in_quote = false;
336348
char quote_char = 0;
337349

338-
// Tokenize with quote handling
339-
for (size_t i = 0; i <= input.size(); ++i) {
350+
while (end < input.size()) {
351+
char c = input[end];
352+
353+
if (!in_quote && (c == '"' || c == '\'')) {
354+
// If we hit a quote at the start, this isn't a transaction command
355+
return cmd;
356+
}
357+
358+
if (fast_isspace(c) || c == ';') {
359+
break;
360+
}
361+
362+
end++;
363+
}
364+
365+
std::string_view first_word = input.substr(start, end - start);
366+
367+
// Check if this is a transaction command we care about
368+
TxnCmd::Type cmd_type = TxnCmd::UNKNOWN;
369+
370+
if (in_transaction_mode) {
371+
if (iequals(first_word, "begin")) {
372+
cmd.type = TxnCmd::BEGIN;
373+
return cmd;
374+
}
375+
376+
if (iequals(first_word, "start")) {
377+
cmd_type = TxnCmd::BEGIN;
378+
} else if (iequals(first_word, "savepoint")) {
379+
cmd_type = TxnCmd::SAVEPOINT;
380+
} else if (iequals(first_word, "release")) {
381+
cmd_type = TxnCmd::RELEASE;
382+
} else if (iequals(first_word, "rollback")) {
383+
cmd_type = TxnCmd::ROLLBACK;
384+
}
385+
} else {
386+
387+
if (iequals(first_word, "commit") || iequals(first_word, "end")) {
388+
cmd.type = TxnCmd::COMMIT;
389+
return cmd;
390+
}
391+
392+
if (iequals(first_word, "abort")) {
393+
cmd.type = TxnCmd::ROLLBACK;
394+
return cmd;
395+
}
396+
397+
if (iequals(first_word, "rollback")) {
398+
cmd_type = TxnCmd::ROLLBACK;
399+
}
400+
}
401+
402+
// If not a transaction command, return early
403+
if (cmd_type == TxnCmd::UNKNOWN) {
404+
return cmd;
405+
}
406+
407+
// Continue tokenization from where we left off
408+
tokens.clear();
409+
410+
// Continue tokenizing the rest of the input
411+
in_quote = false;
412+
quote_char = 0;
413+
start = end; // Continue from after the first word
414+
415+
while (start < input.size() && fast_isspace(input[start])) {
416+
start++;
417+
}
418+
419+
// Tokenize the remaining input
420+
for (size_t i = start; i <= input.size(); ++i) {
340421
const bool at_end = i == input.size();
341422
const char c = at_end ? 0 : input[i];
342423

343424
if (in_quote) {
344425
if (c == quote_char || at_end) {
345426
tokens.emplace_back(input.substr(start + 1, i - start - 1));
346427
in_quote = false;
428+
start = i + 1;
347429
}
348430
continue;
349431
}
@@ -353,41 +435,50 @@ TxnCmd PgSQL_TxnCmdParser::parse(std::string_view input, bool in_transaction_mod
353435
quote_char = c;
354436
start = i;
355437
}
356-
else if (isspace(c) || c == ';' || at_end) {
438+
else if (fast_isspace(c) || c == ';' || at_end) {
357439
if (start < i) tokens.emplace_back(input.substr(start, i - start));
358440
start = i + 1;
359441
}
360442
}
361443

362-
if (tokens.empty()) return cmd;
363-
364444
size_t pos = 0;
365-
const std::string first = to_lower(tokens[pos++]);
366-
367-
if (in_transaction_mode == true) {
368-
if (first == "begin") cmd.type = TxnCmd::BEGIN;
369-
else if (first == "start") cmd = parse_start(pos);
370-
else if (first == "savepoint") cmd = parse_savepoint(pos);
371-
else if (first == "release") cmd = parse_release(pos);
372-
else if (first == "rollback") cmd = parse_rollback(pos);
445+
446+
if (in_transaction_mode) {
447+
448+
switch (cmd_type) {
449+
case TxnCmd::BEGIN:
450+
cmd = parse_start(pos);
451+
break;
452+
case TxnCmd::SAVEPOINT:
453+
cmd = parse_savepoint(pos);
454+
break;
455+
case TxnCmd::RELEASE:
456+
cmd = parse_release(pos);
457+
break;
458+
case TxnCmd::ROLLBACK:
459+
cmd = parse_rollback(pos);
460+
break;
461+
default:
462+
break;
463+
}
373464
} else {
374-
if (first == "commit" || first == "end") cmd.type = TxnCmd::COMMIT;
375-
else if (first == "abort") cmd.type = TxnCmd::ROLLBACK;
376-
else if (first == "rollback") cmd = parse_rollback(pos);
465+
if (cmd_type == TxnCmd::ROLLBACK)
466+
cmd = parse_rollback(pos);
377467
}
468+
378469
return cmd;
379470
}
380471

381472
TxnCmd PgSQL_TxnCmdParser::parse_rollback(size_t& pos) noexcept {
382473
TxnCmd cmd{ TxnCmd::ROLLBACK };
383-
while (pos < tokens.size() && contains({ "work", "transaction" }, to_lower(tokens[pos]))) pos++;
474+
while (pos < tokens.size() && contains({ "work", "transaction" }, tokens[pos])) pos++;
384475

385-
if (pos < tokens.size() && to_lower(tokens[pos]) == "to") {
476+
if (pos < tokens.size() && iequals(tokens[pos], "to")) {
386477
cmd.type = TxnCmd::ROLLBACK_TO;
387-
if (++pos < tokens.size() && to_lower(tokens[pos]) == "savepoint") pos++;
478+
if (++pos < tokens.size() && iequals(tokens[pos], "savepoint")) pos++;
388479
if (pos < tokens.size()) cmd.savepoint = tokens[pos++];
389-
} else if (pos < tokens.size() && to_lower(tokens[pos]) == "and") {
390-
if (++pos < tokens.size() && to_lower(tokens[pos]) == "chain") {
480+
} else if (pos < tokens.size() && iequals(tokens[pos], "and")) {
481+
if (++pos < tokens.size() && iequals(tokens[pos], "chain")) {
391482
cmd.type = TxnCmd::ROLLBACK_AND_CHAIN;
392483
pos++;
393484
}
@@ -403,14 +494,14 @@ TxnCmd PgSQL_TxnCmdParser::parse_savepoint(size_t& pos) noexcept {
403494

404495
// Parses the tokens that follow a RELEASE keyword, starting at tokens[pos].
// An optional "savepoint" token (matched case-insensitively) is skipped, and the
// next token, if any, is stored as the savepoint name. `pos` is advanced past
// every token consumed. The returned command always has type TxnCmd::RELEASE;
// its savepoint is left untouched when no name token is present.
TxnCmd PgSQL_TxnCmdParser::parse_release(size_t& pos) noexcept {
	TxnCmd result{ TxnCmd::RELEASE };
	const size_t token_count = tokens.size();

	// Optional SAVEPOINT keyword.
	if (pos < token_count && iequals(tokens[pos], "savepoint")) {
		++pos;
	}

	// Whatever comes next is taken as the savepoint name.
	if (pos < token_count) {
		result.savepoint = tokens[pos];
		++pos;
	}

	return result;
}
410501

411502
TxnCmd PgSQL_TxnCmdParser::parse_start(size_t& pos) noexcept {
412503
TxnCmd cmd{ TxnCmd::UNKNOWN };
413-
if (pos < tokens.size() && to_lower(tokens[pos]) == "transaction") {
504+
if (pos < tokens.size() && iequals(tokens[pos], "transaction")) {
414505
cmd.type = TxnCmd::BEGIN;
415506
pos++;
416507
}

lib/ProxySQL_Config.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ int ProxySQL_Config::Read_Global_Variables_from_configfile(const char *prefix) {
9292
char *query=(char *)malloc(strlen(q)+strlen(prefix)+strlen(n)+strlen(value_string.c_str()));
9393
sprintf(query,q, prefix, n, value_string.c_str());
9494
//fprintf(stderr, "%s\n", query);
95-
admindb->execute(query);
95+
admindb->execute(query);
9696
free(query);
9797
}
9898
admindb->execute("PRAGMA foreign_keys = ON");

0 commit comments

Comments
 (0)