Skip to content

Commit e744c2b

Browse files
committed
Optimize transaction command parsing to avoid unnecessary tokenization
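Previously, the parser always tokenized the full command, even when it only needed to check whether the input was a transaction command. Now it first extracts the first word, returns early if that word is not a transaction keyword (or is a keyword that needs no further arguments), and tokenizes the remainder of the input only for commands whose following tokens matter (START, SAVEPOINT, RELEASE, ROLLBACK). This commit also replaces the allocating to_lower() helper with a case-insensitive iequals() comparison and switches the isspace() calls in the touched files to fast_isspace().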
1 parent 7c665b9 commit e744c2b

File tree

6 files changed

+149
-49
lines changed

6 files changed

+149
-49
lines changed

include/PgSQL_ExplicitTxnStateMgr.h

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ struct TxnCmd {
5656
*/
5757
class PgSQL_TxnCmdParser {
5858
public:
59+
PgSQL_TxnCmdParser() noexcept { tokens.reserve(16); }
60+
~PgSQL_TxnCmdParser() noexcept = default;
61+
5962
TxnCmd parse(std::string_view input, bool in_transaction_mode) noexcept;
6063

6164
private:
@@ -67,14 +70,20 @@ class PgSQL_TxnCmdParser {
6770
TxnCmd parse_start(size_t& pos) noexcept;
6871

6972
// Helpers
70-
static std::string to_lower(std::string_view s) noexcept {
71-
std::string s_copy(s);
72-
std::transform(s_copy.begin(), s_copy.end(), s_copy.begin(), ::tolower);
73-
return s_copy;
73+
inline static bool iequals(std::string_view a, std::string_view b) noexcept {
74+
if (a.size() != b.size()) return false;
75+
for (size_t i = 0; i < a.size(); ++i) {
76+
char ca = a[i];
77+
char cb = b[i];
78+
if (ca >= 'A' && ca <= 'Z') ca += 32;
79+
if (cb >= 'A' && cb <= 'Z') cb += 32;
80+
if (ca != cb) return false;
81+
}
82+
return true;
7483
}
7584

7685
inline static bool contains(std::vector<std::string_view>&& list, std::string_view value) noexcept {
77-
for (const auto& item : list) if (item == value) return true;
86+
for (const auto& item : list) if (iequals(item, value)) return true;
7887
return false;
7988
}
8089
};

lib/PgSQL_ExplicitTxnStateMgr.cpp

Lines changed: 117 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -327,23 +327,105 @@ bool PgSQL_ExplicitTxnStateMgr::handle_transaction(std::string_view input) {
327327
return true;
328328
}
329329

330-
331330
TxnCmd PgSQL_TxnCmdParser::parse(std::string_view input, bool in_transaction_mode) noexcept {
332-
tokens.clear();
333331
TxnCmd cmd;
334-
bool in_quote = false;
332+
333+
if (input.empty()) return cmd;
334+
335+
// Extract first word without full tokenization
335336
size_t start = 0;
337+
size_t end = 0;
338+
339+
while (start < input.size() && fast_isspace(input[start])) {
340+
start++;
341+
}
342+
343+
if (start >= input.size()) return cmd;
344+
345+
// Find end of first word
346+
end = start;
347+
bool in_quote = false;
336348
char quote_char = 0;
337349

338-
// Tokenize with quote handling
339-
for (size_t i = 0; i <= input.size(); ++i) {
350+
while (end < input.size()) {
351+
char c = input[end];
352+
353+
if (!in_quote && (c == '"' || c == '\'')) {
354+
// If we hit a quote at the start, this isn't a transaction command
355+
return cmd;
356+
}
357+
358+
if (fast_isspace(c) || c == ';') {
359+
break;
360+
}
361+
362+
end++;
363+
}
364+
365+
std::string_view first_word = input.substr(start, end - start);
366+
367+
// Check if this is a transaction command we care about
368+
TxnCmd::Type cmd_type = TxnCmd::UNKNOWN;
369+
370+
if (in_transaction_mode) {
371+
if (iequals(first_word, "begin")) {
372+
cmd.type = TxnCmd::BEGIN;
373+
return cmd;
374+
}
375+
376+
if (iequals(first_word, "start")) {
377+
cmd_type = TxnCmd::BEGIN;
378+
} else if (iequals(first_word, "savepoint")) {
379+
cmd_type = TxnCmd::SAVEPOINT;
380+
} else if (iequals(first_word, "release")) {
381+
cmd_type = TxnCmd::RELEASE;
382+
} else if (iequals(first_word, "rollback")) {
383+
cmd_type = TxnCmd::ROLLBACK;
384+
}
385+
} else {
386+
387+
if (iequals(first_word, "commit") || iequals(first_word, "end")) {
388+
cmd.type = TxnCmd::COMMIT;
389+
return cmd;
390+
}
391+
392+
if (iequals(first_word, "abort")) {
393+
cmd.type = TxnCmd::ROLLBACK;
394+
return cmd;
395+
}
396+
397+
if (iequals(first_word, "rollback")) {
398+
cmd_type = TxnCmd::ROLLBACK;
399+
}
400+
}
401+
402+
// If not a transaction command, return early
403+
if (cmd_type == TxnCmd::UNKNOWN) {
404+
return cmd;
405+
}
406+
407+
// Continue tokenization from where we left off
408+
tokens.clear();
409+
410+
// Continue tokenizing the rest of the input
411+
in_quote = false;
412+
quote_char = 0;
413+
start = end; // Continue from after the first word
414+
415+
while (start < input.size() && fast_isspace(input[start])) {
416+
start++;
417+
}
418+
419+
// Tokenize the remaining input
420+
for (size_t i = start; i <= input.size(); ++i) {
340421
const bool at_end = i == input.size();
341422
const char c = at_end ? 0 : input[i];
342423

343424
if (in_quote) {
344425
if (c == quote_char || at_end) {
345426
tokens.emplace_back(input.substr(start + 1, i - start - 1));
346427
in_quote = false;
428+
start = i + 1;
347429
}
348430
continue;
349431
}
@@ -353,41 +435,50 @@ TxnCmd PgSQL_TxnCmdParser::parse(std::string_view input, bool in_transaction_mod
353435
quote_char = c;
354436
start = i;
355437
}
356-
else if (isspace(c) || c == ';' || at_end) {
438+
else if (fast_isspace(c) || c == ';' || at_end) {
357439
if (start < i) tokens.emplace_back(input.substr(start, i - start));
358440
start = i + 1;
359441
}
360442
}
361443

362-
if (tokens.empty()) return cmd;
363-
364444
size_t pos = 0;
365-
const std::string first = to_lower(tokens[pos++]);
366-
367-
if (in_transaction_mode == true) {
368-
if (first == "begin") cmd.type = TxnCmd::BEGIN;
369-
else if (first == "start") cmd = parse_start(pos);
370-
else if (first == "savepoint") cmd = parse_savepoint(pos);
371-
else if (first == "release") cmd = parse_release(pos);
372-
else if (first == "rollback") cmd = parse_rollback(pos);
445+
446+
if (in_transaction_mode) {
447+
448+
switch (cmd_type) {
449+
case TxnCmd::BEGIN:
450+
cmd = parse_start(pos);
451+
break;
452+
case TxnCmd::SAVEPOINT:
453+
cmd = parse_savepoint(pos);
454+
break;
455+
case TxnCmd::RELEASE:
456+
cmd = parse_release(pos);
457+
break;
458+
case TxnCmd::ROLLBACK:
459+
cmd = parse_rollback(pos);
460+
break;
461+
default:
462+
break;
463+
}
373464
} else {
374-
if (first == "commit" || first == "end") cmd.type = TxnCmd::COMMIT;
375-
else if (first == "abort") cmd.type = TxnCmd::ROLLBACK;
376-
else if (first == "rollback") cmd = parse_rollback(pos);
465+
if (cmd_type == TxnCmd::ROLLBACK)
466+
cmd = parse_rollback(pos);
377467
}
468+
378469
return cmd;
379470
}
380471

381472
TxnCmd PgSQL_TxnCmdParser::parse_rollback(size_t& pos) noexcept {
382473
TxnCmd cmd{ TxnCmd::ROLLBACK };
383-
while (pos < tokens.size() && contains({ "work", "transaction" }, to_lower(tokens[pos]))) pos++;
474+
while (pos < tokens.size() && contains({ "work", "transaction" }, tokens[pos])) pos++;
384475

385-
if (pos < tokens.size() && to_lower(tokens[pos]) == "to") {
476+
if (pos < tokens.size() && iequals(tokens[pos], "to")) {
386477
cmd.type = TxnCmd::ROLLBACK_TO;
387-
if (++pos < tokens.size() && to_lower(tokens[pos]) == "savepoint") pos++;
478+
if (++pos < tokens.size() && iequals(tokens[pos], "savepoint")) pos++;
388479
if (pos < tokens.size()) cmd.savepoint = tokens[pos++];
389-
} else if (pos < tokens.size() && to_lower(tokens[pos]) == "and") {
390-
if (++pos < tokens.size() && to_lower(tokens[pos]) == "chain") {
480+
} else if (pos < tokens.size() && iequals(tokens[pos], "and")) {
481+
if (++pos < tokens.size() && iequals(tokens[pos], "chain")) {
391482
cmd.type = TxnCmd::ROLLBACK_AND_CHAIN;
392483
pos++;
393484
}
@@ -403,14 +494,14 @@ TxnCmd PgSQL_TxnCmdParser::parse_savepoint(size_t& pos) noexcept {
403494

404495
TxnCmd PgSQL_TxnCmdParser::parse_release(size_t& pos) noexcept {
405496
TxnCmd cmd{ TxnCmd::RELEASE };
406-
if (pos < tokens.size() && to_lower(tokens[pos]) == "savepoint") pos++;
497+
if (pos < tokens.size() && iequals(tokens[pos], "savepoint")) pos++;
407498
if (pos < tokens.size()) cmd.savepoint = tokens[pos++];
408499
return cmd;
409500
}
410501

411502
TxnCmd PgSQL_TxnCmdParser::parse_start(size_t& pos) noexcept {
412503
TxnCmd cmd{ TxnCmd::UNKNOWN };
413-
if (pos < tokens.size() && to_lower(tokens[pos]) == "transaction") {
504+
if (pos < tokens.size() && iequals(tokens[pos], "transaction")) {
414505
cmd.type = TxnCmd::BEGIN;
415506
pos++;
416507
}

lib/PgSQL_Protocol.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -741,7 +741,7 @@ std::vector<std::pair<std::string, std::string>> PgSQL_Protocol::parse_options(c
741741

742742
while (pos < input.size()) {
743743
// Skip leading spaces
744-
while (pos < input.size() && std::isspace(input[pos])) {
744+
while (pos < input.size() && fast_isspace(input[pos])) {
745745
++pos;
746746
}
747747

@@ -751,7 +751,7 @@ std::vector<std::pair<std::string, std::string>> PgSQL_Protocol::parse_options(c
751751
pos += 2; // Skip "-c", "--"
752752
}
753753

754-
while (pos < input.size() && std::isspace(input[pos])) {
754+
while (pos < input.size() && fast_isspace(input[pos])) {
755755
++pos;
756756
}
757757

@@ -772,7 +772,7 @@ std::vector<std::pair<std::string, std::string>> PgSQL_Protocol::parse_options(c
772772
bool last_was_escape = false;
773773
while (pos < input.size()) {
774774
char c = input[pos];
775-
if (std::isspace(c) && !last_was_escape) {
775+
if (fast_isspace(c) && !last_was_escape) {
776776
break;
777777
}
778778
if (c == '\\' && !last_was_escape) {

lib/PgSQL_Session.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6557,7 +6557,7 @@ std::vector<std::string> PgSQL_DateStyle_Util::split_datestyle(std::string_view
65576557
int* lastNonSpace = (currentToken == 1) ? &lastNonSpace1 : &lastNonSpace2;
65586558

65596559
// Cache is-space check.
6560-
bool is_space = std::isspace(static_cast<unsigned char>(c));
6560+
bool is_space = fast_isspace(static_cast<unsigned char>(c));
65616561
// Skip leading whitespace for a new token.
65626562
if (currentStr->empty() && is_space) {
65636563
continue;

lib/PgSQL_Variables_Validator.cpp

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ bool pgsql_variable_validate_maintenance_work_mem(const char* value, const param
183183
if (transformed_value) *transformed_value = nullptr;
184184

185185
// Skip leading whitespace
186-
while (isspace((unsigned char)*p)) p++;
186+
while (fast_isspace((unsigned char)*p)) p++;
187187

188188
// Parse numeric part
189189
num = strtoll(p, &endptr, 10);
@@ -196,11 +196,11 @@ bool pgsql_variable_validate_maintenance_work_mem(const char* value, const param
196196
p = endptr;
197197

198198
// Skip whitespace after number
199-
while (isspace((unsigned char)*p)) p++;
199+
while (fast_isspace((unsigned char)*p)) p++;
200200

201201
// Parse unit
202202
if (*p != '\0') {
203-
char tmp_unit = tolower(*p);
203+
char tmp_unit = ::tolower(*p);
204204
switch (tmp_unit) {
205205
case 'k':
206206
case 'm':
@@ -210,15 +210,15 @@ bool pgsql_variable_validate_maintenance_work_mem(const char* value, const param
210210
unit = toupper(*p++);
211211
has_unit = true;
212212
// Check optional 'b'/'B'
213-
if (tolower(*p) == 'b') p++;
213+
if (::tolower(*p) == 'b') p++;
214214
break;
215215
default:
216216
return false;
217217
}
218218
}
219219

220220
// Skip trailing whitespace
221-
while (isspace((unsigned char)*p)) p++;
221+
while (fast_isspace((unsigned char)*p)) p++;
222222

223223
// Validate entire string consumed
224224
if (*p != '\0') return false;
@@ -241,7 +241,7 @@ bool pgsql_variable_validate_maintenance_work_mem_v2(const char* value, const pa
241241
const char* input = value;
242242

243243
/* Trim leading whitespace */
244-
while (isspace((unsigned char)*input)) input++;
244+
while (fast_isspace((unsigned char)*input)) input++;
245245

246246
/* Parse numeric part */
247247
uint64_t number;
@@ -256,7 +256,7 @@ bool pgsql_variable_validate_maintenance_work_mem_v2(const char* value, const pa
256256
//num_len = endptr - input;
257257

258258
// Skip whitespace after number
259-
while (isspace((unsigned char)*endptr)) endptr++;
259+
while (fast_isspace((unsigned char)*endptr)) endptr++;
260260

261261
/* Parse unit part */
262262
const char* unit_ptr = endptr;
@@ -273,7 +273,7 @@ bool pgsql_variable_validate_maintenance_work_mem_v2(const char* value, const pa
273273
/* Convert unit to lowercase for validation */
274274
char u[3] = { 0 };
275275
for (int i = 0; i < 2 && unit_ptr[i]; i++)
276-
u[i] = tolower((unsigned char)unit_ptr[i]);
276+
u[i] = ::tolower((unsigned char)unit_ptr[i]);
277277

278278
/* Validate unit and set multiplier */
279279
if (unit_len == 1 && u[0] == 'b') {
@@ -332,7 +332,7 @@ bool pgsql_variable_validate_maintenance_work_mem_v3(const char* value, const pa
332332
(void)session;
333333

334334
// Trim leading whitespace
335-
while (isspace((unsigned char)*value)) value++;
335+
while (fast_isspace((unsigned char)*value)) value++;
336336

337337
char* endptr;
338338
const char* num_start = value;
@@ -371,7 +371,7 @@ bool pgsql_variable_validate_maintenance_work_mem_v3(const char* value, const pa
371371
// Convert unit to lowercase for validation
372372
char u[3] = { 0 };
373373
for (int i = 0; i < 2 && unit_ptr[i]; i++)
374-
u[i] = tolower((unsigned char)unit_ptr[i]);
374+
u[i] = ::tolower((unsigned char)unit_ptr[i]);
375375

376376
// Validate units and set multipliers
377377
if (unit_len == 1 && u[0] == 'b') {
@@ -471,7 +471,7 @@ bool pgsql_variable_validate_search_path(const char* value, const params_t* para
471471

472472
while (*token && result) {
473473
/* skip leading whitespace */
474-
while (*token && isspace((unsigned char)*token)) token++;
474+
while (*token && fast_isspace((unsigned char)*token)) token++;
475475
if (*token == '\0') break;
476476

477477
const char* part_start = token;
@@ -508,7 +508,7 @@ bool pgsql_variable_validate_search_path(const char* value, const params_t* para
508508
}
509509
} else {
510510
// unquoted identifier or $user
511-
while (*token && *token != ',' && !isspace(*token)) token++;
511+
while (*token && *token != ',' && !fast_isspace(*token)) token++;
512512
part_len = (size_t)(token - part_start);
513513
if (part_len == 0 || part_len > 63) {
514514
result = false;
@@ -543,7 +543,7 @@ bool pgsql_variable_validate_search_path(const char* value, const params_t* para
543543
normalized[norm_pos] = '\0';
544544

545545
// skip whitespace after part
546-
while (*token && isspace(*token)) token++;
546+
while (*token && fast_isspace(*token)) token++;
547547

548548
// expect comma or end
549549
if (*token == ',') {

lib/ProxySQL_Config.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ int ProxySQL_Config::Read_Global_Variables_from_configfile(const char *prefix) {
9292
char *query=(char *)malloc(strlen(q)+strlen(prefix)+strlen(n)+strlen(value_string.c_str()));
9393
sprintf(query,q, prefix, n, value_string.c_str());
9494
//fprintf(stderr, "%s\n", query);
95-
admindb->execute(query);
95+
admindb->execute(query);
9696
free(query);
9797
}
9898
admindb->execute("PRAGMA foreign_keys = ON");

0 commit comments

Comments
 (0)