|
| 1 | +package io.opentelemetry.javaagent.instrumentation.vertx.v3_9.sql; |
| 2 | + |
| 3 | +import java.util.logging.Level; |
| 4 | +import java.util.logging.Logger; |
| 5 | +import java.util.regex.Pattern; |
| 6 | + |
/**
 * Best-effort, regex-based sanitizer that replaces literal values in a SQL statement with
 * {@code ?} placeholders so the statement text can be recorded without leaking user data.
 *
 * <p>This is not a SQL parser. It applies a fixed sequence of regular-expression substitutions
 * (string literals, {@code IN} lists, hex literals, numeric literals, whitespace) and therefore
 * only approximates real SQL lexing.
 */
public class SanitizeSQLString {
  private static final Logger logger = Logger.getLogger(SanitizeSQLString.class.getName());

  // Quoted literals, single- or double-quoted, matched in ONE left-to-right pass so that a quote
  // character of the other kind inside a literal ("it's") cannot start a bogus match.
  // Inside a literal both backslash escapes (\') and doubled quotes ('') are accepted.
  // The pattern is written in "unrolled loop" form with possessive character runs instead of
  // ( x | y )*, because Java matches an alternation inside a star recursively and a long literal
  // would otherwise raise StackOverflowError (an Error, which the catch below would not handle).
  // Note: double quotes in MySQL often quote identifiers, but some apps use them for strings.
  private static final Pattern STRING_LITERAL =
      Pattern.compile(
          "(?s)'[^'\\\\]*+(?:(?:\\\\.|'')[^'\\\\]*+)*'"
              + "|\"[^\"\\\\]*+(?:(?:\\\\.|\"\")[^\"\\\\]*+)*\"");

  // Collapse flat IN lists: IN (1, 2, ?) -> IN (?).
  // Lists containing nested parentheses and sub-queries (IN (SELECT ...)) are left alone: the
  // pattern cannot balance parentheses, and cutting at the first ')' would corrupt the statement.
  // Literals inside them are still replaced by the other steps.
  private static final Pattern IN_CLAUSE =
      Pattern.compile("(?i)\\bIN\\s*\\((?!\\s*SELECT\\b)[^()]*\\)");

  // Numeric literal not adjacent to letters/digits/dot/underscore, to avoid touching identifiers
  // such as col1 or version-like tokens such as 1.2.3. A preceding '$' is excluded as well so
  // positional bind parameters ($1, $2, ...) are preserved.
  // Deliberately limited to -?\d+(\.\d+)? (no leading-dot or exponent forms).
  private static final Pattern NUMERIC_LITERAL =
      Pattern.compile("(?<![A-Za-z0-9_\\.\\$])(-?\\d+(?:\\.\\d+)?)(?![A-Za-z0-9_\\.])");

  // SQL hex numbers like 0xABCD (treated as a literal).
  private static final Pattern HEX_LITERAL =
      Pattern.compile("(?<![A-Za-z0-9_\\.])0x[0-9A-Fa-f]+(?![A-Za-z0-9_\\.])");

  // DATE '...' / TIMESTAMP '...' typed literals.
  private static final Pattern DATE_LITERAL =
      Pattern.compile("(?i)(DATE|TIMESTAMP)\\s*'[^']*'");

  // Any run of whitespace (spaces, tabs, newlines); compiled once instead of on every call.
  private static final Pattern WHITESPACE = Pattern.compile("\\s+");

  /**
   * Replaces literal values in {@code sql} with {@code ?} and normalizes whitespace.
   *
   * @param sql the raw SQL text; may be {@code null} or empty
   * @return {@code sql} itself when it is {@code null} or empty; otherwise the sanitized
   *     statement, or the fixed fallback {@code "mysql ??"} if sanitization throws
   */
  public static String sanitize(String sql) {
    if (sql == null || sql.isEmpty()) {
      return sql;
    }
    try {
      // 1) Replace quoted string literals with ?
      String s = STRING_LITERAL.matcher(sql).replaceAll("?");

      // 2) Collapse IN (...) lists to IN (?)
      s = IN_CLAUSE.matcher(s).replaceAll("IN (?)");

      // 3) Replace hex literals before plain numbers so 0xFF is consumed as a whole
      s = HEX_LITERAL.matcher(s).replaceAll("?");

      // 4) Replace numeric literals with ?
      s = NUMERIC_LITERAL.matcher(s).replaceAll("?");

      // 5) Typed date literals. Quoted text was already replaced in step 1, so this only
      //    matters for quote pairs that step 1 left behind.
      s = DATE_LITERAL.matcher(s).replaceAll("$1 ?");

      // 6) Normalize whitespace: collapse multiple spaces/newlines into single space
      return WHITESPACE.matcher(s).replaceAll(" ").trim();
    } catch (RuntimeException e) {
      // Never include the raw statement here: it contains exactly the values this class is
      // meant to keep out of telemetry and logs.
      logger.log(
          Level.WARNING, "failed to sanitize SQL string (length " + sql.length() + ")", e);
      return "mysql ??";
    }
  }
}
0 commit comments