Skip to content

Commit 8c0c767

Browse files
committed
Sql sanitizer should by default treat double-quoted fragments as string literals
1 parent bb926c0 commit 8c0c767

File tree

24 files changed

+243
-42
lines changed

24 files changed

+243
-42
lines changed

instrumentation-api-incubator/src/main/java/io/opentelemetry/instrumentation/api/incubator/config/internal/CommonConfig.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ public final class CommonConfig {
3434
private final Set<String> knownHttpRequestMethods;
3535
private final EnduserConfig enduserConfig;
3636
private final boolean statementSanitizationEnabled;
37+
private final boolean statementSanitizationAnsiQuotes;
3738
private final boolean sqlCommenterEnabled;
3839
private final boolean emitExperimentalHttpClientTelemetry;
3940
private final boolean emitExperimentalHttpServerTelemetry;
@@ -88,6 +89,8 @@ public CommonConfig(InstrumentationConfig config) {
8889
new ArrayList<>(HttpConstants.KNOWN_METHODS)));
8990
statementSanitizationEnabled =
9091
config.getBoolean("otel.instrumentation.common.db-statement-sanitizer.enabled", true);
92+
statementSanitizationAnsiQuotes =
93+
config.getBoolean("otel.instrumentation.common.db-statement-sanitizer.ansi-quotes", false);
9194
sqlCommenterEnabled =
9295
config.getBoolean(
9396
"otel.instrumentation.common.experimental.db-sqlcommenter.enabled", false);
@@ -142,6 +145,10 @@ public boolean isStatementSanitizationEnabled() {
142145
return statementSanitizationEnabled;
143146
}
144147

148+
public boolean isStatementSanitizationAnsiQuotes() {
149+
return statementSanitizationAnsiQuotes;
150+
}
151+
145152
public boolean isSqlCommenterEnabled() {
146153
return sqlCommenterEnabled;
147154
}

instrumentation-api-incubator/src/main/java/io/opentelemetry/instrumentation/api/incubator/semconv/db/DbClientSpanNameExtractor.java

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,21 @@ public static <REQUEST> SpanNameExtractor<REQUEST> create(
3636
*/
3737
public static <REQUEST> SpanNameExtractor<REQUEST> create(
3838
SqlClientAttributesGetter<REQUEST, ?> getter) {
39-
return new SqlClientSpanNameExtractor<>(getter);
39+
return create(getter, SqlDialect.DEFAULT);
40+
}
41+
42+
/**
43+
* Returns a {@link SpanNameExtractor} that constructs the span name according to DB semantic
44+
* conventions: {@code <db.operation> <db.name>.<identifier>}.
45+
*
46+
* @see SqlStatementInfo#getOperation() used to extract {@code <db.operation>}.
47+
* @see DbClientAttributesGetter#getDbNamespace(Object) used to extract {@code <db.namespace>}.
48+
* @see SqlStatementInfo#getMainIdentifier() used to extract {@code <db.table>} or stored
49+
* procedure name.
50+
*/
51+
public static <REQUEST> SpanNameExtractor<REQUEST> create(
52+
SqlClientAttributesGetter<REQUEST, ?> getter, SqlDialect sqlDialect) {
53+
return new SqlClientSpanNameExtractor<>(getter, sqlDialect.ansiQuotes());
4054
}
4155

4256
private static final String DEFAULT_SPAN_NAME = "DB Query";
@@ -87,14 +101,19 @@ private static final class SqlClientSpanNameExtractor<REQUEST>
87101
extends DbClientSpanNameExtractor<REQUEST> {
88102

89103
private final SqlClientAttributesGetter<REQUEST, ?> getter;
104+
private final boolean ansiQuotes;
90105

91-
private SqlClientSpanNameExtractor(SqlClientAttributesGetter<REQUEST, ?> getter) {
106+
private SqlClientSpanNameExtractor(
107+
SqlClientAttributesGetter<REQUEST, ?> getter, boolean ansiQuotes) {
92108
this.getter = getter;
109+
this.ansiQuotes = ansiQuotes;
93110
}
94111

95112
@Override
96113
public String extract(REQUEST request) {
97114
String namespace = getter.getDbNamespace(request);
115+
SqlDialect dialect =
116+
SqlStatementSanitizerUtil.getDialect(getter.getDbSystem(request), ansiQuotes);
98117
Collection<String> rawQueryTexts = getter.getRawQueryTexts(request);
99118

100119
if (rawQueryTexts.isEmpty()) {
@@ -106,22 +125,22 @@ public String extract(REQUEST request) {
106125
return computeSpanName(namespace, null, null);
107126
}
108127
SqlStatementInfo sanitizedStatement =
109-
SqlStatementSanitizerUtil.sanitize(rawQueryTexts.iterator().next());
128+
SqlStatementSanitizerUtil.sanitize(rawQueryTexts.iterator().next(), dialect);
110129
return computeSpanName(
111130
namespace, sanitizedStatement.getOperation(), sanitizedStatement.getMainIdentifier());
112131
}
113132

114133
if (rawQueryTexts.size() == 1) {
115134
SqlStatementInfo sanitizedStatement =
116-
SqlStatementSanitizerUtil.sanitize(rawQueryTexts.iterator().next());
135+
SqlStatementSanitizerUtil.sanitize(rawQueryTexts.iterator().next(), dialect);
117136
String operation = sanitizedStatement.getOperation();
118137
if (isBatch(request)) {
119138
operation = "BATCH " + operation;
120139
}
121140
return computeSpanName(namespace, operation, sanitizedStatement.getMainIdentifier());
122141
}
123142

124-
MultiQuery multiQuery = MultiQuery.analyze(rawQueryTexts, false);
143+
MultiQuery multiQuery = MultiQuery.analyze(rawQueryTexts, dialect, false);
125144
return computeSpanName(
126145
namespace,
127146
multiQuery.getOperation() != null ? "BATCH " + multiQuery.getOperation() : "BATCH",

instrumentation-api-incubator/src/main/java/io/opentelemetry/instrumentation/api/incubator/semconv/db/MultiQuery.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,12 +24,13 @@ private MultiQuery(
2424
}
2525

2626
static MultiQuery analyze(
27-
Collection<String> rawQueryTexts, boolean statementSanitizationEnabled) {
27+
Collection<String> rawQueryTexts, SqlDialect dialect, boolean statementSanitizationEnabled) {
2828
UniqueValue uniqueMainIdentifier = new UniqueValue();
2929
UniqueValue uniqueOperation = new UniqueValue();
3030
Set<String> uniqueStatements = new LinkedHashSet<>();
3131
for (String rawQueryText : rawQueryTexts) {
32-
SqlStatementInfo sanitizedStatement = SqlStatementSanitizerUtil.sanitize(rawQueryText);
32+
SqlStatementInfo sanitizedStatement =
33+
SqlStatementSanitizerUtil.sanitize(rawQueryText, dialect);
3334
String mainIdentifier = sanitizedStatement.getMainIdentifier();
3435
uniqueMainIdentifier.set(mainIdentifier);
3536
String operation = sanitizedStatement.getOperation();

instrumentation-api-incubator/src/main/java/io/opentelemetry/instrumentation/api/incubator/semconv/db/SqlClientAttributesExtractor.java

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,17 +67,20 @@ public static <REQUEST, RESPONSE> SqlClientAttributesExtractorBuilder<REQUEST, R
6767
private final AttributeKey<String> oldSemconvTableAttribute;
6868
private final boolean statementSanitizationEnabled;
6969
private final boolean captureQueryParameters;
70+
private final boolean statementSanitizationAnsiQuotes;
7071

7172
SqlClientAttributesExtractor(
7273
SqlClientAttributesGetter<REQUEST, RESPONSE> getter,
7374
AttributeKey<String> oldSemconvTableAttribute,
7475
boolean statementSanitizationEnabled,
76+
boolean statementSanitizationAnsiQuotes,
7577
boolean captureQueryParameters) {
7678
this.getter = getter;
7779
this.oldSemconvTableAttribute = oldSemconvTableAttribute;
7880
// capturing query parameters disables statement sanitization
7981
this.statementSanitizationEnabled = !captureQueryParameters && statementSanitizationEnabled;
8082
this.captureQueryParameters = captureQueryParameters;
83+
this.statementSanitizationAnsiQuotes = statementSanitizationAnsiQuotes;
8184
internalNetworkExtractor = new InternalNetworkAttributesExtractor<>(getter, true, false);
8285
serverAttributesExtractor = ServerAttributesExtractor.create(getter);
8386
}
@@ -86,14 +89,18 @@ public static <REQUEST, RESPONSE> SqlClientAttributesExtractorBuilder<REQUEST, R
8689
@Override
8790
public void onStart(AttributesBuilder attributes, Context parentContext, REQUEST request) {
8891
Collection<String> rawQueryTexts = getter.getRawQueryTexts(request);
92+
SqlDialect dialect =
93+
SqlStatementSanitizerUtil.getDialect(
94+
getter.getDbSystem(request), statementSanitizationAnsiQuotes);
8995

9096
Long batchSize = getter.getBatchSize(request);
9197
boolean isBatch = batchSize != null && batchSize > 1;
9298

9399
if (SemconvStability.emitOldDatabaseSemconv()) {
94100
if (rawQueryTexts.size() == 1) { // for backcompat(?)
95101
String rawQueryText = rawQueryTexts.iterator().next();
96-
SqlStatementInfo sanitizedStatement = SqlStatementSanitizerUtil.sanitize(rawQueryText);
102+
SqlStatementInfo sanitizedStatement =
103+
SqlStatementSanitizerUtil.sanitize(rawQueryText, dialect);
97104
String operation = sanitizedStatement.getOperation();
98105
internalSet(
99106
attributes,
@@ -112,7 +119,8 @@ public void onStart(AttributesBuilder attributes, Context parentContext, REQUEST
112119
}
113120
if (rawQueryTexts.size() == 1) {
114121
String rawQueryText = rawQueryTexts.iterator().next();
115-
SqlStatementInfo sanitizedStatement = SqlStatementSanitizerUtil.sanitize(rawQueryText);
122+
SqlStatementInfo sanitizedStatement =
123+
SqlStatementSanitizerUtil.sanitize(rawQueryText, dialect);
116124
String operation = sanitizedStatement.getOperation();
117125
internalSet(
118126
attributes,
@@ -124,7 +132,8 @@ public void onStart(AttributesBuilder attributes, Context parentContext, REQUEST
124132
}
125133
} else if (rawQueryTexts.size() > 1) {
126134
MultiQuery multiQuery =
127-
MultiQuery.analyze(getter.getRawQueryTexts(request), statementSanitizationEnabled);
135+
MultiQuery.analyze(
136+
getter.getRawQueryTexts(request), dialect, statementSanitizationEnabled);
128137
internalSet(attributes, DB_QUERY_TEXT, join("; ", multiQuery.getStatements()));
129138

130139
String operation =

instrumentation-api-incubator/src/main/java/io/opentelemetry/instrumentation/api/incubator/semconv/db/SqlClientAttributesExtractorBuilder.java

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ public final class SqlClientAttributesExtractorBuilder<REQUEST, RESPONSE> {
2121
AttributeKey<String> oldSemconvTableAttribute = DB_SQL_TABLE;
2222
boolean statementSanitizationEnabled = true;
2323
boolean captureQueryParameters = false;
24+
boolean setStatementSanitizationAnsiQuotes = false;
2425

2526
SqlClientAttributesExtractorBuilder(SqlClientAttributesGetter<REQUEST, RESPONSE> getter) {
2627
this.getter = getter;
@@ -49,6 +50,19 @@ public SqlClientAttributesExtractorBuilder<REQUEST, RESPONSE> setStatementSaniti
4950
return this;
5051
}
5152

53+
/**
54+
* Sets whether the SQL sanitizer should treat double-quoted fragments as string literals or
55+
* identifiers. By default, double quotes are used for string literals. When the sanitizer is able
56+
* to detect that the used database does not support double-quoted string literals then this flag
57+
* will be automatically switched.
58+
*/
59+
@CanIgnoreReturnValue
60+
public SqlClientAttributesExtractorBuilder<REQUEST, RESPONSE>
61+
setSetStatementSanitizationAnsiQuotes(boolean setStatementSanitizationAnsiQuotes) {
62+
this.setStatementSanitizationAnsiQuotes = setStatementSanitizationAnsiQuotes;
63+
return this;
64+
}
65+
5266
/**
5367
* Sets whether the query parameters should be captured as span attributes named {@code
5468
* db.query.parameter.<key>}. Enabling this option disables the statement sanitization. Disabled
@@ -70,6 +84,10 @@ public SqlClientAttributesExtractorBuilder<REQUEST, RESPONSE> setCaptureQueryPar
7084
*/
7185
public AttributesExtractor<REQUEST, RESPONSE> build() {
7286
return new SqlClientAttributesExtractor<>(
73-
getter, oldSemconvTableAttribute, statementSanitizationEnabled, captureQueryParameters);
87+
getter,
88+
oldSemconvTableAttribute,
89+
statementSanitizationEnabled,
90+
setStatementSanitizationAnsiQuotes,
91+
captureQueryParameters);
7492
}
7593
}

instrumentation-api-incubator/src/main/java/io/opentelemetry/instrumentation/api/incubator/semconv/db/SqlDialect.java

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,26 @@
77

88
/** Enumeration of sql dialects that are handled differently by {@link SqlStatementSanitizer}. */
99
public enum SqlDialect {
10-
DEFAULT,
11-
// couchbase uses double quotes for string literals
12-
COUCHBASE
10+
DEFAULT(false), // double quotes for string literals
11+
ANSI_QUOTES(true), // double quotes for identifiers, single quotes for string literals
12+
13+
CASSANDRA(true),
14+
CLICKHOUSE(true),
15+
COUCHBASE(false),
16+
GEODE(true),
17+
INFLUXDB(true);
18+
19+
private final boolean ansiQuotes;
20+
21+
SqlDialect(boolean ansiQuotes) {
22+
this.ansiQuotes = ansiQuotes;
23+
}
24+
25+
/**
26+
* Returns {@code true} if the dialect uses double quotes for identifiers (ANSI SQL standard),
27+
* {@code false} if double quotes are used for string literals.
28+
*/
29+
boolean ansiQuotes() {
30+
return ansiQuotes;
31+
}
1332
}

instrumentation-api-incubator/src/main/java/io/opentelemetry/instrumentation/api/incubator/semconv/db/SqlStatementSanitizer.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -45,15 +45,15 @@ public SqlStatementInfo sanitize(@Nullable String statement, SqlDialect dialect)
4545
// cache growing too large
4646
// https://github.com/open-telemetry/opentelemetry-java-instrumentation/issues/13180
4747
if (statement.length() > LARGE_STATEMENT_THRESHOLD) {
48-
return sanitizeImpl(statement, dialect);
48+
return sanitizeImpl(statement, dialect.ansiQuotes());
4949
}
5050
return sqlToStatementInfoCache.computeIfAbsent(
51-
CacheKey.create(statement, dialect), k -> sanitizeImpl(statement, dialect));
51+
CacheKey.create(statement, dialect), k -> sanitizeImpl(statement, dialect.ansiQuotes()));
5252
}
5353

54-
private static SqlStatementInfo sanitizeImpl(String statement, SqlDialect dialect) {
54+
private static SqlStatementInfo sanitizeImpl(String statement, boolean ansiQuotes) {
5555
supportability.incrementCounter(SQL_STATEMENT_SANITIZER_CACHE_MISS);
56-
return AutoSqlSanitizer.sanitize(statement, dialect);
56+
return AutoSqlSanitizer.sanitize(statement, ansiQuotes);
5757
}
5858

5959
// visible for tests

instrumentation-api-incubator/src/main/java/io/opentelemetry/instrumentation/api/incubator/semconv/db/SqlStatementSanitizerUtil.java

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,22 +5,45 @@
55

66
package io.opentelemetry.instrumentation.api.incubator.semconv.db;
77

8+
import io.opentelemetry.instrumentation.api.incubator.semconv.db.SqlStatementSanitizer.CacheKey;
89
import io.opentelemetry.instrumentation.api.instrumenter.Instrumenter;
910
import io.opentelemetry.instrumentation.api.internal.InstrumenterContext;
11+
import io.opentelemetry.semconv.DbAttributes;
12+
import java.util.Arrays;
1013
import java.util.HashMap;
14+
import java.util.HashSet;
1115
import java.util.Map;
16+
import java.util.Set;
1217

1318
/**
1419
* Helper class for sanitizing sql that keeps sanitization results in {@link InstrumenterContext} so
1520
* that each statement would be sanitized only once for given {@link Instrumenter} call.
1621
*/
1722
class SqlStatementSanitizerUtil {
1823
private static final SqlStatementSanitizer sanitizer = SqlStatementSanitizer.create(true);
24+
private static final Set<String> dbsWithAnsiQuotes =
25+
new HashSet<>(
26+
Arrays.asList(
27+
DbAttributes.DbSystemNameValues.POSTGRESQL,
28+
"oracle",
29+
"h2",
30+
"hsqldb",
31+
"db2",
32+
"derby",
33+
"hanadb"));
1934

20-
static SqlStatementInfo sanitize(String queryText) {
21-
Map<String, SqlStatementInfo> map =
35+
static SqlStatementInfo sanitize(String queryText, SqlDialect dialect) {
36+
Map<CacheKey, SqlStatementInfo> map =
2237
InstrumenterContext.computeIfAbsent("sanitized-sql-map", unused -> new HashMap<>());
23-
return map.computeIfAbsent(queryText, sanitizer::sanitize);
38+
return map.computeIfAbsent(
39+
CacheKey.create(queryText, dialect),
40+
key -> sanitizer.sanitize(key.getStatement(), key.getDialect()));
41+
}
42+
43+
static SqlDialect getDialect(String dbSystem, boolean ansiQuotes) {
44+
return ansiQuotes || dbsWithAnsiQuotes.contains(dbSystem)
45+
? SqlDialect.ANSI_QUOTES
46+
: SqlDialect.DEFAULT;
2447
}
2548

2649
private SqlStatementSanitizerUtil() {}

instrumentation-api-incubator/src/main/jflex/SqlSanitizer.jflex

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,9 @@ POSTGRE_PARAM_MARKER = "$"[0-9]*
3939
WHITESPACE = [ \t\r\n]+
4040

4141
%{
42-
static SqlStatementInfo sanitize(String statement, SqlDialect dialect) {
42+
static SqlStatementInfo sanitize(String statement, boolean ansiQuotes) {
4343
AutoSqlSanitizer sanitizer = new AutoSqlSanitizer(new java.io.StringReader(statement));
44-
sanitizer.dialect = dialect;
44+
sanitizer.ansiQuotes = ansiQuotes;
4545
try {
4646
while (!sanitizer.yyatEOF()) {
4747
int token = sanitizer.yylex();
@@ -114,7 +114,7 @@ WHITESPACE = [ \t\r\n]+
114114
private boolean insideComment = false;
115115
private Operation operation = NoOp.INSTANCE;
116116
private boolean extractionDone = false;
117-
private SqlDialect dialect;
117+
private boolean ansiQuotes; // whether double quotes are used for quoting identifiers or string literals
118118

119119
private void setOperation(Operation operation) {
120120
if (this.operation == NoOp.INSTANCE) {
@@ -534,13 +534,13 @@ WHITESPACE = [ \t\r\n]+
534534
}
535535

536536
{DOUBLE_QUOTED_STR} {
537-
if (dialect == SqlDialect.COUCHBASE) {
538-
builder.append('?');
539-
} else {
537+
if (ansiQuotes) {
540538
if (!insideComment && !extractionDone) {
541539
extractionDone = operation.handleIdentifier();
542540
}
543541
appendCurrentFragment();
542+
} else {
543+
builder.append('?');
544544
}
545545
if (isOverLimit()) return YYEOF;
546546
}

0 commit comments

Comments
 (0)