-
Notifications
You must be signed in to change notification settings - Fork 1k
Don't cache sanitization results for large sql statements #13353
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8100515
c79cfd7
c10cb1c
bc71a6b
095dd7c
1e77213
9c3a251
cbeaa21
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,6 +21,7 @@ public final class SqlStatementSanitizer { | |
|
|
||
| private static final Cache<CacheKey, SqlStatementInfo> sqlToStatementInfoCache = | ||
| Cache.bounded(1000); | ||
| private static final int LARGE_STATEMENT_THRESHOLD = 10 * 1024; | ||
|
|
||
| public static SqlStatementSanitizer create(boolean statementSanitizationEnabled) { | ||
| return new SqlStatementSanitizer(statementSanitizationEnabled); | ||
|
|
@@ -40,12 +41,24 @@ public SqlStatementInfo sanitize(@Nullable String statement, SqlDialect dialect) | |
| if (!statementSanitizationEnabled || statement == null) { | ||
| return SqlStatementInfo.create(statement, null, null); | ||
| } | ||
| // sanitization result will not be cached for statements larger than the threshold to avoid | ||
| // cache growing too large | ||
| // https://github.com/open-telemetry/opentelemetry-java-instrumentation/issues/13180 | ||
| if (statement.length() > LARGE_STATEMENT_THRESHOLD) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. i was thinking we could hash these larger statements instead of using the whole statement as the key, but that might be more computationally expensive, so this seems reasonable to me
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually my first attempt was to use hashing. Computing a hash for a very large statement can be more expensive than applying the sanitizer as the sanitizer also applies a size limit. My guess is that many of these super large statements could be dynamically generated so it is likely that the statement is executed only once and would not benefit from caching anyway. |
||
| return sanitizeImpl(statement, dialect); | ||
| } | ||
laurit marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| return sqlToStatementInfoCache.computeIfAbsent( | ||
| CacheKey.create(statement, dialect), | ||
| k -> { | ||
| supportability.incrementCounter(SQL_STATEMENT_SANITIZER_CACHE_MISS); | ||
| return AutoSqlSanitizer.sanitize(statement, dialect); | ||
| }); | ||
| CacheKey.create(statement, dialect), k -> sanitizeImpl(statement, dialect)); | ||
| } | ||
|
|
||
| private static SqlStatementInfo sanitizeImpl(@Nullable String statement, SqlDialect dialect) { | ||
| supportability.incrementCounter(SQL_STATEMENT_SANITIZER_CACHE_MISS); | ||
| return AutoSqlSanitizer.sanitize(statement, dialect); | ||
| } | ||
|
|
||
| // visible for tests | ||
| static boolean isCached(String statement) { | ||
| return sqlToStatementInfoCache.get(CacheKey.create(statement, SqlDialect.DEFAULT)) != null; | ||
| } | ||
|
|
||
| @AutoValue | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,27 @@ | ||
| /* | ||
| * Copyright The OpenTelemetry Authors | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| package io.opentelemetry.instrumentation.api.incubator.semconv.db; | ||
|
|
||
| import io.opentelemetry.instrumentation.api.instrumenter.Instrumenter; | ||
| import io.opentelemetry.instrumentation.api.internal.InstrumenterContext; | ||
| import java.util.HashMap; | ||
| import java.util.Map; | ||
|
|
||
| /** | ||
| * Helper class for sanitizing sql that keeps sanitization results in {@link InstrumenterContext} so | ||
| * that each statement would be sanitized only once for given {@link Instrumenter} call. | ||
| */ | ||
| class SqlStatementSanitizerUtil { | ||
| private static final SqlStatementSanitizer sanitizer = SqlStatementSanitizer.create(true); | ||
|
|
||
| static SqlStatementInfo sanitize(String queryText) { | ||
| Map<String, SqlStatementInfo> map = | ||
| InstrumenterContext.computeIfAbsent("sanitized-sql-map", unused -> new HashMap<>()); | ||
| return map.computeIfAbsent(queryText, sanitizer::sanitize); | ||
| } | ||
|
|
||
| private SqlStatementSanitizerUtil() {} | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| /* | ||
| * Copyright The OpenTelemetry Authors | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| package io.opentelemetry.instrumentation.api.internal; | ||
|
|
||
| import io.opentelemetry.instrumentation.api.instrumenter.AttributesExtractor; | ||
| import io.opentelemetry.instrumentation.api.instrumenter.Instrumenter; | ||
| import io.opentelemetry.instrumentation.api.instrumenter.SpanNameExtractor; | ||
| import java.util.HashMap; | ||
| import java.util.Map; | ||
| import java.util.function.Function; | ||
|
|
||
| /** | ||
| * Helper class for sharing computed values between different {@link AttributesExtractor}s and | ||
| * {@link SpanNameExtractor} called in the start phase of the {@link Instrumenter}. | ||
| * | ||
| * <p>This class is internal and is hence not for public use. Its APIs are unstable and can change | ||
| * at any time. | ||
| */ | ||
| public final class InstrumenterContext { | ||
| private static final ThreadLocal<InstrumenterContext> instrumenterContext = | ||
| new ThreadLocal<InstrumenterContext>() { | ||
| @Override | ||
| protected InstrumenterContext initialValue() { | ||
| return new InstrumenterContext(); | ||
| } | ||
| }; | ||
|
|
||
| private final Map<String, Object> map = new HashMap<>(); | ||
|
|
||
| private InstrumenterContext() {} | ||
|
|
||
| @SuppressWarnings("unchecked") | ||
| public static <T> T computeIfAbsent(String key, Function<String, T> function) { | ||
| return (T) get().computeIfAbsent(key, function); | ||
| } | ||
|
|
||
| // visible for testing | ||
| static Map<String, Object> get() { | ||
| return instrumenterContext.get().map; | ||
| } | ||
|
|
||
| public static void reset() { | ||
| instrumenterContext.remove(); | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,78 @@ | ||
| /* | ||
| * Copyright The OpenTelemetry Authors | ||
| * SPDX-License-Identifier: Apache-2.0 | ||
| */ | ||
|
|
||
| package io.opentelemetry.instrumentation.api.internal; | ||
|
|
||
| import static io.opentelemetry.instrumentation.testing.junit.db.SemconvStabilityUtil.maybeStable; | ||
| import static org.assertj.core.api.Assertions.assertThat; | ||
|
|
||
| import io.opentelemetry.api.common.Attributes; | ||
| import io.opentelemetry.api.common.AttributesBuilder; | ||
| import io.opentelemetry.context.Context; | ||
| import io.opentelemetry.instrumentation.api.incubator.semconv.db.DbClientSpanNameExtractor; | ||
| import io.opentelemetry.instrumentation.api.incubator.semconv.db.SqlClientAttributesExtractor; | ||
| import io.opentelemetry.instrumentation.api.incubator.semconv.db.SqlClientAttributesGetter; | ||
| import io.opentelemetry.instrumentation.api.incubator.semconv.db.SqlStatementInfo; | ||
| import io.opentelemetry.instrumentation.api.instrumenter.AttributesExtractor; | ||
| import io.opentelemetry.instrumentation.api.instrumenter.SpanNameExtractor; | ||
| import io.opentelemetry.instrumentation.testing.internal.AutoCleanupExtension; | ||
| import io.opentelemetry.semconv.incubating.DbIncubatingAttributes; | ||
| import java.util.Collection; | ||
| import java.util.Collections; | ||
| import java.util.Map; | ||
| import org.junit.jupiter.api.Test; | ||
| import org.junit.jupiter.api.extension.RegisterExtension; | ||
|
|
||
| class InstrumenterContextTest { | ||
| @RegisterExtension static final AutoCleanupExtension cleanup = AutoCleanupExtension.create(); | ||
|
|
||
| @SuppressWarnings({"unchecked", "deprecation"}) // using deprecated DB_SQL_TABLE | ||
| @Test | ||
| void testSqlSanitizer() { | ||
| String testQuery = "SELECT name FROM test WHERE id = 1"; | ||
| SqlClientAttributesGetter<Object, Void> getter = | ||
| new SqlClientAttributesGetter<Object, Void>() { | ||
|
|
||
| @Override | ||
| public Collection<String> getRawQueryTexts(Object request) { | ||
| return Collections.singletonList(testQuery); | ||
| } | ||
| }; | ||
| SpanNameExtractor<Object> spanNameExtractor = DbClientSpanNameExtractor.create(getter); | ||
| AttributesExtractor<Object, Void> attributesExtractor = | ||
| SqlClientAttributesExtractor.create(getter); | ||
|
|
||
| InstrumenterContext.reset(); | ||
| cleanup.deferCleanup(InstrumenterContext::reset); | ||
|
|
||
| assertThat(InstrumenterContext.get()).isEmpty(); | ||
| assertThat(spanNameExtractor.extract(null)).isEqualTo("SELECT test"); | ||
| // verify that sanitized statement was cached, see SqlStatementSanitizerUtil | ||
| assertThat(InstrumenterContext.get()).containsKey("sanitized-sql-map"); | ||
| Map<String, SqlStatementInfo> sanitizedMap = | ||
| (Map<String, SqlStatementInfo>) InstrumenterContext.get().get("sanitized-sql-map"); | ||
| assertThat(sanitizedMap).containsKey(testQuery); | ||
|
|
||
| // replace cached sanitization result to verify it is used | ||
| sanitizedMap.put( | ||
| testQuery, | ||
| SqlStatementInfo.create("SELECT name2 FROM test2 WHERE id = ?", "SELECT", "test2")); | ||
| { | ||
| AttributesBuilder builder = Attributes.builder(); | ||
| attributesExtractor.onStart(builder, Context.root(), null); | ||
| assertThat(builder.build().get(maybeStable(DbIncubatingAttributes.DB_SQL_TABLE))) | ||
| .isEqualTo("test2"); | ||
| } | ||
|
|
||
| // clear cached value to see whether it gets recomputed correctly | ||
| sanitizedMap.clear(); | ||
| { | ||
| AttributesBuilder builder = Attributes.builder(); | ||
| attributesExtractor.onStart(builder, Context.root(), null); | ||
| assertThat(builder.build().get(maybeStable(DbIncubatingAttributes.DB_SQL_TABLE))) | ||
| .isEqualTo("test"); | ||
| } | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since these are removed in the stable semconv we don't need to force users to implement them.