|
19 | 19 |
|
20 | 20 | import org.apache.kafka.common.config.AbstractConfig; |
21 | 21 | import org.apache.kafka.common.config.ConfigDef; |
| 22 | +import org.apache.kafka.common.config.ConfigException; |
22 | 23 | import org.apache.kafka.common.metrics.Metrics; |
23 | 24 |
|
24 | 25 | import java.lang.reflect.InvocationTargetException; |
@@ -136,9 +137,54 @@ public class InklessConfig extends AbstractConfig { |
136 | 137 | private static final int FETCH_DATA_THREAD_POOL_SIZE_DEFAULT = 32; |
137 | 138 |
|
138 | 139 | public static final String FETCH_METADATA_THREAD_POOL_SIZE_CONFIG = "fetch.metadata.thread.pool.size"; |
139 | | - public static final String FETCH_METADATA_THREAD_POOL_SIZE_DOC = "Thread pool size to concurrently fetch metadata from batch coordinator"; |
| 140 | + public static final String FETCH_METADATA_THREAD_POOL_SIZE_DOC = "Thread pool size to concurrently fetch metadata from batch coordinator. " |
| 141 | + + "Note: This executor is shared between hot and cold path requests. The hot/cold path separation " |
| 142 | + + "only applies to data fetching (after metadata is retrieved). A burst of lagging consumer requests " |
| 143 | + + "can still compete with recent consumer requests at the metadata layer. For workloads with significant " |
| 144 | + + "lagging consumer traffic, consider increasing this value proportionally to the combined " |
| 145 | + + "fetch.data.thread.pool.size + fetch.lagging.consumer.thread.pool.size to prevent metadata fetching " |
| 146 | + + "from becoming a bottleneck in mixed hot/cold workloads."; |
140 | 147 | private static final int FETCH_METADATA_THREAD_POOL_SIZE_DEFAULT = 8; |
141 | 148 |
|
| 149 | + public static final String FETCH_LAGGING_CONSUMER_THREAD_POOL_SIZE_CONFIG = "fetch.lagging.consumer.thread.pool.size"; |
| 150 | + public static final String FETCH_LAGGING_CONSUMER_THREAD_POOL_SIZE_DOC = "Thread pool size for lagging consumer fetch requests (consumers reading old data). " |
| 151 | + + "Set to 0 to disable the lagging consumer feature (all requests will use the recent data path). " |
| 152 | + + "The default value of 16 is designed as approximately half of the default fetch.data.thread.pool.size (32), " |
| 153 | + + "providing sufficient capacity for typical cold storage access patterns while leaving headroom for the hot path. " |
| 154 | + + "The queue capacity is automatically set to thread.pool.size * 100, providing burst buffering " |
| 155 | + + "(e.g., 16 threads = 1600 queue capacity ≈ 8 seconds buffer at 200 req/s). " |
| 156 | + + "Tune based on lagging consumer SLA and expected load patterns."; |
| 157 | + // Default 16: Designed as half of default fetch.data.thread.pool.size (32), sufficient for typical |
| 158 | + // cold storage access patterns while leaving headroom for hot path. Tune based on lagging consumer SLA. |
| 159 | + private static final int FETCH_LAGGING_CONSUMER_THREAD_POOL_SIZE_DEFAULT = 16; |
| 160 | + |
| 161 | + public static final String FETCH_LAGGING_CONSUMER_THRESHOLD_MS_CONFIG = "fetch.lagging.consumer.threshold.ms"; |
| 162 | + public static final String FETCH_LAGGING_CONSUMER_THRESHOLD_MS_DOC = "The time threshold in milliseconds to distinguish between recent and lagging consumers. " |
| 163 | + + "Fetch requests for data strictly older than this threshold (dataAge > threshold, based on batch timestamp) will use the lagging consumer path. " |
| 164 | + + "Set to -1 to use the default heuristic: the cache expiration lifespan. " |
| 165 | + + "This provides a grace period ensuring data remains in cache before being considered 'lagging', " |
| 166 | + + "accounting for cache warm-up and typical consumer lag variations. " |
| 167 | + + "Must be >= cache expiration lifespan (see " + CONSUME_CACHE_EXPIRATION_LIFESPAN_SEC_CONFIG + "). " |
| 168 | + + "This is a startup-only configuration (no dynamic reconfiguration support). " |
| 169 | + + "Both threshold and cache lifespan must be set together at startup to maintain the constraint."; |
| 170 | + /** |
| 171 | + * Default value for {@link #FETCH_LAGGING_CONSUMER_THRESHOLD_MS_CONFIG}. |
| 172 | + * A value of -1 means "auto-detect from cache TTL" - the {@link #fetchLaggingConsumerThresholdMs()} method |
| 173 | + * will automatically use the cache expiration lifespan as the effective threshold. |
| 174 | + */ |
| 175 | + private static final int FETCH_LAGGING_CONSUMER_THRESHOLD_MS_DEFAULT = -1; |
| 176 | + |
| 177 | + public static final String FETCH_LAGGING_CONSUMER_REQUEST_RATE_LIMIT_CONFIG = "fetch.lagging.consumer.request.rate.limit"; |
| 178 | + public static final String FETCH_LAGGING_CONSUMER_REQUEST_RATE_LIMIT_DOC = "Maximum requests per second for lagging consumer data fetches. " |
| 179 | + + "Set to 0 to disable rate limiting. " |
| 180 | + + "The upper bound of 10000 req/s is a safety limit to prevent misconfiguration. For high-throughput systems, " |
| 181 | + + "consider the relationship between this rate limit, thread pool size, and storage backend capacity. " |
| 182 | + + "At the default rate of 200 req/s with ~50ms per request latency, this allows ~10 concurrent requests."; |
| 183 | + // Default 200 req/s: Conservative limit based on typical object storage GET request costs and latency. |
| 184 | + // At ~50ms per request, 200 req/s = ~10 concurrent requests, balancing throughput with cost control. |
| 185 | + // Tune based on storage backend capacity and budget constraints. |
| 186 | + private static final int FETCH_LAGGING_CONSUMER_REQUEST_RATE_LIMIT_DEFAULT = 200; |
| 187 | + |
142 | 188 | public static final String FETCH_FIND_BATCHES_MAX_BATCHES_PER_PARTITION_CONFIG = "fetch.find.batches.max.per.partition"; |
143 | 189 | public static final String FETCH_FIND_BATCHES_MAX_BATCHES_PER_PARTITION_DOC = "The maximum number of batches to find per partition when processing a fetch request. " |
144 | 190 | + "A value of 0 means all available batches are fetched. " |
@@ -322,6 +368,32 @@ public static ConfigDef configDef() { |
322 | 368 | ConfigDef.Importance.LOW, |
323 | 369 | FETCH_METADATA_THREAD_POOL_SIZE_DOC |
324 | 370 | ); |
| 371 | + configDef.define( |
| 372 | + FETCH_LAGGING_CONSUMER_THREAD_POOL_SIZE_CONFIG, |
| 373 | + ConfigDef.Type.INT, |
| 374 | + FETCH_LAGGING_CONSUMER_THREAD_POOL_SIZE_DEFAULT, |
| 375 | + ConfigDef.Range.atLeast(0), |
| 376 | + ConfigDef.Importance.LOW, |
| 377 | + FETCH_LAGGING_CONSUMER_THREAD_POOL_SIZE_DOC |
| 378 | + ); |
| 379 | + configDef.define( |
| 380 | + FETCH_LAGGING_CONSUMER_THRESHOLD_MS_CONFIG, |
| 381 | + ConfigDef.Type.LONG, |
| 382 | + FETCH_LAGGING_CONSUMER_THRESHOLD_MS_DEFAULT, |
| 383 | + ConfigDef.Range.atLeast(-1), |
| 384 | + ConfigDef.Importance.MEDIUM, |
| 385 | + FETCH_LAGGING_CONSUMER_THRESHOLD_MS_DOC |
| 386 | + ); |
| 387 | + configDef.define( |
| 388 | + FETCH_LAGGING_CONSUMER_REQUEST_RATE_LIMIT_CONFIG, |
| 389 | + ConfigDef.Type.INT, |
| 390 | + FETCH_LAGGING_CONSUMER_REQUEST_RATE_LIMIT_DEFAULT, |
| 391 | + ConfigDef.Range.between(0, 10000), |
| 392 | + // Safety limit to prevent misconfiguration. For high-throughput systems, |
| 393 | + // consider the relationship between this rate limit, thread pool size, and storage backend capacity. |
| 394 | + ConfigDef.Importance.MEDIUM, |
| 395 | + FETCH_LAGGING_CONSUMER_REQUEST_RATE_LIMIT_DOC |
| 396 | + ); |
325 | 397 | configDef.define( |
326 | 398 | FETCH_FIND_BATCHES_MAX_BATCHES_PER_PARTITION_CONFIG, |
327 | 399 | ConfigDef.Type.INT, |
@@ -362,7 +434,69 @@ public InklessConfig(final AbstractConfig config) { |
362 | 434 | } |
363 | 435 |
|
/**
 * Builds the configuration from raw properties after checking cross-field constraints.
 *
 * @param props the raw configuration properties
 * @throws ConfigException if the lagging consumer threshold is inconsistent with the cache
 *                         expiration lifespan (see {@link #validate(Map)})
 */
public InklessConfig(final Map<String, ?> props) {
    super(validate(props), props);
}

/**
 * Validates the relationship between {@link #FETCH_LAGGING_CONSUMER_THRESHOLD_MS_CONFIG} and
 * {@link #CONSUME_CACHE_EXPIRATION_LIFESPAN_SEC_CONFIG}, then returns the definition to hand to
 * the superclass constructor.
 * <p>
 * This is a static helper so that it can run inside the {@code super(...)} argument list, before
 * the instance exists. The properties are therefore parsed twice (once here, once by the
 * superclass); this only happens when the config object is constructed.
 * </p>
 * <p>
 * Accepted threshold values:
 * </p>
 * <ul>
 *   <li>{@code -1}: always accepted; {@link #fetchLaggingConsumerThresholdMs()} substitutes the
 *       cache lifespan for it.</li>
 *   <li>any value at least as large as the cache lifespan in milliseconds. Equality is allowed
 *       because the config documentation describes the routing check as strictly
 *       {@code dataAge > threshold}.</li>
 * </ul>
 * <p>
 * A smaller threshold would send requests for data that may still be cached to the lagging
 * path. {@code 0} gets its own, more specific error message.
 * </p>
 *
 * @param props the raw configuration properties
 * @return the {@link ConfigDef} that was used to parse {@code props}
 * @throws ConfigException if parsing fails or the threshold constraint is violated
 */
private static ConfigDef validate(final Map<String, ?> props) {
    final ConfigDef definition = configDef();
    final Map<String, Object> parsed = definition.parse(props);

    final long thresholdMs =
        ((Number) parsed.get(FETCH_LAGGING_CONSUMER_THRESHOLD_MS_CONFIG)).longValue();
    final int cacheLifespanSec =
        ((Number) parsed.get(CONSUME_CACHE_EXPIRATION_LIFESPAN_SEC_CONFIG)).intValue();
    final long cacheLifespanMs = Duration.ofSeconds(cacheLifespanSec).toMillis();

    // -1 is the "use the cache lifespan" sentinel, so there is nothing to compare.
    if (thresholdMs == -1) {
        return definition;
    }

    final String problem;
    if (thresholdMs == 0) {
        // Checked before the generic comparison purely to give a more specific explanation.
        problem = "Lagging consumer threshold cannot be 0. Use -1 to auto-detect from cache TTL, or set a value >= cache lifespan ("
            + cacheLifespanMs + "ms). Threshold=0 would route almost all cached data to the cold path, defeating the cache.";
    } else if (thresholdMs < cacheLifespanMs) {
        problem = "Lagging consumer threshold (" + thresholdMs + "ms) must be >= cache lifespan ("
            + cacheLifespanMs + "ms) to avoid routing requests for cached data to the lagging path.";
    } else {
        return definition;
    }

    throw new ConfigException(FETCH_LAGGING_CONSUMER_THRESHOLD_MS_CONFIG, thresholdMs, problem);
}
367 | 501 |
|
368 | 502 | @SuppressWarnings("unchecked") |
@@ -459,6 +593,34 @@ public int fetchMetadataThreadPoolSize() { |
459 | 593 | return getInt(FETCH_METADATA_THREAD_POOL_SIZE_CONFIG); |
460 | 594 | } |
461 | 595 |
|
| 596 | + public int fetchLaggingConsumerThreadPoolSize() { |
| 597 | + return getInt(FETCH_LAGGING_CONSUMER_THREAD_POOL_SIZE_CONFIG); |
| 598 | + } |
| 599 | + |
| 600 | + /** |
| 601 | + * Returns the effective lagging consumer threshold in milliseconds. |
| 602 | + * <p> |
| 603 | + * If the configured value is -1 (auto), this method returns the cache expiration lifespan, |
| 604 | + * which serves as the default heuristic. This ensures the effective threshold is always >= cache |
| 605 | + * lifespan, which is why validation in the constructor skips threshold=-1 (it will automatically |
| 606 | + * use cache lifespan at runtime). |
| 607 | + * </p> |
| 608 | + * |
| 609 | + * @return the effective threshold in milliseconds (cache lifespan if configured as -1, otherwise the configured value) |
| 610 | + */ |
| 611 | + public long fetchLaggingConsumerThresholdMs() { |
| 612 | + final long configuredValue = getLong(FETCH_LAGGING_CONSUMER_THRESHOLD_MS_CONFIG); |
| 613 | + if (configuredValue == -1) { |
| 614 | + // Use heuristic: cache TTL (provides grace period for recent data) |
| 615 | + return Duration.ofSeconds(getInt(CONSUME_CACHE_EXPIRATION_LIFESPAN_SEC_CONFIG)).toMillis(); |
| 616 | + } |
| 617 | + return configuredValue; |
| 618 | + } |
| 619 | + |
| 620 | + public int fetchLaggingConsumerRequestRateLimit() { |
| 621 | + return getInt(FETCH_LAGGING_CONSUMER_REQUEST_RATE_LIMIT_CONFIG); |
| 622 | + } |
| 623 | + |
462 | 624 | public int maxBatchesPerPartitionToFind() { |
463 | 625 | return getInt(FETCH_FIND_BATCHES_MAX_BATCHES_PER_PARTITION_CONFIG); |
464 | 626 | } |
|
0 commit comments