Skip to content

Commit f0ff5c2

Browse files
authored
Merge branch 'main' into configure-head-chunks-write-queue-size
2 parents fa7dac6 + 2e0f64d commit f0ff5c2

19 files changed

+156
-40
lines changed

.github/workflows/ci.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ on:
99
jobs:
1010
lint:
1111
runs-on: ubuntu-latest
12-
container: quay.io/cortexproject/cortex-jsonnet-build-image:e63d87f
12+
container: quay.io/cortexproject/cortex-jsonnet-build-image:e158eda
1313
steps:
1414
- uses: actions/checkout@v2
1515
name: Checkout
@@ -23,7 +23,7 @@ jobs:
2323
run: make lint-playbooks
2424
build:
2525
runs-on: ubuntu-latest
26-
container: quay.io/cortexproject/cortex-jsonnet-build-image:e63d87f
26+
container: quay.io/cortexproject/cortex-jsonnet-build-image:e158eda
2727
steps:
2828
- uses: actions/checkout@v2
2929
name: Checkout
@@ -34,7 +34,7 @@ jobs:
3434
run: make build-mixin
3535
readme:
3636
runs-on: ubuntu-latest
37-
container: quay.io/cortexproject/cortex-jsonnet-build-image:e63d87f
37+
container: quay.io/cortexproject/cortex-jsonnet-build-image:e158eda
3838
steps:
3939
- uses: actions/checkout@v2
4040
name: Checkout

CHANGELOG.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,18 @@
11
# Changelog
22

33
## master / unreleased
4+
* [CHANGE] Upgrade memcached to 1.6.23-alpine and memcached-exporter to v0.14.2
5+
* [CHANGE] Use cortex v1.16.0
6+
* [ENHANCEMENT] Enable frontend query stats by default
7+
* [ENHANCEMENT] Enable ruler query stats by default
8+
9+
## 1.15.3 / 2023-11-24
10+
* [CHANGE] Add default instance max series for ingesters
11+
* [CHANGE] Add default instance max inflight pushes for distributors
12+
* [CHANGE] Remove mem-ballast from distributor and querier.
13+
* [CHANGE] Increase cpu requests for querier to 2.
14+
* [CHANGE] Configure GOMAXPROCS and GOMEMLIMIT for all cortex modules based on cpu and memory requests or limits
15+
* [CHANGE] Add default tenant shard sizes
416
* [CHANGE] Use cortex v1.15.3
517
* [CHANGE] Azure storage endpoint suffix is set to `blob.core.windows.net` for backward compatibility
618
* [ENHANCEMENT] Configure head_chunks_write_queue_size to 1 million to avoid high latency when shipping

README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,15 +17,15 @@ To generate the YAMLs for deploying Cortex:
1717

1818
```console
1919
$ # make sure to be outside of GOPATH or a go.mod project
20-
$ GO111MODULE=on go install github.com/grafana/tanka/cmd/tk@v0.24.0
20+
$ GO111MODULE=on go install github.com/grafana/tanka/cmd/tk@v0.26.0
2121
$ GO111MODULE=on go install github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb@v0.4.0
2222
```
2323

2424
1. Initialise the Tanka repo, install the Cortex and Kubernetes Jsonnet libraries.
2525

2626
```console
2727
$ mkdir <name> && cd <name>
28-
$ tk init --k8s=1.24 # this includes github.com/jsonnet-libs/k8s-libsonnet/1.24@main
28+
$ tk init --k8s=1.26 # this includes github.com/jsonnet-libs/k8s-libsonnet/1.26@main
2929
$ jb install github.com/cortexproject/cortex-jsonnet/cortex@main
3030
```
3131

build-image/Dockerfile

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# Build jsonnet
2-
FROM alpine:3.17 AS jsonnet-builder
2+
FROM alpine:3.18 AS jsonnet-builder
33
RUN apk add --no-cache git make g++
44
RUN git clone https://github.com/google/jsonnet && \
55
git -C jsonnet checkout v0.20.0 && \
@@ -8,7 +8,7 @@ RUN git clone https://github.com/google/jsonnet && \
88
cp jsonnet/jsonnetfmt /usr/bin
99

1010
# Build jb
11-
FROM alpine:3.17 AS jb-builder
11+
FROM alpine:3.18 AS jb-builder
1212
ARG JSONNET_BUNDLER_VERSION=0.5.1
1313
ARG JSONNET_BUNDLER_CHECKSUM="f5bccc94d28fbbe8ad1d46fd4f208619e45d368a5d7924f6335f4ecfa0605c85 /usr/bin/jb"
1414
RUN apk add --no-cache curl
@@ -17,19 +17,19 @@ RUN echo "${JSONNET_BUNDLER_CHECKSUM}" | sha256sum -c || (printf "wanted: %s\n
1717
RUN chmod +x /usr/bin/jb
1818

1919
# Build tanka
20-
FROM alpine:3.17 AS tk-builder
21-
ARG TANKA_VERSION=0.24.0
22-
ARG TANKA_CHECKSUM="82c8c533c29eefea0af9c28f487203b19dec84ce2624702f99196e777f946ddc /usr/bin/tk"
20+
FROM alpine:3.18 AS tk-builder
21+
ARG TANKA_VERSION=0.26.0
22+
ARG TANKA_CHECKSUM="089796ae2ce65390501b2c68ceca1ce99ff12787d5ae3b4823c825a07e6e22f4 /usr/bin/tk"
2323
RUN apk add --no-cache curl
2424
RUN curl -fSL -o "/usr/bin/tk" "https://github.com/grafana/tanka/releases/download/v${TANKA_VERSION}/tk-linux-amd64"
2525
RUN echo "${TANKA_CHECKSUM}" | sha256sum -c || (printf "wanted: %s\n got: %s\n" "${TANKA_CHECKSUM}" "$(sha256sum /usr/bin/tk)"; exit 1)
2626
RUN chmod +x /usr/bin/tk
2727

2828
# Build mixtool
29-
FROM golang:1.20-alpine AS mixtool-builder
29+
FROM golang:1.21-alpine AS mixtool-builder
3030
RUN GO111MODULE=on go install github.com/monitoring-mixins/mixtool/cmd/mixtool@ae18e31161ea10545b9c1ac0d23c10122f2c12b5
3131

32-
FROM alpine:3.17
32+
FROM alpine:3.18
3333
RUN apk add --no-cache git make libgcc libstdc++ zip findutils sed yq
3434
COPY --from=jsonnet-builder /usr/bin/jsonnetfmt /usr/bin
3535
COPY --from=jsonnet-builder /usr/bin/jsonnet /usr/bin

cortex/alertmanager.libsonnet

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
local volumeMount = $.core.v1.volumeMount,
44
local volume = $.core.v1.volume,
55
local container = $.core.v1.container,
6+
local envType = container.envType,
67
local statefulSet = $.apps.v1.statefulSet,
78
local service = $.core.v1.service,
89
local configMap = $.core.v1.configMap,
@@ -96,6 +97,7 @@
9697
if $._config.alertmanager_enabled then
9798
container.new('alertmanager', $._images.alertmanager) +
9899
container.withPorts($.util.defaultPorts + mode.ports) +
100+
container.withEnvMap($.alertmanager_env_map) +
99101
container.withEnvMixin([container.envType.fromFieldPath('POD_IP', 'status.podIP')]) +
100102
container.withArgsMixin(
101103
$.util.mapToFlags($.alertmanager_args) +
@@ -109,9 +111,13 @@
109111
) +
110112
$.util.resourcesRequests('100m', '1Gi') +
111113
$.util.readinessProbe +
114+
$.go_container_mixin +
112115
$.jaeger_mixin
113116
else {},
114117

118+
alertmanager_env_map:: {
119+
},
120+
115121
alertmanager_statefulset:
116122
if $._config.alertmanager_enabled then
117123
statefulSet.new('alertmanager', $._config.alertmanager.replicas, [$.alertmanager_container], $.alertmanager_pvc) +

cortex/compactor.libsonnet

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
{
22
local container = $.core.v1.container,
3+
local envType = container.envType,
34
local pvc = $.core.v1.persistentVolumeClaim,
45
local statefulSet = $.apps.v1.statefulSet,
56
local volumeMount = $.core.v1.volumeMount,
@@ -43,13 +44,18 @@
4344
container.new('compactor', $._images.compactor) +
4445
container.withPorts($.compactor_ports) +
4546
container.withArgsMixin($.util.mapToFlags($.compactor_args)) +
47+
container.withEnvMap($.compactor_env_map) +
4648
container.withVolumeMountsMixin([volumeMount.new('compactor-data', '/data')]) +
4749
// Do not limit compactor CPU and request enough cores to honor configured max concurrency.
48-
$.util.resourcesRequests($._config.cortex_compactor_max_concurrency, '6Gi') +
50+
$.util.resourcesRequests($._config.cortex_compactor_max_concurrency, '5Gi') +
4951
$.util.resourcesLimits(null, '6Gi') +
5052
$.util.readinessProbe +
53+
$.go_container_mixin +
5154
$.jaeger_mixin,
5255

56+
compactor_env_map:: {
57+
},
58+
5359
newCompactorStatefulSet(name, container)::
5460
statefulSet.new(name, 1, [container], compactor_data_pvc) +
5561
statefulSet.mixin.spec.withServiceName(name) +

cortex/config.libsonnet

Lines changed: 47 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -260,6 +260,8 @@
260260

261261
// No retention for now.
262262
compactor_blocks_retention_period: '0',
263+
264+
ingestion_tenant_shard_size: 3,
263265
},
264266

265267
medium_small_user:: {
@@ -277,6 +279,8 @@
277279
// 1000 rules
278280
ruler_max_rules_per_rule_group: 20,
279281
ruler_max_rule_groups_per_tenant: 50,
282+
283+
ingestion_tenant_shard_size: 9,
280284
},
281285

282286
small_user:: {
@@ -294,6 +298,8 @@
294298
// 1400 rules
295299
ruler_max_rules_per_rule_group: 20,
296300
ruler_max_rule_groups_per_tenant: 70,
301+
302+
ingestion_tenant_shard_size: 15,
297303
},
298304

299305
medium_user:: {
@@ -311,6 +317,8 @@
311317
// 1800 rules
312318
ruler_max_rules_per_rule_group: 20,
313319
ruler_max_rule_groups_per_tenant: 90,
320+
321+
ingestion_tenant_shard_size: 30,
314322
},
315323

316324
big_user:: {
@@ -328,6 +336,8 @@
328336
// 2200 rules
329337
ruler_max_rules_per_rule_group: 20,
330338
ruler_max_rule_groups_per_tenant: 110,
339+
340+
ingestion_tenant_shard_size: 60,
331341
},
332342

333343
super_user:: {
@@ -345,6 +355,8 @@
345355
// 2600 rules
346356
ruler_max_rules_per_rule_group: 20,
347357
ruler_max_rule_groups_per_tenant: 130,
358+
359+
ingestion_tenant_shard_size: 120,
348360
},
349361

350362
// This user class has limits increased by +50% compared to the previous one.
@@ -363,6 +375,8 @@
363375
// 3000 rules
364376
ruler_max_rules_per_rule_group: 20,
365377
ruler_max_rule_groups_per_tenant: 150,
378+
379+
ingestion_tenant_shard_size: 180,
366380
},
367381
},
368382

@@ -381,15 +395,41 @@
381395
ingester_stream_chunks_when_using_blocks: true,
382396

383397
// Ingester limits are put directly into runtime config, if not null. Available limits:
384-
// ingester_instance_limits: {
385-
// max_inflight_push_requests: 0, // Max inflight push requests per ingester. 0 = no limit.
386-
// max_ingestion_rate: 0, // Max ingestion rate (samples/second) per ingester. 0 = no limit.
387-
// max_series: 0, // Max number of series per ingester. 0 = no limit.
388-
// max_tenants: 0, // Max number of tenants per ingester. 0 = no limit.
389-
// },
390-
ingester_instance_limits: null,
398+
ingester_instance_limits: {
399+
// max_inflight_push_requests: 0, // Max inflight push requests per ingester. 0 = no limit.
400+
// max_ingestion_rate: 0, // Max ingestion rate (samples/second) per ingester. 0 = no limit.
401+
max_series: 4.8e+6, // Max number of series per ingester. 0 = no limit. 4.8 million is closely tied to 15GB in requests per ingester
402+
// max_tenants: 0, // Max number of tenants per ingester. 0 = no limit.
403+
},
404+
405+
// If either of the two flags below is disabled, the matching resource limit must be set, because the value is then taken from limits instead of requests.
406+
// Disabling this can potentially increase cortex performance,
407+
// but it will also cause performance inconsistencies
408+
gomaxprocs_based_on_cpu_requests: true,
409+
gomemlimit_based_on_mem_requests: true,
410+
411+
gomaxprocs_resource:
412+
if $._config.gomaxprocs_based_on_cpu_requests then
413+
'requests.cpu'
414+
else
415+
'limits.cpu',
416+
417+
gomemlimit_resource:
418+
if $._config.gomemlimit_based_on_mem_requests then
419+
'requests.memory'
420+
else
421+
'limits.memory',
391422
},
392423

424+
go_container_mixin::
425+
local container = $.core.v1.container;
426+
container.withEnvMixin([
427+
container.envType.withName('GOMAXPROCS') +
428+
container.envType.valueFrom.resourceFieldRef.withResource($._config.gomaxprocs_resource),
429+
container.envType.withName('GOMEMLIMIT') +
430+
container.envType.valueFrom.resourceFieldRef.withResource($._config.gomemlimit_resource),
431+
]),
432+
393433
local configMap = $.core.v1.configMap,
394434

395435
overrides_config:

cortex/distributor.libsonnet

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
{
22
local container = $.core.v1.container,
3+
local envType = container.envType,
34
local containerPort = $.core.v1.containerPort,
45

56
distributor_args::
@@ -20,11 +21,6 @@
2021
'distributor.ha-tracker.etcd.endpoints': 'etcd-client.%s.svc.cluster.local.:2379' % $._config.namespace,
2122
'distributor.ha-tracker.prefix': 'prom_ha/',
2223

23-
// The memory requests are 2G, and we barely use 100M.
24-
// By adding a ballast of 1G, we can drastically reduce GC, but also keep the usage at
25-
// around 1.25G, reducing the 99%ile.
26-
'mem-ballast-size-bytes': 1 << 30, // 1GB
27-
2824
'server.grpc.keepalive.max-connection-age': '2m',
2925
'server.grpc.keepalive.max-connection-age-grace': '5m',
3026
'server.grpc.keepalive.max-connection-idle': '1m',
@@ -36,17 +32,23 @@
3632
// Do not extend the replication set on unhealthy (or LEAVING) ingester when "unregister on shutdown"
3733
// is set to false.
3834
'distributor.extend-writes': $._config.unregister_ingesters_on_shutdown,
35+
'distributor.instance-limits.max-inflight-push-requests': 60,  // 60 is very conservative, to protect the distributor from OOMs
3936
},
4037

38+
distributor_env_map:: {
39+
},
40+
4141
distributor_ports:: $.util.defaultPorts,
4242

4343
distributor_container::
4444
container.new('distributor', $._images.distributor) +
4545
container.withPorts($.distributor_ports) +
4646
container.withArgsMixin($.util.mapToFlags($.distributor_args)) +
47+
container.withEnvMap($.distributor_env_map) +
4748
$.util.resourcesRequests('2', '2Gi') +
4849
$.util.resourcesLimits(null, '4Gi') +
4950
$.util.readinessProbe +
51+
$.go_container_mixin +
5052
$.jaeger_mixin,
5153

5254
local deployment = $.apps.v1.deployment,

cortex/flusher-job-blocks.libsonnet

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,11 +21,16 @@
2121
target: 'flusher',
2222
'blocks-storage.tsdb.retention-period': '10000h', // don't delete old blocks too soon.
2323
})) +
24+
container.withEnvMap($.flusher_env_map) +
2425
$.util.resourcesRequests('4', '15Gi') +
2526
$.util.resourcesLimits(null, '25Gi') +
2627
$.util.readinessProbe +
28+
$.go_container_mixin +
2729
$.jaeger_mixin,
2830

31+
flusher_env_map:: {
32+
},
33+
2934
flusher_job_func(jobName, pvcName)::
3035
job.new() +
3136
job.mixin.spec.template.spec.withContainers([

cortex/images.libsonnet

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
{
22
_images+:: {
33
// Various third-party images.
4-
memcached: 'memcached:1.6.9-alpine',
5-
memcachedExporter: 'prom/memcached-exporter:v0.6.0',
4+
memcached: 'memcached:1.6.23-alpine',
5+
memcachedExporter: 'prom/memcached-exporter:v0.14.2',
66

77
// Our services.
8-
cortex: 'cortexproject/cortex:v1.15.3',
8+
cortex: 'cortexproject/cortex:v1.16.0',
99

1010
alertmanager: self.cortex,
1111
distributor: self.cortex,
@@ -20,7 +20,7 @@
2020
query_scheduler: self.cortex,
2121

2222
overrides_exporter: self.cortex,
23-
query_tee: 'quay.io/cortexproject/query-tee:v1.15.3',
24-
testExporter: 'cortexproject/test-exporter:v1.15.3',
23+
query_tee: 'quay.io/cortexproject/query-tee:v1.16.0',
24+
testExporter: 'cortexproject/test-exporter:v1.16.0',
2525
},
2626
}

0 commit comments

Comments
 (0)